diff options
| author | erdgeist <> | 2007-10-18 00:21:22 +0000 |
|---|---|---|
| committer | erdgeist <> | 2007-10-18 00:21:22 +0000 |
| commit | 243d5961d0425b199319967e1c296c5d0124f3f2 (patch) | |
| tree | 6b8c085b95398b1daac7e4ed9112770a729cea4a | |
| parent | be117f96a22e0d6d1052c2fea4991cb65dec731f (diff) | |
fixed one performance bug: when "skipping values from a &param=values pair" was requested, the requestor ended up with "values" to be parsed again.
improved performance of fromhex
improved performance of is_unreserved() by moving it all into a simple byte array
improved performance of %41 => 'A' conversion by reordering variables
| -rw-r--r-- | scan_urlencoded_query.c | 41 |
1 files changed, 26 insertions, 15 deletions
diff --git a/scan_urlencoded_query.c b/scan_urlencoded_query.c index 296e829..f61d79e 100644 --- a/scan_urlencoded_query.c +++ b/scan_urlencoded_query.c | |||
| @@ -12,39 +12,51 @@ | |||
| 12 | we add '%' to the matrix to not stop at encoded chars. | 12 | we add '%' to the matrix to not stop at encoded chars. |
| 13 | After losing too many requests to being too strict, add the following characters to reserved matrix | 13 | After losing too many requests to being too strict, add the following characters to reserved matrix |
| 14 | relax = "+" | "," | "/" | ";" | "<" | ">" | ":" | 14 | relax = "+" | "," | "/" | ";" | "<" | ">" | ":" |
| 15 | |||
| 16 | static const unsigned char reserved_matrix_strict[] = { 0xA2, 0x67, 0xFF, 0x03, 0xFE, 0xFF, 0xFF, 0x87, 0xFE, 0xFF, 0xFF, 0x47}; | ||
| 17 | */ | 15 | */ |
| 18 | static const unsigned char reserved_matrix[] = { 0xA2, 0xFF, 0xFF, 0x5F, 0xFE, 0xFF, 0xFF, 0x87, 0xFE, 0xFF, 0xFF, 0x47}; | ||
| 19 | 16 | ||
| 20 | static int is_unreserved( unsigned char c ) { | 17 | static const unsigned char is_unreserved[256] = { |
| 21 | if( ( c <= 32 ) || ( c >= 127 ) ) return 0; return 1&(reserved_matrix[(c-32)>>3]>>(c&7)); | 18 | 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
| 19 | 0,1,0,0,0,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0, | ||
| 20 | 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1, | ||
| 21 | 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,1,0, | ||
| 22 | 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, | ||
| 23 | 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, | ||
| 24 | 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, | ||
| 25 | 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 | ||
| 26 | }; | ||
| 27 | |||
| 28 | static unsigned char fromhex(unsigned char c) { | ||
| 29 | if (c>='0' && c<='9') return c-'0'; | ||
| 30 | c &= 0xdf; /* Toggle off lower case bit */ | ||
| 31 | if (c>='A' && c<='F') return c-'A'+10; | ||
| 32 | return 0xff; | ||
| 22 | } | 33 | } |
| 23 | 34 | ||
| 24 | ssize_t scan_urlencoded_query(char **string, char *deststring, int flags) { | 35 | ssize_t scan_urlencoded_query(char **string, char *deststring, int flags) { |
| 25 | register const unsigned char* s=*(const unsigned char**) string; | 36 | const unsigned char* s=*(const unsigned char**) string; |
| 26 | unsigned char *d = (unsigned char*)deststring; | 37 | unsigned char *d = (unsigned char*)deststring; |
| 27 | register unsigned char b, c; | 38 | register unsigned char b, c; |
| 28 | 39 | ||
| 29 | retry_parsing: | 40 | retry_parsing: |
| 30 | while( is_unreserved( c = *s++) ) { | 41 | while( is_unreserved[ c = *s++ ] ) { |
| 31 | if( c=='%') { | 42 | if( c=='%') { |
| 32 | if( ( c = scan_fromhex(*s++) ) == 0xff ) return -1; | 43 | if( ( b = fromhex(*s++) ) == 0xff ) return -1; |
| 33 | if( ( b = scan_fromhex(*s++) ) == 0xff ) return -1; | 44 | if( ( c = fromhex(*s++) ) == 0xff ) return -1; |
| 34 | c=(c<<4)|b; | 45 | c|=(b<<4); |
| 35 | } | 46 | } |
| 36 | if( d ) *d++ = c; | 47 | if( d ) *d++ = c; |
| 37 | } | 48 | } |
| 38 | 49 | ||
| 39 | switch( c ) { | 50 | switch( c ) { |
| 40 | case 0: case '\r': case '\n': case ' ': | 51 | case 0: case '\r': case '\n': case ' ': |
| 41 | if( d == (unsigned char*)deststring ) return -2; | 52 | if( d && ( d == (unsigned char*)deststring ) ) return -2; |
| 42 | --s; | 53 | --s; |
| 43 | break; | 54 | break; |
| 44 | case '?': | 55 | case '?': |
| 45 | if( flags == SCAN_PATH ) goto found_terminator; | 56 | if( flags != SCAN_PATH ) { |
| 46 | if( d ) *d++ = c; | 57 | if( d ) *d++ = c; |
| 47 | goto retry_parsing; | 58 | goto retry_parsing; |
| 59 | } | ||
| 48 | break; | 60 | break; |
| 49 | case '=': | 61 | case '=': |
| 50 | if( flags != SCAN_SEARCHPATH_PARAM ) return -1; | 62 | if( flags != SCAN_SEARCHPATH_PARAM ) return -1; |
| @@ -57,7 +69,6 @@ retry_parsing: | |||
| 57 | return -1; | 69 | return -1; |
| 58 | } | 70 | } |
| 59 | 71 | ||
| 60 | found_terminator: | ||
| 61 | *string = (char *)s; | 72 | *string = (char *)s; |
| 62 | return d - (unsigned char*)deststring; | 73 | return d - (unsigned char*)deststring; |
| 63 | } | 74 | } |
