diff options
author | erdgeist <> | 2007-10-18 00:21:22 +0000 |
---|---|---|
committer | erdgeist <> | 2007-10-18 00:21:22 +0000 |
commit | 243d5961d0425b199319967e1c296c5d0124f3f2 (patch) | |
tree | 6b8c085b95398b1daac7e4ed9112770a729cea4a /scan_urlencoded_query.c | |
parent | be117f96a22e0d6d1052c2fea4991cb65dec731f (diff) |
fixed one performance bug, where "skipping values from a &param=values pair" was requested, the requestor ended up with "values" to be parsed again.
improved performance of fromhex
improved performance of is_unreserved() by moving it all into a simple byte array
improved performance of %41 => 'A' conversion by reordering variables
Diffstat (limited to 'scan_urlencoded_query.c')
-rw-r--r-- | scan_urlencoded_query.c | 41 |
1 files changed, 26 insertions, 15 deletions
diff --git a/scan_urlencoded_query.c b/scan_urlencoded_query.c index 296e829..f61d79e 100644 --- a/scan_urlencoded_query.c +++ b/scan_urlencoded_query.c | |||
@@ -12,39 +12,51 @@ | |||
12 | we add '%' to the matrix to not stop at encoded chars. | 12 | we add '%' to the matrix to not stop at encoded chars. |
13 | After losing too many requests to being too strict, add the following characters to reserved matrix | 13 | After losing too many requests to being too strict, add the following characters to reserved matrix |
14 | relax = "+" | "," | "/" | ";" | "<" | ">" | ":" | 14 | relax = "+" | "," | "/" | ";" | "<" | ">" | ":" |
15 | |||
16 | static const unsigned char reserved_matrix_strict[] = { 0xA2, 0x67, 0xFF, 0x03, 0xFE, 0xFF, 0xFF, 0x87, 0xFE, 0xFF, 0xFF, 0x47}; | ||
17 | */ | 15 | */ |
18 | static const unsigned char reserved_matrix[] = { 0xA2, 0xFF, 0xFF, 0x5F, 0xFE, 0xFF, 0xFF, 0x87, 0xFE, 0xFF, 0xFF, 0x47}; | ||
19 | 16 | ||
20 | static int is_unreserved( unsigned char c ) { | 17 | static const unsigned char is_unreserved[256] = { |
21 | if( ( c <= 32 ) || ( c >= 127 ) ) return 0; return 1&(reserved_matrix[(c-32)>>3]>>(c&7)); | 18 | 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
19 | 0,1,0,0,0,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0, | ||
20 | 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1, | ||
21 | 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,1,0, | ||
22 | 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, | ||
23 | 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, | ||
24 | 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, | ||
25 | 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 | ||
26 | }; | ||
27 | |||
28 | static unsigned char fromhex(unsigned char c) { | ||
29 | if (c>='0' && c<='9') return c-'0'; | ||
30 | c &= 0xdf; /* Toggle off lower case bit */ | ||
31 | if (c>='A' && c<='F') return c-'A'+10; | ||
32 | return 0xff; | ||
22 | } | 33 | } |
23 | 34 | ||
24 | ssize_t scan_urlencoded_query(char **string, char *deststring, int flags) { | 35 | ssize_t scan_urlencoded_query(char **string, char *deststring, int flags) { |
25 | register const unsigned char* s=*(const unsigned char**) string; | 36 | const unsigned char* s=*(const unsigned char**) string; |
26 | unsigned char *d = (unsigned char*)deststring; | 37 | unsigned char *d = (unsigned char*)deststring; |
27 | register unsigned char b, c; | 38 | register unsigned char b, c; |
28 | 39 | ||
29 | retry_parsing: | 40 | retry_parsing: |
30 | while( is_unreserved( c = *s++) ) { | 41 | while( is_unreserved[ c = *s++ ] ) { |
31 | if( c=='%') { | 42 | if( c=='%') { |
32 | if( ( c = scan_fromhex(*s++) ) == 0xff ) return -1; | 43 | if( ( b = fromhex(*s++) ) == 0xff ) return -1; |
33 | if( ( b = scan_fromhex(*s++) ) == 0xff ) return -1; | 44 | if( ( c = fromhex(*s++) ) == 0xff ) return -1; |
34 | c=(c<<4)|b; | 45 | c|=(b<<4); |
35 | } | 46 | } |
36 | if( d ) *d++ = c; | 47 | if( d ) *d++ = c; |
37 | } | 48 | } |
38 | 49 | ||
39 | switch( c ) { | 50 | switch( c ) { |
40 | case 0: case '\r': case '\n': case ' ': | 51 | case 0: case '\r': case '\n': case ' ': |
41 | if( d == (unsigned char*)deststring ) return -2; | 52 | if( d && ( d == (unsigned char*)deststring ) ) return -2; |
42 | --s; | 53 | --s; |
43 | break; | 54 | break; |
44 | case '?': | 55 | case '?': |
45 | if( flags == SCAN_PATH ) goto found_terminator; | 56 | if( flags != SCAN_PATH ) { |
46 | if( d ) *d++ = c; | 57 | if( d ) *d++ = c; |
47 | goto retry_parsing; | 58 | goto retry_parsing; |
59 | } | ||
48 | break; | 60 | break; |
49 | case '=': | 61 | case '=': |
50 | if( flags != SCAN_SEARCHPATH_PARAM ) return -1; | 62 | if( flags != SCAN_SEARCHPATH_PARAM ) return -1; |
@@ -57,7 +69,6 @@ retry_parsing: | |||
57 | return -1; | 69 | return -1; |
58 | } | 70 | } |
59 | 71 | ||
60 | found_terminator: | ||
61 | *string = (char *)s; | 72 | *string = (char *)s; |
62 | return d - (unsigned char*)deststring; | 73 | return d - (unsigned char*)deststring; |
63 | } | 74 | } |