diff options
| author | erdgeist <> | 2007-12-17 13:23:27 +0000 |
|---|---|---|
| committer | erdgeist <> | 2007-12-17 13:23:27 +0000 |
| commit | 0cfd1e575dae3a5705203b6b06b8a534a12ee652 (patch) | |
| tree | ad9c95204430bf86504725905dc794c2c0bf5763 | |
| parent | ac078bccf2bec2220233bb7ff40560da2131c10d (diff) | |
Add documentation to our uri scanner
| -rw-r--r-- | scan_urlencoded_query.c | 32 | ||||
| -rw-r--r-- | scan_urlencoded_query.h | 6 |
2 files changed, 36 insertions, 2 deletions
diff --git a/scan_urlencoded_query.c b/scan_urlencoded_query.c index ba4bbd8..e0c2e30 100644 --- a/scan_urlencoded_query.c +++ b/scan_urlencoded_query.c | |||
| @@ -14,6 +14,16 @@ | |||
| 14 | relax = "+" | "," | "/" | ";" | "<" | ">" | ":" | 14 | relax = "+" | "," | "/" | ";" | "<" | ">" | ":" |
| 15 | */ | 15 | */ |
| 16 | 16 | ||
| 17 | /* This matrix holds for each ascii character the information, | ||
| 18 | whether it is a non-terminating character for one of the three | ||
| 19 | scan states we are in, that is 'path', 'param' and 'value' from | ||
| 20 | /path?param=value&param=value, it is encoded in bit 0, 1 and 2 | ||
| 21 | respectively | ||
| 22 | |||
| 23 | The top bit of lower nibble indicates, whether this character is | ||
| 24 | a hard terminator, ie. \0, \n or \s, where the whole scanning | ||
| 25 | process should terminate | ||
| 26 | */ | ||
| 17 | static const unsigned char is_unreserved[256] = { | 27 | static const unsigned char is_unreserved[256] = { |
| 18 | 8,0,0,0,0,0,0,0,0,0,8,0,0,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, | 28 | 8,0,0,0,0,0,0,0,0,0,8,0,0,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
| 19 | 0,7,8,8,8,7,0,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,4,7,6, | 29 | 0,7,8,8,8,7,0,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,4,7,6, |
| @@ -25,6 +35,7 @@ static const unsigned char is_unreserved[256] = { | |||
| 25 | 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 | 35 | 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 |
| 26 | }; | 36 | }; |
| 27 | 37 | ||
| 38 | /* Do a fast hex digit to nibble value conversion; returns 0xff for non-hex input */ | ||
| 28 | static unsigned char fromhex(unsigned char x) { | 39 | static unsigned char fromhex(unsigned char x) { |
| 29 | x-='0'; if( x<=9) return x; | 40 | x-='0'; if( x<=9) return x; |
| 30 | x&=~0x20; x-='A'-'0'; | 41 | x&=~0x20; x-='A'-'0'; |
| @@ -32,12 +43,19 @@ static unsigned char fromhex(unsigned char x) { | |||
| 32 | return 0xff; | 43 | return 0xff; |
| 33 | } | 44 | } |
| 34 | 45 | ||
| 46 | /* Skip the value of a param=value pair */ | ||
| 35 | void scan_urlencoded_skipvalue( char **string ) { | 47 | void scan_urlencoded_skipvalue( char **string ) { |
| 36 | const unsigned char* s=*(const unsigned char**) string; | 48 | const unsigned char* s=*(const unsigned char**) string; |
| 37 | unsigned char f; | 49 | unsigned char f; |
| 38 | 50 | ||
| 51 | /* Since we are asked to skip the 'value', we assume to stop at | ||
| 52 | terminators for a 'value' string position */ | ||
| 39 | while( ( f = is_unreserved[ *s++ ] ) & SCAN_SEARCHPATH_VALUE ); | 53 | while( ( f = is_unreserved[ *s++ ] ) & SCAN_SEARCHPATH_VALUE ); |
| 54 | |||
| 55 | /* If we stopped at a hard terminator like \0 or \n, make the | ||
| 56 | next scan_urlencoded_query encounter it again */ | ||
| 40 | if( f & SCAN_SEARCHPATH_TERMINATOR ) --s; | 57 | if( f & SCAN_SEARCHPATH_TERMINATOR ) --s; |
| 58 | |||
| 41 | *string = (char*)s; | 59 | *string = (char*)s; |
| 42 | } | 60 | } |
| 43 | 61 | ||
| @@ -46,21 +64,35 @@ ssize_t scan_urlencoded_query(char **string, char *deststring, SCAN_SEARCHPATH_F | |||
| 46 | unsigned char *d = (unsigned char*)deststring; | 64 | unsigned char *d = (unsigned char*)deststring; |
| 47 | unsigned char b, c, f; | 65 | unsigned char b, c, f; |
| 48 | 66 | ||
| 67 | /* This is the main decoding loop. | ||
| 68 | 'flag' determines, which characters are non-terminating in current context | ||
| 69 | (ie. stop at '=' and '&' if scanning for a 'param'; stop at '?' if scanning for the path ) | ||
| 70 | */ | ||
| 49 | while( ( f = is_unreserved[ c = *s++ ] ) & flags ) { | 71 | while( ( f = is_unreserved[ c = *s++ ] ) & flags ) { |
| 72 | |||
| 73 | /* When encountering an url escaped character, try to decode */ | ||
| 50 | if( c=='%') { | 74 | if( c=='%') { |
| 51 | if( ( b = fromhex(*s++) ) == 0xff ) return -1; | 75 | if( ( b = fromhex(*s++) ) == 0xff ) return -1; |
| 52 | if( ( c = fromhex(*s++) ) == 0xff ) return -1; | 76 | if( ( c = fromhex(*s++) ) == 0xff ) return -1; |
| 53 | c|=(b<<4); | 77 | c|=(b<<4); |
| 54 | } | 78 | } |
| 79 | |||
| 80 | /* Write (possibly decoded) character to output */ | ||
| 55 | *d++ = c; | 81 | *d++ = c; |
| 56 | } | 82 | } |
| 57 | 83 | ||
| 58 | switch( c ) { | 84 | switch( c ) { |
| 59 | case 0: case '\r': case '\n': case ' ': | 85 | case 0: case '\r': case '\n': case ' ': |
| 86 | /* If we started scanning on a hard terminator, indicate we've finished */ | ||
| 60 | if( d == (unsigned char*)deststring ) return -2; | 87 | if( d == (unsigned char*)deststring ) return -2; |
| 88 | |||
| 89 | /* Else make the next call to scan_urlencoded_query encounter it again */ | ||
| 61 | --s; | 90 | --s; |
| 62 | break; | 91 | break; |
| 63 | case '?': | 92 | case '?': |
| 93 | /* XXX to help us parse path?param=value?param=value?... sent by µTorrent 1600 | ||
| 94 | do not return an error but silently terminate | ||
| 95 | if( flags != SCAN_PATH ) return -1; */ | ||
| 64 | break; | 96 | break; |
| 65 | case '=': | 97 | case '=': |
| 66 | if( flags != SCAN_SEARCHPATH_PARAM ) return -1; | 98 | if( flags != SCAN_SEARCHPATH_PARAM ) return -1; |
diff --git a/scan_urlencoded_query.h b/scan_urlencoded_query.h index 4fa35c4..f0ad781 100644 --- a/scan_urlencoded_query.h +++ b/scan_urlencoded_query.h | |||
| @@ -11,7 +11,8 @@ typedef enum { | |||
| 11 | SCAN_SEARCHPATH_TERMINATOR = 8 | 11 | SCAN_SEARCHPATH_TERMINATOR = 8 |
| 12 | } SCAN_SEARCHPATH_FLAG; | 12 | } SCAN_SEARCHPATH_FLAG; |
| 13 | 13 | ||
| 14 | /* string pointer to source, pointer to next scan position on return | 14 | /* string in: pointer to source |
| 15 | out: pointer to next scan position | ||
| 15 | deststring pointer to destination | 16 | deststring pointer to destination |
| 16 | flags determines, what to parse | 17 | flags determines, what to parse |
| 17 | returns number of valid converted characters in deststring | 18 | returns number of valid converted characters in deststring |
| @@ -19,7 +20,8 @@ typedef enum { | |||
| 19 | */ | 20 | */ |
| 20 | ssize_t scan_urlencoded_query(char **string, char *deststring, SCAN_SEARCHPATH_FLAG flags); | 21 | ssize_t scan_urlencoded_query(char **string, char *deststring, SCAN_SEARCHPATH_FLAG flags); |
| 21 | 22 | ||
| 22 | /* string pointer to source, pointer to next scan position on return | 23 | /* string in: pointer to value of a param=value pair to skip |
| 24 | out: pointer to next scan position on return | ||
| 23 | */ | 25 | */ |
| 24 | void scan_urlencoded_skipvalue( char **string ); | 26 | void scan_urlencoded_skipvalue( char **string ); |
| 25 | 27 | ||
