summary refs log tree commit diff
path: root/scan_urlencoded_query.c
diff options
context:
space:
mode:
author erdgeist <> 2007-12-17 13:23:27 +0000
committer erdgeist <> 2007-12-17 13:23:27 +0000
commit 0cfd1e575dae3a5705203b6b06b8a534a12ee652 (patch)
tree ad9c95204430bf86504725905dc794c2c0bf5763 /scan_urlencoded_query.c
parent ac078bccf2bec2220233bb7ff40560da2131c10d (diff)
Add documentation to our uri scanner
Diffstat (limited to 'scan_urlencoded_query.c')
-rw-r--r--scan_urlencoded_query.c32
1 files changed, 32 insertions, 0 deletions
diff --git a/scan_urlencoded_query.c b/scan_urlencoded_query.c
index ba4bbd8..e0c2e30 100644
--- a/scan_urlencoded_query.c
+++ b/scan_urlencoded_query.c
@@ -14,6 +14,16 @@
14 relax = "+" | "," | "/" | ";" | "<" | ">" | ":" 14 relax = "+" | "," | "/" | ";" | "<" | ">" | ":"
15*/ 15*/
16 16
17/* This matrix holds, for each ASCII character, the information
18 whether it is a non-terminating character for one of the three
19 scan states we are in, that is 'path', 'param' and 'value' from
20 /path?param=value&param=value; this is encoded in bits 0, 1 and 2
21 respectively.
22
23 The top bit of the lower nibble indicates whether this character is
24 a hard terminator, i.e. \0, \n or \s, where the whole scanning
25 process should terminate.
26 */
17static const unsigned char is_unreserved[256] = { 27static const unsigned char is_unreserved[256] = {
18 8,0,0,0,0,0,0,0,0,0,8,0,0,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 28 8,0,0,0,0,0,0,0,0,0,8,0,0,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
19 0,7,8,8,8,7,0,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,4,7,6, 29 0,7,8,8,8,7,0,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,4,7,6,
@@ -25,6 +35,7 @@ static const unsigned char is_unreserved[256] = {
25 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 35 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
26}; 36};
27 37
38/* Do a fast conversion of a hex digit character to its nibble value; yields 0xff if the character is not a hex digit */
28static unsigned char fromhex(unsigned char x) { 39static unsigned char fromhex(unsigned char x) {
29 x-='0'; if( x<=9) return x; 40 x-='0'; if( x<=9) return x;
30 x&=~0x20; x-='A'-'0'; 41 x&=~0x20; x-='A'-'0';
@@ -32,12 +43,19 @@ static unsigned char fromhex(unsigned char x) {
32 return 0xff; 43 return 0xff;
33} 44}
34 45
46/* Skip the value of a param=value pair */
35void scan_urlencoded_skipvalue( char **string ) { 47void scan_urlencoded_skipvalue( char **string ) {
36 const unsigned char* s=*(const unsigned char**) string; 48 const unsigned char* s=*(const unsigned char**) string;
37 unsigned char f; 49 unsigned char f;
38 50
51 /* Since we are asked to skip the 'value', we stop at the
52 terminators that apply to a 'value' string position */
39 while( ( f = is_unreserved[ *s++ ] ) & SCAN_SEARCHPATH_VALUE ); 53 while( ( f = is_unreserved[ *s++ ] ) & SCAN_SEARCHPATH_VALUE );
54
55 /* If we stopped at a hard terminator like \0 or \n, make the
56 next scan_urlencoded_query encounter it again */
40 if( f & SCAN_SEARCHPATH_TERMINATOR ) --s; 57 if( f & SCAN_SEARCHPATH_TERMINATOR ) --s;
58
41 *string = (char*)s; 59 *string = (char*)s;
42} 60}
43 61
@@ -46,21 +64,35 @@ ssize_t scan_urlencoded_query(char **string, char *deststring, SCAN_SEARCHPATH_F
46 unsigned char *d = (unsigned char*)deststring; 64 unsigned char *d = (unsigned char*)deststring;
47 unsigned char b, c, f; 65 unsigned char b, c, f;
48 66
67 /* This is the main decoding loop.
68 'flags' determines which characters are non-terminating in the current context
69 (i.e. stop at '=' and '&' if scanning for a 'param'; stop at '?' if scanning for the path)
70 */
49 while( ( f = is_unreserved[ c = *s++ ] ) & flags ) { 71 while( ( f = is_unreserved[ c = *s++ ] ) & flags ) {
72
73 /* When encountering an url escaped character, try to decode */
50 if( c=='%') { 74 if( c=='%') {
51 if( ( b = fromhex(*s++) ) == 0xff ) return -1; 75 if( ( b = fromhex(*s++) ) == 0xff ) return -1;
52 if( ( c = fromhex(*s++) ) == 0xff ) return -1; 76 if( ( c = fromhex(*s++) ) == 0xff ) return -1;
53 c|=(b<<4); 77 c|=(b<<4);
54 } 78 }
79
80 /* Write (possibly decoded) character to output */
55 *d++ = c; 81 *d++ = c;
56 } 82 }
57 83
58 switch( c ) { 84 switch( c ) {
59 case 0: case '\r': case '\n': case ' ': 85 case 0: case '\r': case '\n': case ' ':
86 /* If we started scanning on a hard terminator, indicate we've finished */
60 if( d == (unsigned char*)deststring ) return -2; 87 if( d == (unsigned char*)deststring ) return -2;
88
89 /* Else make the next call to scan_urlencoded_query encounter it again */
61 --s; 90 --s;
62 break; 91 break;
63 case '?': 92 case '?':
93 /* XXX to help us parse path?param=value?param=value?... sent by µTorrent 1600
94 do not return an error but silently terminate
95 if( flags != SCAN_PATH ) return -1; */
64 break; 96 break;
65 case '=': 97 case '=':
66 if( flags != SCAN_SEARCHPATH_PARAM ) return -1; 98 if( flags != SCAN_SEARCHPATH_PARAM ) return -1;