From b86e6382a1d496cdb2acb0b85732c644de3add11 Mon Sep 17 00:00:00 2001
From: erdgeist <>
Date: Thu, 18 Oct 2007 23:33:07 +0000
Subject: Save a lot of work when skipping through uninteresting http request
 parameters

---
 opentracker.c           | 24 ++++++++++++------------
 scan_urlencoded_query.c | 32 ++++++++++++++++++--------------
 scan_urlencoded_query.h | 17 ++++++++++++-----
 3 files changed, 42 insertions(+), 31 deletions(-)

diff --git a/opentracker.c b/opentracker.c
index b77773d..380168e 100644
--- a/opentracker.c
+++ b/opentracker.c
@@ -237,10 +237,10 @@ LOG_TO_STDERR( "sync: %d.%d.%d.%d\n", h->ip[0], h->ip[1], h->ip[2], h->ip[3] );
       switch( scan_urlencoded_query( &c, data = c, SCAN_SEARCHPATH_PARAM ) ) {
       case -2: scanon = 0; break;   /* TERMINATOR */
       case -1: HTTPERROR_400_PARAM; /* PARSE ERROR */
-      default: scan_urlencoded_query( &c, NULL, SCAN_SEARCHPATH_VALUE ); break;
+      default: scan_urlencoded_skipvalue( &c ); break;
       case 9:
         if(byte_diff(data,9,"changeset")) {
-          scan_urlencoded_query( &c, NULL, SCAN_SEARCHPATH_VALUE );
+          scan_urlencoded_skipvalue( &c );
           continue;
         }
         /* ignore this, when we dont at least see "d4:syncdee" */
@@ -273,10 +273,10 @@ LOG_TO_STDERR( "sync: %d.%d.%d.%d\n", h->ip[0], h->ip[1], h->ip[2], h->ip[3] );
       switch( scan_urlencoded_query( &c, data = c, SCAN_SEARCHPATH_PARAM ) ) {
       case -2: scanon = 0; break;   /* TERMINATOR */
       case -1: HTTPERROR_400_PARAM; /* PARSE ERROR */
-      default: scan_urlencoded_query( &c, NULL, SCAN_SEARCHPATH_VALUE ); break;
+      default: scan_urlencoded_skipvalue( &c ); break;
       case 4:
         if( byte_diff(data,4,"mode")) {
-          scan_urlencoded_query( &c, NULL, SCAN_SEARCHPATH_VALUE );
+          scan_urlencoded_skipvalue( &c );
           continue;
         }
         if( scan_urlencoded_query( &c, data = c, SCAN_SEARCHPATH_VALUE ) != 4 ) HTTPERROR_400_PARAM;
@@ -362,10 +362,10 @@ SCRAPE_WORKAROUND:
       switch( scan_urlencoded_query( &c, data = c, SCAN_SEARCHPATH_PARAM ) ) {
       case -2: scanon = 0; break;   /* TERMINATOR */
       case -1: HTTPERROR_400_PARAM; /* PARSE ERROR */
-      default: scan_urlencoded_query( &c, NULL, SCAN_SEARCHPATH_VALUE ); break;
+      default: scan_urlencoded_skipvalue( &c ); break;
       case 9:
         if(byte_diff(data,9,"info_hash")) {
-          scan_urlencoded_query( &c, NULL, SCAN_SEARCHPATH_VALUE );
+          scan_urlencoded_skipvalue( &c );
           continue;
         }
         /* ignore this, when we have less than 20 bytes */
@@ -407,7 +407,7 @@ ANNOUNCE_WORKAROUND:
       switch( scan_urlencoded_query( &c, data = c, SCAN_SEARCHPATH_PARAM ) ) {
       case -2: scanon = 0; break;   /* TERMINATOR */
       case -1: HTTPERROR_400_PARAM; /* PARSE ERROR */
-      default: scan_urlencoded_query( &c, NULL, SCAN_SEARCHPATH_VALUE ); break;
+      default: scan_urlencoded_skipvalue( &c ); break;
 #ifdef WANT_IP_FROM_QUERY_STRING
       case 2:
         if(!byte_diff(data,2,"ip")) {
@@ -416,7 +416,7 @@ ANNOUNCE_WORKAROUND:
           if( ( len <= 0 ) || scan_fixed_ip( data, len, ip ) ) HTTPERROR_400_PARAM;
           OT_SETIP( &peer, ip );
        } else
-          scan_urlencoded_query( &c, NULL, SCAN_SEARCHPATH_VALUE );
+          scan_urlencoded_skipvalue( &c );
        break;
 #endif
       case 4:
@@ -429,11 +429,11 @@ ANNOUNCE_WORKAROUND:
           if( scan_fixed_int( data, len, &tmp ) ) tmp = 0;
           if( !tmp ) OT_FLAG( &peer ) |= PEER_FLAG_SEEDING;
         } else
-          scan_urlencoded_query( &c, NULL, SCAN_SEARCHPATH_VALUE );
+          scan_urlencoded_skipvalue( &c );
         break;
       case 5:
         if( byte_diff( data, 5, "event" ) )
-          scan_urlencoded_query( &c, NULL, SCAN_SEARCHPATH_VALUE );
+          scan_urlencoded_skipvalue( &c );
         else switch( scan_urlencoded_query( &c, data = c, SCAN_SEARCHPATH_VALUE ) ) {
         case -1:
           HTTPERROR_400_PARAM;
@@ -456,11 +456,11 @@ ANNOUNCE_WORKAROUND:
           if( ( len <= 0 ) || scan_fixed_int( data, len, &tmp ) ) HTTPERROR_400_PARAM;
           if( !tmp ) HTTPERROR_400_COMPACT;
         } else
-          scan_urlencoded_query( &c, NULL, SCAN_SEARCHPATH_VALUE );
+          scan_urlencoded_skipvalue( &c );
         break;
       case 9:
         if(byte_diff(data,9,"info_hash")) {
-          scan_urlencoded_query( &c, NULL, SCAN_SEARCHPATH_VALUE );
+          scan_urlencoded_skipvalue( &c );
           continue;
         }
         /* ignore this, when we have less than 20 bytes */
diff --git a/scan_urlencoded_query.c b/scan_urlencoded_query.c
index a11b65c..f754fdc 100644
--- a/scan_urlencoded_query.c
+++ b/scan_urlencoded_query.c
@@ -15,10 +15,10 @@
 */
 
 static const unsigned char is_unreserved[256] = {
-  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-  0,1,0,0,0,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,
-  0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1,
-  0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,1,0,
+  8,0,0,0,0,0,0,0,0,0,8,0,0,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+  8,7,0,0,0,7,0,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,0,7,6,
+  0,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,0,0,0,0,7,
+  0,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,0,0,0,7,0,
   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
@@ -32,31 +32,35 @@ static unsigned char fromhex(unsigned char x) {
   return 0xff;
 }
 
-ssize_t scan_urlencoded_query(char **string, char *deststring, int flags) {
+void scan_urlencoded_skipvalue( char **string ) { /* advance *string over one value without decoding or storing it */
+  const unsigned char* s=*(const unsigned char**) string;
+  unsigned char f; /* flag bits looked up in is_unreserved[] for the byte just consumed */
+
+  while( ( f = is_unreserved[ *s++ ] ) & SCAN_SEARCHPATH_VALUE ); /* empty body: consume every byte whose table entry has the VALUE bit */
+  if( f & SCAN_SEARCHPATH_TERMINATOR ) --s; /* stop byte is a terminator: step back so it is seen again by the next scan */
+  *string = (char*)s; /* publish new position; for a non-terminator stop byte this is one past that byte */
+}
+
+ssize_t scan_urlencoded_query(char **string, char *deststring, SCAN_SEARCHPATH_FLAG flags) {
   const unsigned char* s=*(const unsigned char**) string;
   unsigned char *d = (unsigned char*)deststring;
-  register unsigned char b, c;
+  unsigned char b, c, f;
 
-retry_parsing:
-  while( is_unreserved[ c = *s++ ] ) {
+  while( ( f = is_unreserved[ c = *s++ ] ) & flags ) {
     if( c=='%') {
       if( ( b = fromhex(*s++) ) == 0xff ) return -1;
       if( ( c = fromhex(*s++) ) == 0xff ) return -1;
       c|=(b<<4);
     }
-    if( d ) *d++ = c;
+    *d++ = c;
   }
 
   switch( c ) {
   case 0: case '\r': case '\n': case ' ':
-    if( d && ( d == (unsigned char*)deststring ) ) return -2;
+    if( d == (unsigned char*)deststring ) return -2;
     --s;
     break;
   case '?':
-    if( flags != SCAN_PATH ) {
-      if( d ) *d++ = c;
-      goto retry_parsing;
-    }
     break;
   case '=':
     if( flags != SCAN_SEARCHPATH_PARAM ) return -1;
diff --git a/scan_urlencoded_query.h b/scan_urlencoded_query.h
index 56d93c8..4fa35c4 100644
--- a/scan_urlencoded_query.h
+++ b/scan_urlencoded_query.h
@@ -4,17 +4,24 @@
 #ifndef __SCAN_URLENCODED_QUERY_H__
 #define __SCAN_URLENCODED_QUERY_H__
 
-#define SCAN_PATH             0
-#define SCAN_SEARCHPATH_PARAM 1
-#define SCAN_SEARCHPATH_VALUE 2
+typedef enum { /* bit flags; the is_unreserved[] table in scan_urlencoded_query.c holds a mask of these per byte */
+  SCAN_PATH                  = 1, /* the table entry for '?' lacks this bit, so '?' stops a scan using this flag */
+  SCAN_SEARCHPATH_PARAM      = 2,
+  SCAN_SEARCHPATH_VALUE      = 4, /* also the bit scan_urlencoded_skipvalue() tests while skipping */
+  SCAN_SEARCHPATH_TERMINATOR = 8 /* set in the table for NUL, '\n', '\r' and ' ' */
+} SCAN_SEARCHPATH_FLAG;
 
-/* string     pointer to source, pointer to after terminator on return
+/* string     pointer to source, pointer to next scan position on return
    deststring pointer to destination
    flags      determines, what to parse
    returns    number of valid converted characters in deststring
               or -1 for parse error
 */
-ssize_t scan_urlencoded_query(char **string, char *deststring, int flags);
+ssize_t scan_urlencoded_query(char **string, char *deststring, SCAN_SEARCHPATH_FLAG flags);
+
+/* string     pointer to source, pointer to next scan position on return
+*/
+void scan_urlencoded_skipvalue( char **string );
 
 /* data       pointer to len chars of string
    len        length of chars in data to parse
-- 
cgit v1.2.3