From 2adf4fb28af99dd72c6b2fc816bcc11e5dde1ffc Mon Sep 17 00:00:00 2001
From: erdgeist <>
Date: Fri, 8 Dec 2006 19:20:51 +0000
Subject: Our scanner routine for the URI query string

---
 scan_urlencoded_query.c | 57 +++++++++++++++++++++++++++++++++++++++++++++++++
 scan_urlencoded_query.h | 20 +++++++++++++++++
 trackerlogic.c          |  4 ++--
 3 files changed, 79 insertions(+), 2 deletions(-)
 create mode 100644 scan_urlencoded_query.c
 create mode 100644 scan_urlencoded_query.h

diff --git a/scan_urlencoded_query.c b/scan_urlencoded_query.c
new file mode 100644
index 0000000..7aeabab
--- /dev/null
+++ b/scan_urlencoded_query.c
@@ -0,0 +1,57 @@
+#include "scan.h"
+
+#define BREAK_AT_QUESTIONMARK (1<<0) // flag: '?' is an accepted terminator
+#define BREAK_AT_WHITESPACE   (1<<1) // flag: NUL, '\r', '\n' or ' ' is an accepted terminator
+#define BREAK_AT_AMPERSAND    (1<<2) // flag: '&' is an accepted terminator
+#define BREAK_AT_EQUALSIGN    (1<<3) // flag: '=' is an accepted terminator
+
+#define SCAN_PATH             ( BREAK_AT_QUESTIONMARK | BREAK_AT_WHITESPACE ) // path: ends at '?' or whitespace
+#define SCAN_SEARCHPATH_PARAM ( BREAK_AT_EQUALSIGN ) // parameter name: ends at '='
+#define SCAN_SEARCHPATH_VALUE ( BREAK_AT_AMPERSAND | BREAK_AT_WHITESPACE ) // parameter value: ends at '&' or whitespace
+
+// Output contract: decoding never grows the text, so it may be done in place;
+// otherwise deststring must hold at least strlen( string ) bytes.
+// See http://www.ietf.org/rfc/rfc2396.txt:
+//       unreserved    = alphanum | mark
+//       mark          = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
+// '%' is added so the scanner does not stop at encoded chars. Bit (c&7) of
+// byte (c-32)>>3 is set for every allowed character c in 33..126.
+static const unsigned char reserved_matrix[] = { 0xA2, 0x67, 0xFF, 0x03, 0xFE, 0xFF, 0xFF, 0x87, 0xFE, 0xFF, 0xFF, 0x47};
+static inline int is_unreserved( unsigned char c ) { // 1 if c is unreserved or '%', else 0
+  return ( c > 32 && c < 127 ) ? 1 & ( reserved_matrix[(c-32)>>3] >> (c&7) ) : 0;
+}
+
+// Decodes one URL-encoded token from *string into deststring and returns the
+// byte count; *string then points behind the terminator (even behind a NUL).
+// Returns (size_t)-1, *string untouched, on a bad %XX or a terminator not in flags.
+size_t scan_urlencoded_query(char **string, char *deststring, int flags) {
+  const unsigned char *s = (const unsigned char *) *string;
+  unsigned char *d = (unsigned char *) deststring;
+  int b, c; // int, so a negative error value from scan_fromhex() stays negative
+  while ( is_unreserved( c = *s++ ) ) {
+    if ( c == '%' ) { // two hex digits follow; a NUL here fails the hex check
+      if( ( c = scan_fromhex( *s++ ) ) < 0 ) return (size_t)-1;
+      if( ( b = scan_fromhex( *s++ ) ) < 0 ) return (size_t)-1;
+      c = ( c << 4 ) | b;
+    }
+    *d++ = (unsigned char) c;
+  }
+  switch( c ) { // c now holds the terminator; it must be enabled in flags
+  case 0: case '\r': case '\n': case ' ':
+    if ( ( flags & BREAK_AT_WHITESPACE ) == 0 ) return (size_t)-1;
+    break;
+  case '?':
+    if ( ( flags & BREAK_AT_QUESTIONMARK ) == 0 ) return (size_t)-1;
+    break;
+  case '=':
+    if ( ( flags & BREAK_AT_EQUALSIGN ) == 0 ) return (size_t)-1;
+    break;
+  case '&':
+    if ( ( flags & BREAK_AT_AMPERSAND ) == 0 ) return (size_t)-1;
+    break;
+  default: // any other character outside the unreserved set is a parse error
+    return (size_t)-1;
+  }
+  *string = (char *) s;
+  return (size_t)( d - (unsigned char *) deststring );
+}
diff --git a/scan_urlencoded_query.h b/scan_urlencoded_query.h
new file mode 100644
index 0000000..379bc32
--- /dev/null
+++ b/scan_urlencoded_query.h
@@ -0,0 +1,20 @@
+#ifndef __SCAN_URLENCODED_QUERY_H__
+#define __SCAN_URLENCODED_QUERY_H__
+#include <stddef.h> // size_t
+// Flag bits: which characters the caller accepts as terminator of a token
+#define BREAK_AT_QUESTIONMARK (1<<0)
+#define BREAK_AT_WHITESPACE   (1<<1)
+#define BREAK_AT_AMPERSAND    (1<<2)
+#define BREAK_AT_EQUALSIGN    (1<<3)
+// Flag sets named after the part of the request URI they are meant for
+#define SCAN_PATH             ( BREAK_AT_QUESTIONMARK | BREAK_AT_WHITESPACE )
+#define SCAN_SEARCHPATH_PARAM ( BREAK_AT_EQUALSIGN )
+#define SCAN_SEARCHPATH_VALUE ( BREAK_AT_AMPERSAND | BREAK_AT_WHITESPACE )
+
+// string     in: pointer to source; out: pointer to just after the terminator
+// deststring pointer to destination
+// flags      determines what to parse (bit set of the BREAK_AT_* values)
+// returns    number of valid converted characters in deststring
+//            or -1, i.e. (size_t)-1, for parse error
+size_t scan_urlencoded_query(char **string, char *deststring, int flags);
+#endif
diff --git a/trackerlogic.c b/trackerlogic.c
index 735041e..6274c41 100644
--- a/trackerlogic.c
+++ b/trackerlogic.c
@@ -162,7 +162,7 @@ void return_peers_for_torrent( ot_torrent torrent, unsigned long amount, char *r
 
 // Compacts a torrents peer list
 // * torrents older than OT_TIMEOUT are being kicked
-// * is rather expansive
+// * is rather expensive
 // * if this fails, torrent file is invalid, should add flag
 //
 void heal_torrent( ot_torrent torrent ) {
@@ -269,7 +269,7 @@ int init_logic( char *directory ) {
 
   // Scan directory for filenames in the form [0-9A-F]{20}
   // * I know this looks ugly, but I've seen A-F to match umlauts as well in strange locales
-  // * lower case for .. better being safe than sorry, this is not expansive here :)
+  // * lower case for .. better being safe than sorry, this is not expensive here :)
   if( !glob(
     "[0-9ABCDEFabcdef][0-9ABCDEFabcdef][0-9ABCDEFabcdef][0-9ABCDEFabcdef]"
     "[0-9ABCDEFabcdef][0-9ABCDEFabcdef][0-9ABCDEFabcdef][0-9ABCDEFabcdef]"
-- 
cgit v1.2.3