diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/dumpindex2.c | 33 | ||||
| -rw-r--r-- | src/dumppointers2.c | 127 | ||||
| -rw-r--r-- | src/mystdlib.c | 54 | ||||
| -rw-r--r-- | src/mystdlib.h | 32 | ||||
| -rw-r--r-- | src/sortindex.c | 81 |
5 files changed, 327 insertions, 0 deletions
diff --git a/src/dumpindex2.c b/src/dumpindex2.c new file mode 100644 index 0000000..f49a329 --- /dev/null +++ b/src/dumpindex2.c | |||
| @@ -0,0 +1,33 @@ | |||
| 1 | #include "mystdlib.h" | ||
| 2 | #include <sys/mman.h> | ||
| 3 | #include <fcntl.h> | ||
| 4 | |||
| 5 | static int indexed = -1; | ||
| 6 | |||
| 7 | int main( int argc, char **argv ) { | ||
| 8 | MAP index = NULL; | ||
| 9 | int i,j; | ||
| 10 | char out[50]; | ||
| 11 | |||
| 12 | if( argc != 3 ) | ||
| 13 | { fputs( "Syntax: sortindex <indexedfile> <indexfile>", stderr); exit( 1 ); } | ||
| 14 | |||
| 15 | if( ( indexed = open( argv[1], O_RDONLY ) ) == -1 ) | ||
| 16 | { fprintf( stderr, "Could not open file: %s\n", argv[1] ); exit( 1 ); } | ||
| 17 | |||
| 18 | if( !(index = map_file( argv[2], 0 ) ) ) exit( 1 ); | ||
| 19 | |||
| 20 | for( i = 0; i < index->size; i+= 16 ) { | ||
| 21 | unsigned char *x = i + (unsigned char*)index->addr; | ||
| 22 | unsigned long p = *(unsigned long*)x; | ||
| 23 | |||
| 24 | pread( indexed, out, 40, (off_t)p ); | ||
| 25 | for( j=0;j<40;++j) if( out[j] == '\t' || out[j] == '\n' ) out[j] = 0; out[j] = 0; | ||
| 26 | puts( out ); | ||
| 27 | } | ||
| 28 | |||
| 29 | unmap_file( &index ); | ||
| 30 | close( indexed ); | ||
| 31 | |||
| 32 | return 0; | ||
| 33 | } | ||
diff --git a/src/dumppointers2.c b/src/dumppointers2.c new file mode 100644 index 0000000..80a4b61 --- /dev/null +++ b/src/dumppointers2.c | |||
| @@ -0,0 +1,127 @@ | |||
| 1 | #include <stdio.h> | ||
| 2 | #include <unistd.h> | ||
| 3 | #include <fcntl.h> | ||
| 4 | |||
/* Size in bytes of the window that is kept in memory over the input file. */
#define BUFSIZE 0x1000000
/* main() asks for a refill once fewer than BUFLOW bytes of the window are left. */
#define BUFLOW 0x400
/* Descriptor of the file being indexed; -1 until main() has opened it. */
static int infile = -1;
/* Backing store of the window.  The 16 extra leading bytes give main() room
   to build a record that starts 4 bytes before the current read position. */
static unsigned char nblock[ BUFSIZE + 16 ];
static unsigned char *inblock = nblock + 16;
/* Read position inside inblock. */
static unsigned long inblockoffs = 0;

/* Size of the input file and file offset of inblock[0]. */
static unsigned long infilesize = 0;
static unsigned long infileoffs = 0;
/* Byte translation table, defined further down.  Declared with its size: a
   tentative definition with internal linkage must not have incomplete type. */
static unsigned char to_lower[256];
static unsigned char run = 1;
| 16 | |||
| 17 | static void buffer_lowwatermark( void ) | ||
| 18 | { | ||
| 19 | int i; | ||
| 20 | if( infilesize - infileoffs < BUFSIZE ) | ||
| 21 | return; | ||
| 22 | memmove( inblock, inblock + inblockoffs, BUFSIZE - inblockoffs ); | ||
| 23 | read( infile, inblock + (BUFSIZE - inblockoffs), inblockoffs); | ||
| 24 | for ( i = BUFSIZE - inblockoffs; i<BUFSIZE; ++i) inblock[i] = to_lower[inblock[i]]; | ||
| 25 | infileoffs += inblockoffs; | ||
| 26 | inblockoffs = 0; | ||
| 27 | } | ||
| 28 | |||
| 29 | static void buffer_init( void ) | ||
| 30 | { | ||
| 31 | int i; | ||
| 32 | read( infile, inblock, BUFSIZE); | ||
| 33 | for ( i = 0; i<BUFSIZE; ++i) inblock[i] = to_lower[inblock[i]]; | ||
| 34 | } | ||
| 35 | |||
/* Byte translation applied to everything read into the window.
     - newline (0x0A) and NUL map to 0, tab (0x09) and 0x01 map to 1,
       every other byte below 0x10 maps to 2;
     - 'A'..'Z' map to 'a'..'z';
     - 0xD6 maps to 0xF6 and 0xDC maps to 0xFC;
     - all remaining bytes map to themselves.
   NOTE(review): 0xD6/0xDC look like Latin-1 O/U umlaut; 0xC4 (A umlaut in
   Latin-1) is left unchanged - confirm whether that is intended. */
static unsigned char to_lower[256] = {
  /* 0x00 */ 0x00, 0x01, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x01, 0x00, 0x02, 0x02, 0x02, 0x02, 0x02,
  /* 0x10 */ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
  /* 0x20 */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
  /* 0x30 */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F,
  /* 0x40 */ 0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
  /* 0x50 */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F,
  /* 0x60 */ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
  /* 0x70 */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
  /* 0x80 */ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F,
  /* 0x90 */ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F,
  /* 0xA0 */ 0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF,
  /* 0xB0 */ 0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF,
  /* 0xC0 */ 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
  /* 0xD0 */ 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xF6, 0xD7, 0xD8, 0xD9, 0xDA, 0xDB, 0xFC, 0xDD, 0xDE, 0xDF,
  /* 0xE0 */ 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
  /* 0xF0 */ 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF
};
| 54 | |||
/* Opens the bucket file "<currrow>/<a><b>" for reading and writing, creating
   it with mode 0644 if it does not exist yet.  The directory part is currrow
   as two decimal digits, the file part is a and b in upper-case hex.

   Returns the descriptor.  If the file cannot be opened, a message is written
   to stderr and the program exits with status 1.

   NOTE(review): "%2X" pads with a blank, not a zero, so a byte below 0x10
   produces a file name containing a space - confirm this is wanted. */
int makefile( int currrow, unsigned char a, unsigned char b)
{
  char path[16];
  int handle;

  sprintf( path, "%02d/%2X%2X", currrow, a, b);

  handle = open( path, O_RDWR | O_CREAT, 0644 );
  if( handle == -1 )
  {
    fprintf( stderr, "Unable to open: %s\n", path );
    exit( 1 );
  }

  return handle;
}
| 65 | |||
| 66 | int main( int args, char **argv ) | ||
| 67 | { | ||
| 68 | int currrow = 1, inrow = 0, nowfd, use, mycount = 0; | ||
| 69 | unsigned long row; | ||
| 70 | static int fd[0x10000][17]; | ||
| 71 | |||
| 72 | memset( fd, 0, sizeof(fd)); | ||
| 73 | setvbuf( stdout, NULL, _IONBF, 0); | ||
| 74 | |||
| 75 | if( (args != 3) || ((row = atol(argv[1]))==0)) | ||
| 76 | { | ||
| 77 | fprintf( stderr, "syntax: %s row toindex\n", *argv ); | ||
| 78 | exit( 1 ); | ||
| 79 | } | ||
| 80 | |||
| 81 | if( ( infile = open( argv[2], O_RDONLY ) ) == -1 ) | ||
| 82 | { | ||
| 83 | fprintf( stderr, "Could not open file %s.\n", argv[2] ); | ||
| 84 | exit( 1 ); | ||
| 85 | } | ||
| 86 | getfilesize( infile, &infilesize ); | ||
| 87 | |||
| 88 | buffer_init(); | ||
| 89 | |||
| 90 | row = 1 << row; | ||
| 91 | use = row & 1; | ||
| 92 | while( 1 ) | ||
| 93 | { | ||
| 94 | unsigned char a, b; | ||
| 95 | switch( a = inblock[inblockoffs++] ) | ||
| 96 | { | ||
| 97 | case 0 : | ||
| 98 | currrow = 0; | ||
| 99 | if( inblockoffs > BUFSIZE - BUFLOW ) | ||
| 100 | buffer_lowwatermark(); | ||
| 101 | if( infilesize <= infileoffs + inblockoffs) | ||
| 102 | return 0; | ||
| 103 | if( !(mycount++ & 255) ) | ||
| 104 | printf( "%cSo far: %010.6f%% (%010lu / %010lu)", 13, 100.00 * (double)(infileoffs+inblockoffs) / (double)infilesize, infileoffs+inblockoffs, infilesize ); | ||
| 105 | // Fall through | ||
| 106 | case 1 : | ||
| 107 | use = (1<<currrow) & row; currrow++; inrow = 0; | ||
| 108 | break; | ||
| 109 | default : | ||
| 110 | if( use && ( (b = inblock[inblockoffs] ) > 1 ) ) | ||
| 111 | { | ||
| 112 | if( !fd[ b | (a*256) ][currrow] ) | ||
| 113 | nowfd = fd[ b | (a*256) ][currrow] = makefile( currrow, a, b); | ||
| 114 | else | ||
| 115 | nowfd = fd[ b | (a*256) ][currrow]; | ||
| 116 | |||
| 117 | *(unsigned long*)(inblock+inblockoffs-4) = infileoffs + inblockoffs - 1 - inrow; | ||
| 118 | inblock[inblockoffs] = inrow++; | ||
| 119 | write( nowfd, inblock+inblockoffs-4, 16); | ||
| 120 | inblock[inblockoffs] = b; | ||
| 121 | break; | ||
| 122 | } | ||
| 123 | } | ||
| 124 | } | ||
| 125 | |||
| 126 | return 0; | ||
| 127 | } | ||
diff --git a/src/mystdlib.c b/src/mystdlib.c new file mode 100644 index 0000000..2deda22 --- /dev/null +++ b/src/mystdlib.c | |||
| @@ -0,0 +1,54 @@ | |||
| 1 | #include <sys/types.h> | ||
| 2 | #include <sys/stat.h> | ||
| 3 | #include <sys/mman.h> | ||
| 4 | #include <unistd.h> | ||
| 5 | #include <fcntl.h> | ||
| 6 | #include <stdio.h> | ||
| 7 | |||
| 8 | #include "mystdlib.h" | ||
| 9 | |||
| 10 | MAP map_file( char *filename, int readonly ) | ||
| 11 | { | ||
| 12 | struct stat fstatus; | ||
| 13 | MAP map = (MAP)malloc( sizeof( *map )); | ||
| 14 | |||
| 15 | if( map ) | ||
| 16 | { | ||
| 17 | memset( map, 0, sizeof( *map )); | ||
| 18 | |||
| 19 | if( ( map->fh = open( filename, readonly ? O_RDONLY : O_RDWR ) ) >= 0 ) | ||
| 20 | { | ||
| 21 | fstat( map->fh, &fstatus ); | ||
| 22 | if( ( map->addr = mmap( NULL, map->size = (size_t)fstatus.st_size, | ||
| 23 | PROT_READ | ( readonly ? 0 : PROT_WRITE), MAP_NOCORE | (readonly ? 0 : MAP_SHARED), map->fh, 0) ) == MAP_FAILED ) | ||
| 24 | { | ||
| 25 | fprintf( stderr, "Mapping file '%s' failed\n", filename ); | ||
| 26 | close( map->fh ); free( map ); map = NULL; | ||
| 27 | } | ||
| 28 | } else { | ||
| 29 | fprintf( stderr, "Couldn't open file: '%s'\n", filename ); | ||
| 30 | free( map ); map = NULL; | ||
| 31 | } | ||
| 32 | } else { | ||
| 33 | fputs( "Couldn't allocate memory", stderr ); | ||
| 34 | } | ||
| 35 | |||
| 36 | return map; | ||
| 37 | } | ||
| 38 | |||
| 39 | void unmap_file ( MAP *pMap ) | ||
| 40 | { | ||
| 41 | if( !pMap || !*pMap ) return; | ||
| 42 | munmap( (*pMap)->addr, (*pMap)->size); | ||
| 43 | close( (*pMap)->fh); | ||
| 44 | free( *pMap ); *pMap = NULL; | ||
| 45 | } | ||
| 46 | |||
/* Stores the size in bytes of the file open on `fd` in *size.

   Returns 0 on success.  Returns -1 and leaves *size untouched if size is
   NULL or fstat() fails. */
int getfilesize( int fd, unsigned long *size)
{
  struct stat sb;

  if( !size || fstat( fd, &sb ) != 0 )
    return -1;

  *size = (unsigned long)sb.st_size;
  return 0;
}
diff --git a/src/mystdlib.h b/src/mystdlib.h new file mode 100644 index 0000000..2e9499f --- /dev/null +++ b/src/mystdlib.h | |||
| @@ -0,0 +1,32 @@ | |||
#include <sys/types.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
| 3 | |||
/* Guard the declarations: the typedef of an anonymous struct must not be
   seen twice in one translation unit. */
#ifndef MYSTDLIB_H
#define MYSTDLIB_H

/* Handle of a mapped file: its descriptor, the address it is mapped at and
   the length of the mapping in bytes. */
typedef struct { int fh; unsigned char *addr; size_t size; } *MAP;

/* Maps a file into memory.
   Returns a pointer to the mapping struct,
   containing the file's size, the mapped
   address and its file handle.

   If readonly is true, the file will be
   opened and mapped read-only. If false,
   the file is opened and mapped writable.

   Returns NULL if memory could not be
   allocated or the file could not be
   opened or mapped. Prints a diagnostic
   message on stderr in that case.
*/
MAP map_file( char *filename, int readonly );

/* Unmaps a file from memory. NULL pointers
   are checked for, so this is safe to call
   from cleanup code without knowing whether
   there actually is a map.
*/
void unmap_file ( MAP *pMap );

/* Gets the size of an open file.
   Returns != 0 in case of error.
   Not declared inline: the definition lives in mystdlib.c only, and an
   inline function with external linkage must be defined in every
   translation unit that declares it. */
int getfilesize( int fd, unsigned long *size );

#endif /* MYSTDLIB_H */
| 32 | |||
diff --git a/src/sortindex.c b/src/sortindex.c new file mode 100644 index 0000000..b3b3bfe --- /dev/null +++ b/src/sortindex.c | |||
| @@ -0,0 +1,81 @@ | |||
| 1 | #include "mystdlib.h" | ||
| 2 | #include <sys/mman.h> | ||
| 3 | #include <fcntl.h> | ||
| 4 | |||
/* Descriptor of the indexed (text) file; -1 until main() has opened it. */
static int indexed = -1;

/* Byte translation used by the comparison functions below.
     - every byte below 0x10 (including NUL, tab and newline) maps to 0 and
       therefore ends a string;
     - 'A'..'Z' map to 'a'..'z';
     - 0xD6 maps to 0xF6 and 0xDC maps to 0xFC;
     - all remaining bytes map to themselves.
   NOTE(review): 0xD6/0xDC look like Latin-1 O/U umlaut; 0xC4 (A umlaut in
   Latin-1) is left unchanged - confirm whether that is intended. */
static unsigned char to_lower[] = {
  /* 0x00 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  /* 0x10 */ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
  /* 0x20 */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
  /* 0x30 */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F,
  /* 0x40 */ 0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
  /* 0x50 */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F,
  /* 0x60 */ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
  /* 0x70 */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
  /* 0x80 */ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F,
  /* 0x90 */ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F,
  /* 0xA0 */ 0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF,
  /* 0xB0 */ 0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF,
  /* 0xC0 */ 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
  /* 0xD0 */ 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xF6, 0xD7, 0xD8, 0xD9, 0xDA, 0xDB, 0xFC, 0xDD, 0xDE, 0xDF,
  /* 0xE0 */ 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
  /* 0xF0 */ 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF
};

/* Compares s1 and s2 after translating every byte through to_lower.  A byte
   that translates to 0 ends the string.

   Returns 0 if the strings are equal, otherwise the difference of the first
   pair of translated bytes that differ (negative if s1 sorts first). */
int
mystrcasecmp(const unsigned char *s1, const unsigned char *s2) {
  for (;; ++s1, ++s2) {
    unsigned char c1 = to_lower[*s1];
    unsigned char c2 = to_lower[*s2];

    if (c1 != c2) return (c1 - c2);
    if (c1 == '\0') return (0);
  }
}

/* Like mystrcasecmp(), but looks at no more than the first 11 bytes.

   Returns 0 if both strings end, equal, within those 11 bytes; the
   difference of the first differing pair of translated bytes if there is
   one; and 0x1000 if all 11 bytes are equal and neither string has ended,
   i.e. the prefix alone cannot decide the order.  0x1000 cannot be confused
   with a byte difference, which always lies between -255 and 255. */
int
mystrcasecmp2(const unsigned char *s1, const unsigned char *s2) {
  int i;

  for (i = 0; i < 11; ++i) {
    unsigned char c1 = to_lower[s1[i]];
    unsigned char c2 = to_lower[s2[i]];

    if (c1 != c2) return (c1 - c2);
    if (c1 == '\0') return (0);
  }
  return (0x1000);
}
| 40 | |||
| 41 | int cb_compare( const void* a, const void* b) { | ||
| 42 | int result; | ||
| 43 | |||
| 44 | result = mystrcasecmp2( 5+(unsigned char*)a, 5+(unsigned char*)b ); | ||
| 45 | if( result != 0x1000 ) | ||
| 46 | return result; | ||
| 47 | |||
| 48 | { | ||
| 49 | void *myptr1 = mmap( NULL, 1024, PROT_READ, MAP_NOCORE, indexed, 16+(off_t)*(unsigned long*)a ); | ||
| 50 | void *myptr2 = mmap( NULL, 1024, PROT_READ, MAP_NOCORE, indexed, 16+(off_t)*(unsigned long*)b ); | ||
| 51 | |||
| 52 | if( myptr1 && myptr2) | ||
| 53 | result = mystrcasecmp( myptr1, myptr2 ); | ||
| 54 | else { | ||
| 55 | fprintf( stderr, "Mapping during sort failed.\n" ); | ||
| 56 | exit( 1 ); | ||
| 57 | } | ||
| 58 | munmap( myptr1, 1024 ); | ||
| 59 | munmap( myptr2, 1024 ); | ||
| 60 | } | ||
| 61 | return result; | ||
| 62 | } | ||
| 63 | |||
| 64 | int main( int argc, char **argv ) { | ||
| 65 | MAP index = NULL; | ||
| 66 | |||
| 67 | if( argc != 3 ) | ||
| 68 | { fputs( "Syntax: sortindex <indexedfile> <indexfile>", stderr); exit( 1 ); } | ||
| 69 | |||
| 70 | if( ( indexed = open( argv[1], O_RDONLY ) ) == -1 ) | ||
| 71 | { fprintf( stderr, "Could not open file: %s\n", argv[1] ); exit( 1 ); } | ||
| 72 | |||
| 73 | if( !(index = map_file( argv[2], 0 ) ) ) exit( 1 ); | ||
| 74 | |||
| 75 | qsort( index->addr, index->size / 16, 16, cb_compare ); | ||
| 76 | |||
| 77 | unmap_file( &index ); | ||
| 78 | close( indexed ); | ||
| 79 | |||
| 80 | return 0; | ||
| 81 | } | ||
