From 046857dfb88f05e6b310fe9ef07b9f2d3ac5922d Mon Sep 17 00:00:00 2001 From: Dirk Engling Date: Thu, 20 Feb 2014 22:42:56 +0100 Subject: Restructure project, make names more clear --- src/Makefile | 25 ++++---- src/convertcoords.c | 40 ------------- src/decompress.c | 61 ------------------- src/export/convert_coords.c | 40 +++++++++++++ src/export/extract_version_2.c | 131 +++++++++++++++++++++++++++++++++++++++++ src/export/extract_version_3.c | 61 +++++++++++++++++++ src/export/map_coords.c | 62 +++++++++++++++++++ src/export/mystdlib.c | 56 ++++++++++++++++++ src/export/mystdlib.h | 32 ++++++++++ src/export/split_version_2.c | 42 +++++++++++++ src/extractblocks_new.c | 131 ----------------------------------------- src/makecolumns.sh | 42 ++++++------- src/mapcoords.c | 62 ------------------- src/mystdlib.c | 56 ------------------ src/mystdlib.h | 32 ---------- src/splitold.c | 42 ------------- 16 files changed, 458 insertions(+), 457 deletions(-) delete mode 100644 src/convertcoords.c delete mode 100644 src/decompress.c create mode 100644 src/export/convert_coords.c create mode 100644 src/export/extract_version_2.c create mode 100644 src/export/extract_version_3.c create mode 100644 src/export/map_coords.c create mode 100644 src/export/mystdlib.c create mode 100644 src/export/mystdlib.h create mode 100644 src/export/split_version_2.c delete mode 100644 src/extractblocks_new.c delete mode 100644 src/mapcoords.c delete mode 100644 src/mystdlib.c delete mode 100644 src/mystdlib.h delete mode 100644 src/splitold.c diff --git a/src/Makefile b/src/Makefile index 1a09455..21370a6 100644 --- a/src/Makefile +++ b/src/Makefile @@ -1,19 +1,20 @@ -all: decompress extractblocks mapcoords convertcoords splitold +BINARIES=../bin/extract_version_2 ../bin/extract_version_3 ../bin/split_version_2 ../bin/map_coords ../bin/convert_coords +binaries: $(BINARIES) -decompress: decompress.c mystdlib.c - @$(CC) -O2 -o ../bin/decompress decompress.c mystdlib.c -lz +../bin/extract_version_3: export/extract_version_3.c export/mystdlib.c + @$(CC) -O2 -o ../bin/extract_version_3 export/extract_version_3.c export/mystdlib.c -lz -I export -extractblocks: extractblocks_new.c mystdlib.c - @$(CC) -o ../bin/extractblocks extractblocks_new.c mystdlib.c +../bin/extract_version_2: export/extract_version_2.c export/mystdlib.c + @$(CC) -O2 -o ../bin/extract_version_2 export/extract_version_2.c export/mystdlib.c -I export -mapcoords: mapcoords.c mystdlib.c - @$(CC) -o ../bin/mapcoords mapcoords.c mystdlib.c +../bin/split_version_2: export/split_version_2.c export/mystdlib.c + @$(CC) -o ../bin/split_version_2 export/split_version_2.c export/mystdlib.c -I export -splitold: splitold.c mystdlib.c - @$(CC) -o ../bin/splitold splitold.c mystdlib.c +../bin/map_coords: export/map_coords.c export/mystdlib.c + @$(CC) -o ../bin/map_coords export/map_coords.c export/mystdlib.c -I export -convertcoords: convertcoords.c - @$(CC) -o ../bin/convertcoords convertcoords.c -lm +../bin/convert_coords: export/convert_coords.c + @$(CC) -o ../bin/convert_coords export/convert_coords.c -lm clean: - @rm -f ../bin/{decompress,extractblocks,mapcoords,convertcoords,splitold} + @rm -f $(BINARIES) diff --git a/src/convertcoords.c b/src/convertcoords.c deleted file mode 100644 index 37d780a..0000000 --- a/src/convertcoords.c +++ /dev/null @@ -1,40 +0,0 @@ -#include -#include -#include -int main(int argc, char *argv[]) -{ - double x, y; - char buf[64]; - int in; - while( fgets( buf, sizeof(buf), stdin ) ) - { - if( sscanf( buf, "%lf %lf", &x, &y ) == 2 ) { - double R = 6365000; - double fe = 5200000; - double fn = 1200000; - double ph0 = 0.5235977; // 30deg - double ph1 = 0.7853980; // 45deg - double ph2 = 0.9599309; // 55deg - double l0 = 0.1745329; // 10deg - - double xs = (x-fe)/R; - double ys = (y-fn)/R; - double ph0_s = 0.25*M_PI+0.5*ph0; - double ph1_s = 0.25*M_PI+0.5*ph1; - double ph2_s = 0.25*M_PI+0.5*ph2; - - double n = log(cos(ph1)/cos(ph2))/log(tan(ph2_s)/tan(ph1_s)); - double F = cos(ph1)*pow(tan(ph1_s),n)/n; - double r0 = F / pow(tan(ph0_s),n); - double r = sqrt(pow(xs,2)+pow(r0-ys,2)); - double th = atan(xs/(r0-ys)); - - double lon = l0+th/n; - double lat = 2.0*atan(pow(F/r,1.0/n))-0.5*M_PI; - - printf("%lf\t%lf\n", lat*180.0/M_PI, lon*180.0/M_PI); - } else - printf("\t\n"); - } - return 0; -} diff --git a/src/decompress.c b/src/decompress.c deleted file mode 100644 index fef4241..0000000 --- a/src/decompress.c +++ /dev/null @@ -1,61 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include "mystdlib.h" - -#define XORLEN (29) -#define HUGEBLOCK (1024*1024) - -int main(int argc, char **argv) { - MAP in = map_file( argv[1], 1 ); - - unsigned const char xorkey [XORLEN] = "Just for Fun. Linus Torvalds."; - unsigned char input [XORLEN]; - unsigned char output [HUGEBLOCK]; - char respath[32]; /* file_XXXXX\0 */ - int i, zres = 0, filenum = 0, resfile; - size_t offs = 0, reported = 0; - - z_stream z; memset( &z, 0, sizeof(z)); - - while( offs < in->size - XORLEN ) { - for( i=0; iaddr[offs+i] ^ xorkey[i]; - z.next_in = input; z.avail_in = XORLEN; - z.next_out = output; z.avail_out = HUGEBLOCK; - inflateInit( &z ); zres = inflate( &z, Z_NO_FLUSH ); - if( (zres != Z_OK) && (zres != Z_STREAM_END) ) - goto error_continue; - - z.next_in = in->addr + offs + XORLEN; z.avail_in = in->size - offs - XORLEN; - while( zres == Z_OK ) zres = inflate( &z, Z_NO_FLUSH ); - - if( zres != Z_STREAM_END ) { -error_continue: - inflateEnd(&z); memset( &z, 0, sizeof(z)); - offs++; - continue; - } - - sprintf( respath, "file_%05X", filenum++ ); - resfile = open( respath, O_RDWR | O_CREAT, 0644 ); - if( resfile < 0 ) { - fprintf( stderr, "Could not open output file %s\n", respath ); - exit(1); - } - write( resfile, output, z.total_out ); - close( resfile ); - offs += z.total_in; - - if( reported < ( offs * 10 ) / in->size ) { - reported++; - printf( "%zd%% ", 10 * reported ); - fflush( stdout ); - } - - inflateEnd(&z); memset( &z, 0, sizeof(z)); - } - unmap_file(&in); -} diff --git a/src/export/convert_coords.c b/src/export/convert_coords.c new file mode 100644 index 0000000..37d780a --- /dev/null +++ b/src/export/convert_coords.c @@ -0,0 +1,40 @@ +#include +#include +#include +int main(int argc, char *argv[]) +{ + double x, y; + char buf[64]; + int in; + while( fgets( buf, sizeof(buf), stdin ) ) + { + if( sscanf( buf, "%lf %lf", &x, &y ) == 2 ) { + double R = 6365000; + double fe = 5200000; + double fn = 1200000; + double ph0 = 0.5235977; // 30deg + double ph1 = 0.7853980; // 45deg + double ph2 = 0.9599309; // 55deg + double l0 = 0.1745329; // 10deg + + double xs = (x-fe)/R; + double ys = (y-fn)/R; + double ph0_s = 0.25*M_PI+0.5*ph0; + double ph1_s = 0.25*M_PI+0.5*ph1; + double ph2_s = 0.25*M_PI+0.5*ph2; + + double n = log(cos(ph1)/cos(ph2))/log(tan(ph2_s)/tan(ph1_s)); + double F = cos(ph1)*pow(tan(ph1_s),n)/n; + double r0 = F / pow(tan(ph0_s),n); + double r = sqrt(pow(xs,2)+pow(r0-ys,2)); + double th = atan(xs/(r0-ys)); + + double lon = l0+th/n; + double lat = 2.0*atan(pow(F/r,1.0/n))-0.5*M_PI; + + printf("%lf\t%lf\n", lat*180.0/M_PI, lon*180.0/M_PI); + } else + printf("\t\n"); + } + return 0; +} diff --git a/src/export/extract_version_2.c b/src/export/extract_version_2.c new file mode 100644 index 0000000..fe85252 --- /dev/null +++ b/src/export/extract_version_2.c @@ -0,0 +1,131 @@ +#include +#include "mystdlib.h" +#include +#include +#include +#include + +/* lha header: + +00 Header length +01 Header checksum [02-length] +02 0x2d ('-') +03 0x6c ('l') +04 0x68 ('h') +05 0x?? ('0' or '5') unsure +06 0x2d ('-') +07 0x?? LSB of compressed size +08 0x?? .. +09 0x00 .. +10 0x00 MSB of compressed size, i.e. 0 +.. +21 Length of path name + + +*/ + +static uint8_t mantra_in[] = { 0x68, 0x35, 0x2d, 0x6c }; + +int main( int args, char **argv ) +{ + int filenum = 0, run = 1; + size_t offset = 0, oldoffset = -1, reported = 0, enc_len = 32; + uint8_t mantra[4], id0, id5, *mappedfile; + MAP map; + + /* For streets we do have a enc_len of 34 */ + while( run ) { + switch( getopt( args, argv, ":e:" ) ) { + case -1 : run = 0; break; + case 'e': + enc_len = atol( optarg ); + break; + default: + fputs( "Syntax: %s [-e encrypted_length (default: 32, for streets 34 or 0)] path-to-teiln.dat", stderr ); exit( 1 ); + break; + } + } + run = 1; + + if( optind == args ) + { fputs( "Missing filename.", stderr ); exit( 1 ); } + + map = map_file( argv[optind], 1 ); + mappedfile = map->addr; + + mantra[0] = mantra_in[0] ^ mappedfile[4]; + mantra[1] = mantra_in[1] ^ mappedfile[5]; + mantra[2] = mantra_in[2] ^ mappedfile[2]; + mantra[3] = mantra_in[3] ^ mappedfile[3]; + + id0 = mappedfile[0]; + id5 = mappedfile[5]; + + while( run ) + { + while( ( offset < map->size ) && ( + ( mappedfile[ offset + 0 ] != id0 ) || + ( mappedfile[ offset + 2 ] != ( '-' ^ mantra[2] )) || + ( mappedfile[ offset + 3 ] != ( 'l' ^ mantra[3] )) || + ( mappedfile[ offset + 4 ] != ( 'h' ^ mantra[0] )) || + ( mappedfile[ offset + 5 ] != id5 ) || + ( mappedfile[ offset + 6 ] != ( '-' ^ mantra[2] )) + ) ) offset++; + + // printf( "Found an appropriate offset at: %zd\n", offset ); + + if( reported < ( offset * 10 ) / map->size ) + { + reported++; + printf( "%zd%% ", 10 * reported ); + fflush( stdout ); + } + + if( offset == map->size ) + run = 0; + + if( oldoffset != -1 ) + { + uint8_t *mf = mappedfile + oldoffset, df[128]; + size_t filename_len, header_len; + char filename_template[32], filename[32]; + int i; + + /* De-"crypt" obfuscation to our header copy */ + for( i=0; i header_len ) { + write( i, df, enc_len ); + write( i, mf + enc_len, offset - oldoffset - enc_len ); + } else { + write( i, df, header_len ); + write( i, mf + header_len, offset - oldoffset - header_len ); + } + close( i ); + } + oldoffset = offset; + offset++; + } + + unmap_file( &map ); + return 0; +} diff --git a/src/export/extract_version_3.c b/src/export/extract_version_3.c new file mode 100644 index 0000000..fef4241 --- /dev/null +++ b/src/export/extract_version_3.c @@ -0,0 +1,61 @@ +#include +#include +#include +#include +#include +#include +#include "mystdlib.h" + +#define XORLEN (29) +#define HUGEBLOCK (1024*1024) + +int main(int argc, char **argv) { + MAP in = map_file( argv[1], 1 ); + + unsigned const char xorkey [XORLEN] = "Just for Fun. Linus Torvalds."; + unsigned char input [XORLEN]; + unsigned char output [HUGEBLOCK]; + char respath[32]; /* file_XXXXX\0 */ + int i, zres = 0, filenum = 0, resfile; + size_t offs = 0, reported = 0; + + z_stream z; memset( &z, 0, sizeof(z)); + + while( offs < in->size - XORLEN ) { + for( i=0; iaddr[offs+i] ^ xorkey[i]; + z.next_in = input; z.avail_in = XORLEN; + z.next_out = output; z.avail_out = HUGEBLOCK; + inflateInit( &z ); zres = inflate( &z, Z_NO_FLUSH ); + if( (zres != Z_OK) && (zres != Z_STREAM_END) ) + goto error_continue; + + z.next_in = in->addr + offs + XORLEN; z.avail_in = in->size - offs - XORLEN; + while( zres == Z_OK ) zres = inflate( &z, Z_NO_FLUSH ); + + if( zres != Z_STREAM_END ) { +error_continue: + inflateEnd(&z); memset( &z, 0, sizeof(z)); + offs++; + continue; + } + + sprintf( respath, "file_%05X", filenum++ ); + resfile = open( respath, O_RDWR | O_CREAT, 0644 ); + if( resfile < 0 ) { + fprintf( stderr, "Could not open output file %s\n", respath ); + exit(1); + } + write( resfile, output, z.total_out ); + close( resfile ); + offs += z.total_in; + + if( reported < ( offs * 10 ) / in->size ) { + reported++; + printf( "%zd%% ", 10 * reported ); + fflush( stdout ); + } + + inflateEnd(&z); memset( &z, 0, sizeof(z)); + } + unmap_file(&in); +} diff --git a/src/export/map_coords.c b/src/export/map_coords.c new file mode 100644 index 0000000..b46f1cf --- /dev/null +++ b/src/export/map_coords.c @@ -0,0 +1,62 @@ +#define _WITH_GETLINE +#include "mystdlib.h" +#include +#include +#include +#include + +int find_offset( const void *key, const void *line ) +{ + size_t l = strlen( (char*)key ); + return strncmp( (char*)key, *(char**)line, l ); +} + +int qsort_cmp( const void *a, const void *b ) +{ + return strcmp( *(char**)a, *(char**)b ); +} + +int main( int argc, char ** args ) +{ + MAP coords = map_file( args[1], 1 ); + int i, l, lines; + char *p, **offsets, *input = malloc(1024); + ssize_t ll; + size_t input_length = 1024; + + if( !coords ) exit( 111 ); + p = (char *)coords->addr; + for ( i=0, lines=0; isize; ++i ) + if( p[i] == 0x00 ) + ++lines; + + offsets = malloc( lines * sizeof(char*)); + if( !offsets ) exit( 111 ); + + offsets[0] = p; l = 1; + for ( i=0; isize; ++i ) + if( p[i] == 0x00 ) + offsets[l++] = p+i+1; + + l--; qsort(offsets, l, sizeof(char*), qsort_cmp ); + + while( ( ll = getline( &input, &input_length, stdin ) ) >= 0 ) + { + char **coord_line; + input[ll-1]='\t'; + coord_line = bsearch( input, offsets, l, sizeof(char*), find_offset ); + if( !coord_line && ll > 2 && isalpha( input[ll-2] ) ) + { + input[ll-2] = '\t'; input[ll-1]=0; + ll--; + coord_line = bsearch( input, offsets, l, sizeof(char*), find_offset ); + } + + if( coord_line ) + printf( "%s\n", *coord_line + ll ); + else + puts( "\t" ); + } + + return 0; +} diff --git a/src/export/mystdlib.c b/src/export/mystdlib.c new file mode 100644 index 0000000..17f123b --- /dev/null +++ b/src/export/mystdlib.c @@ -0,0 +1,56 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "mystdlib.h" + +MAP map_file( char *filename, int readonly ) +{ + struct stat fstatus; + MAP map = (MAP)malloc( sizeof( *map )); + + if( map ) + { + memset( map, 0, sizeof( *map )); + + if( ( map->fh = open( filename, readonly ? O_RDONLY : O_RDWR ) ) >= 0 ) + { + fstat( map->fh, &fstatus ); + if( ( map->addr = mmap( NULL, map->size = (size_t)fstatus.st_size, + PROT_READ | ( readonly ? 0 : PROT_WRITE), (readonly ? MAP_PRIVATE : MAP_SHARED), map->fh, 0) ) == MAP_FAILED ) + { + fprintf( stderr, "Mapping file '%s' failed\n", filename ); + close( map->fh ); free( map ); map = NULL; + } + } else { + fprintf( stderr, "Couldn't open file: '%s'\n", filename ); + free( map ); map = NULL; + } + } else { + fputs( "Couldn't allocate memory", stderr ); + } + + return map; +} + +void unmap_file ( MAP *pMap ) +{ + if( !pMap || !*pMap ) return; + munmap( (*pMap)->addr, (*pMap)->size); + close( (*pMap)->fh); + free( *pMap ); *pMap = NULL; +} + +int getfilesize( int fd, unsigned long *size) +{ + struct stat sb; + int ret; + if( fstat( fd, &sb )) return -1; + *size = sb.st_size; + return 0; +} diff --git a/src/export/mystdlib.h b/src/export/mystdlib.h new file mode 100644 index 0000000..2e9499f --- /dev/null +++ b/src/export/mystdlib.h @@ -0,0 +1,32 @@ +#include +#include + +typedef struct { int fh; unsigned char *addr; size_t size; } *MAP; + +/* Mapps a file into memory + returns pointer to the mapping struct, + containing the file's size, the mapped + address and its file handle. + + If readonly is true, the file will be + opened and mapped read only. File is + opened and mapped writable, if false. + + Returns NULL if memory could not be + allocated, file could not be opened or + mapped. Gives out an diagnostic message + on stderr +*/ +MAP map_file( char *filename, int readonly ); + +/* Unmapps a file from memory. NULL pointer + checks are being done, so this is safe + to be called from cleanup without knowing + whether there actually is a map. +*/ +void unmap_file ( MAP *pMap ); + +/* Gets file size of open file + returns != 0 in case of error */ +inline int getfilesize( int fd, unsigned long *size ); + diff --git a/src/export/split_version_2.c b/src/export/split_version_2.c new file mode 100644 index 0000000..bd85775 --- /dev/null +++ b/src/export/split_version_2.c @@ -0,0 +1,42 @@ +#include +#include +#include +#include +#include +#include + +int main( int argc, char **args ) { + char table[64], f[1024*1024*16]; + int outfiles[64], i, off, base = 0; + uint32_t fixed_columns = 0; + + if( argc > 1 ) base = atol( args[1] ); + if( argc > 2 ) fixed_columns = atol( args[2] ); + + for( i=0; i<64; ++i ) outfiles[i] = -1; + while( fgets( table, sizeof(table), stdin ) ) { + int off = ( table[strlen(table)-1] = 0 ); /* fgets sucks */ + int f_in = open( table, O_RDONLY ); + size_t s_in = read( f_in, f, sizeof(f)); + uint32_t *p = (uint32_t*)f; + uint32_t count = p[0], columns = fixed_columns ? fixed_columns : p[1] / 4 - 1; + unsigned int file, strnr; + + close(f_in); + + for( file=0; file strings long */ + for( strnr=0; strnr < count; ++strnr ) { while( f[off++] ) {}; f[off-1] = '\n'; } + write( outfiles[file], f + p[file+1], off - p[file+1] ); + } + } + for( i=0; i<64; ++i ) close( outfiles[i] ); + return 0; +} diff --git a/src/extractblocks_new.c b/src/extractblocks_new.c deleted file mode 100644 index fe85252..0000000 --- a/src/extractblocks_new.c +++ /dev/null @@ -1,131 +0,0 @@ -#include -#include "mystdlib.h" -#include -#include -#include -#include - -/* lha header: - -00 Header length -01 Header checksum [02-length] -02 0x2d ('-') -03 0x6c ('l') -04 0x68 ('h') -05 0x?? ('0' or '5') unsure -06 0x2d ('-') -07 0x?? LSB of compressed size -08 0x?? .. -09 0x00 .. -10 0x00 MSB of compressed size, i.e. 0 -.. -21 Length of path name - - -*/ - -static uint8_t mantra_in[] = { 0x68, 0x35, 0x2d, 0x6c }; - -int main( int args, char **argv ) -{ - int filenum = 0, run = 1; - size_t offset = 0, oldoffset = -1, reported = 0, enc_len = 32; - uint8_t mantra[4], id0, id5, *mappedfile; - MAP map; - - /* For streets we do have a enc_len of 34 */ - while( run ) { - switch( getopt( args, argv, ":e:" ) ) { - case -1 : run = 0; break; - case 'e': - enc_len = atol( optarg ); - break; - default: - fputs( "Syntax: %s [-e encrypted_length (default: 32, for streets 34 or 0)] path-to-teiln.dat", stderr ); exit( 1 ); - break; - } - } - run = 1; - - if( optind == args ) - { fputs( "Missing filename.", stderr ); exit( 1 ); } - - map = map_file( argv[optind], 1 ); - mappedfile = map->addr; - - mantra[0] = mantra_in[0] ^ mappedfile[4]; - mantra[1] = mantra_in[1] ^ mappedfile[5]; - mantra[2] = mantra_in[2] ^ mappedfile[2]; - mantra[3] = mantra_in[3] ^ mappedfile[3]; - - id0 = mappedfile[0]; - id5 = mappedfile[5]; - - while( run ) - { - while( ( offset < map->size ) && ( - ( mappedfile[ offset + 0 ] != id0 ) || - ( mappedfile[ offset + 2 ] != ( '-' ^ mantra[2] )) || - ( mappedfile[ offset + 3 ] != ( 'l' ^ mantra[3] )) || - ( mappedfile[ offset + 4 ] != ( 'h' ^ mantra[0] )) || - ( mappedfile[ offset + 5 ] != id5 ) || - ( mappedfile[ offset + 6 ] != ( '-' ^ mantra[2] )) - ) ) offset++; - - // printf( "Found an appropriate offset at: %zd\n", offset ); - - if( reported < ( offset * 10 ) / map->size ) - { - reported++; - printf( "%zd%% ", 10 * reported ); - fflush( stdout ); - } - - if( offset == map->size ) - run = 0; - - if( oldoffset != -1 ) - { - uint8_t *mf = mappedfile + oldoffset, df[128]; - size_t filename_len, header_len; - char filename_template[32], filename[32]; - int i; - - /* De-"crypt" obfuscation to our header copy */ - for( i=0; i header_len ) { - write( i, df, enc_len ); - write( i, mf + enc_len, offset - oldoffset - enc_len ); - } else { - write( i, df, header_len ); - write( i, mf + header_len, offset - oldoffset - header_len ); - } - close( i ); - } - oldoffset = offset; - offset++; - } - - unmap_file( &map ); - return 0; -} diff --git a/src/makecolumns.sh b/src/makecolumns.sh index 04806fb..28dd0a8 100755 --- a/src/makecolumns.sh +++ b/src/makecolumns.sh @@ -20,7 +20,7 @@ main() { fi # Compile all the binaries - make all + make binaries printf "Cleaning up old working directory ... " rm -rf ../work_`basename ${1#white_}` @@ -29,18 +29,18 @@ main() { cd ../work_`basename ${1#white_}` || exit 1 if [ -f "$1/phonebook.db" ]; then - handle_new_format $1 + handle_format_version_3 $1 elif [ -f $1/[Dd][Aa][Tt]/[Tt][Ee][Ii][Ll][Nn].[Dd][Aa][Tt] ]; then - handle_old_format $1 + handle_format_version_2 $1 else echo "Not a recognized Telefonbuch folder" fi cd .. } -do_decompress_old() { +do_decompress_version_2() { printf "Extracting $2 chunks ... " - extractblocks "${1}" + extract_version_2 "${1}" printf "done.\n" printf "Decompressing $2 chunks ... " @@ -55,10 +55,10 @@ do_decompress_old() { printf "done.\n" } -do_processfile_old() { +do_processfile_version_2() { working_on=`basename ${1}` mkdir $working_on && cd ${working_on} - do_decompress_old "${1}" "${2}" + do_decompress_version_2 "${1}" "${2}" cd .. printf "Combining $2 into single file ... " @@ -81,10 +81,10 @@ get_dword() { hexdump -n 4 -v -e '" " 1/4 "%u"' `printf %0${filename_len}d ${1}` } -handle_old_format() { +handle_format_version_2() { echo "Working on $1. Detected pre-2004 Telefonbuch version." # Extract teiln.dat - do_decompress_old $1/[Dd][Aa][Tt]/[Tt][Ee][Ii][Ll][Nn].[Dd][Aa][Tt] "teiln.dat" + do_decompress_version_2 $1/[Dd][Aa][Tt]/[Tt][Ee][Ii][Ll][Nn].[Dd][Aa][Tt] "teiln.dat" # See how long each filename is export filename_len=$(( `ls | head -n 1 | wc -c` - 1 )) @@ -112,7 +112,7 @@ handle_old_format() { # Now loop over all files and dump them printf "Splitting decompressed nname chunks into their columns ... " - jot -w %0${filename_len}d - ${nname_file} $(( number_of_files - 1 )) 3 | splitold 1 1 + jot -w %0${filename_len}d - ${nname_file} $(( number_of_files - 1 )) 3 | split_version_2 1 1 # set -- `hexdump -n 8 -v -e '" " 1/4 "%u"' ${file}` # tail -c +$(( $2 + 1 )) ${file} # done | tr '\n\0' '\t\n' > 01_02_Flags_Nachname @@ -126,7 +126,7 @@ handle_old_format() { printf "done.\n" printf "Splitting decompress table file chunks into their columns ... " - jot -w %0${filename_len}d - ${table_file} $(( number_of_files - 1 )) 3 | splitold 4 0 + jot -w %0${filename_len}d - ${table_file} $(( number_of_files - 1 )) 3 | split_version_2 4 0 # for file in `jot -w %0${filename_len}d - ${table_file} $(( number_of_files - 1 )) 3`; do # # Offset into first table entry tells us how many # # fields are in table file @@ -162,7 +162,7 @@ handle_old_format() { # If street names come in an extra file, extract # street names first streets=$1/[Dd][Aa][Tt]/[Ss][Tt][Rr][Aa][Ss][Ss][Ee][Nn].[Dd][Aa][Tt] - [ -f ${streets} ] && do_processfile_old ${streets} "street name" 99_Strassenname convert_zeros + [ -f ${streets} ] && do_processfile_version_2 ${streets} "street name" 99_Strassenname convert_zeros # extract street names if 07_unknown contains street indexes # instead of street names @@ -177,27 +177,27 @@ handle_old_format() { karto=$1/[Dd][Aa][Tt]/[Kk][Aa][Rr][Tt][Oo].[Dd][Aa][Tt] if [ -f ${karto} ]; then - do_processfile_old ${karto} "geo coordinates" 90_Geokoordinaten_hnr_raw + do_processfile_version_2 ${karto} "geo coordinates" 90_Geokoordinaten_hnr_raw printf "Looking up geo coordinates for each phonebook entry ... " tr '\0' '\n' < 90_Geokoordinaten_hnr_raw | tr ';' '\t' | cut -f "1,2,3,4,6,7" | tr '\n' '\0' > 90_Geokoordinaten_hnr rm 90_Geokoordinaten_hnr_raw - lam 10_Postleitzahl -s $'\t' 11_Ort -s $'\t' 07_Strasse -s $'\t' 08_Hausnummer | mapcoords 90_Geokoordinaten_hnr | convertcoords > 16_Koordinaten + lam 10_Postleitzahl -s $'\t' 11_Ort -s $'\t' 07_Strasse -s $'\t' 08_Hausnummer | map_coords 90_Geokoordinaten_hnr | convert_coords > 16_Koordinaten printf "done.\n" fi } -handle_new_format() { +handle_format_version_3() { echo "Working on $1. Detected post-2003 Telefonbuch version." printf "Extracting street names ... " - decompress $1/streets.tl + extract_version_3 $1/streets.tl cat file_* | tr '\n\0' '\t\n' > 99_Strassenname rm file_* printf "done.\n" printf "Extracting phonebook.db ... " - decompress $1/phonebook.db + extract_version_3 $1/phonebook.db rows=`find . -name file_\* | wc -l` printf "done.\n" @@ -237,19 +237,19 @@ handle_new_format() { if [ -f $1/zip-streets-hn-geo.tl ]; then printf "Extracting geo coordinates (precision: house number) ... " - decompress $1/zip-streets-hn-geo.tl + extract_version_3 $1/zip-streets-hn-geo.tl cat file_* > 90_Geokoordinaten_hnr printf "done.\n" printf "Looking up geo coordinates for each phonebook entry ... " - lam 10_Postleitzahl -s $'\t' 07_Strasse -s $'\t' 08_Hausnummer | mapcoords 90_Geokoordinaten_hnr | convertcoords > 16_Koordinaten + lam 10_Postleitzahl -s $'\t' 07_Strasse -s $'\t' 08_Hausnummer | map_coords 90_Geokoordinaten_hnr | convert_coords > 16_Koordinaten printf "done.\n" elif [ -f $1/zip-streets-geo.tl ]; then printf "Extracting geo coordinates (precision: street) ... " - decompress $1/zip-streets-geo.tl + extract_version_3 $1/zip-streets-geo.tl cat file_* > 91_Geokoordinaten_str printf "done.\n" printf "Looking up geo coordinates for each phonebook entry ... " - lam 10_Postleitzahl -s $'\t' 07_Strasse | mapcoords 91_Geokoordinaten_str | convertcoords > 16_Koordinaten + lam 10_Postleitzahl -s $'\t' 07_Strasse | map_coords 91_Geokoordinaten_str | convert_coords > 16_Koordinaten printf "done.\n" fi rm file_* diff --git a/src/mapcoords.c b/src/mapcoords.c deleted file mode 100644 index b46f1cf..0000000 --- a/src/mapcoords.c +++ /dev/null @@ -1,62 +0,0 @@ -#define _WITH_GETLINE -#include "mystdlib.h" -#include -#include -#include -#include - -int find_offset( const void *key, const void *line ) -{ - size_t l = strlen( (char*)key ); - return strncmp( (char*)key, *(char**)line, l ); -} - -int qsort_cmp( const void *a, const void *b ) -{ - return strcmp( *(char**)a, *(char**)b ); -} - -int main( int argc, char ** args ) -{ - MAP coords = map_file( args[1], 1 ); - int i, l, lines; - char *p, **offsets, *input = malloc(1024); - ssize_t ll; - size_t input_length = 1024; - - if( !coords ) exit( 111 ); - p = (char *)coords->addr; - for ( i=0, lines=0; isize; ++i ) - if( p[i] == 0x00 ) - ++lines; - - offsets = malloc( lines * sizeof(char*)); - if( !offsets ) exit( 111 ); - - offsets[0] = p; l = 1; - for ( i=0; isize; ++i ) - if( p[i] == 0x00 ) - offsets[l++] = p+i+1; - - l--; qsort(offsets, l, sizeof(char*), qsort_cmp ); - - while( ( ll = getline( &input, &input_length, stdin ) ) >= 0 ) - { - char **coord_line; - input[ll-1]='\t'; - coord_line = bsearch( input, offsets, l, sizeof(char*), find_offset ); - if( !coord_line && ll > 2 && isalpha( input[ll-2] ) ) - { - input[ll-2] = '\t'; input[ll-1]=0; - ll--; - coord_line = bsearch( input, offsets, l, sizeof(char*), find_offset ); - } - - if( coord_line ) - printf( "%s\n", *coord_line + ll ); - else - puts( "\t" ); - } - - return 0; -} diff --git a/src/mystdlib.c b/src/mystdlib.c deleted file mode 100644 index 17f123b..0000000 --- a/src/mystdlib.c +++ /dev/null @@ -1,56 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include - -#include "mystdlib.h" - -MAP map_file( char *filename, int readonly ) -{ - struct stat fstatus; - MAP map = (MAP)malloc( sizeof( *map )); - - if( map ) - { - memset( map, 0, sizeof( *map )); - - if( ( map->fh = open( filename, readonly ? O_RDONLY : O_RDWR ) ) >= 0 ) - { - fstat( map->fh, &fstatus ); - if( ( map->addr = mmap( NULL, map->size = (size_t)fstatus.st_size, - PROT_READ | ( readonly ? 0 : PROT_WRITE), (readonly ? MAP_PRIVATE : MAP_SHARED), map->fh, 0) ) == MAP_FAILED ) - { - fprintf( stderr, "Mapping file '%s' failed\n", filename ); - close( map->fh ); free( map ); map = NULL; - } - } else { - fprintf( stderr, "Couldn't open file: '%s'\n", filename ); - free( map ); map = NULL; - } - } else { - fputs( "Couldn't allocate memory", stderr ); - } - - return map; -} - -void unmap_file ( MAP *pMap ) -{ - if( !pMap || !*pMap ) return; - munmap( (*pMap)->addr, (*pMap)->size); - close( (*pMap)->fh); - free( *pMap ); *pMap = NULL; -} - -int getfilesize( int fd, unsigned long *size) -{ - struct stat sb; - int ret; - if( fstat( fd, &sb )) return -1; - *size = sb.st_size; - return 0; -} diff --git a/src/mystdlib.h b/src/mystdlib.h deleted file mode 100644 index 2e9499f..0000000 --- a/src/mystdlib.h +++ /dev/null @@ -1,32 +0,0 @@ -#include -#include - -typedef struct { int fh; unsigned char *addr; size_t size; } *MAP; - -/* Mapps a file into memory - returns pointer to the mapping struct, - containing the file's size, the mapped - address and its file handle. - - If readonly is true, the file will be - opened and mapped read only. File is - opened and mapped writable, if false. - - Returns NULL if memory could not be - allocated, file could not be opened or - mapped. Gives out an diagnostic message - on stderr -*/ -MAP map_file( char *filename, int readonly ); - -/* Unmapps a file from memory. NULL pointer - checks are being done, so this is safe - to be called from cleanup without knowing - whether there actually is a map. -*/ -void unmap_file ( MAP *pMap ); - -/* Gets file size of open file - returns != 0 in case of error */ -inline int getfilesize( int fd, unsigned long *size ); - diff --git a/src/splitold.c b/src/splitold.c deleted file mode 100644 index bd85775..0000000 --- a/src/splitold.c +++ /dev/null @@ -1,42 +0,0 @@ -#include -#include -#include -#include -#include -#include - -int main( int argc, char **args ) { - char table[64], f[1024*1024*16]; - int outfiles[64], i, off, base = 0; - uint32_t fixed_columns = 0; - - if( argc > 1 ) base = atol( args[1] ); - if( argc > 2 ) fixed_columns = atol( args[2] ); - - for( i=0; i<64; ++i ) outfiles[i] = -1; - while( fgets( table, sizeof(table), stdin ) ) { - int off = ( table[strlen(table)-1] = 0 ); /* fgets sucks */ - int f_in = open( table, O_RDONLY ); - size_t s_in = read( f_in, f, sizeof(f)); - uint32_t *p = (uint32_t*)f; - uint32_t count = p[0], columns = fixed_columns ? fixed_columns : p[1] / 4 - 1; - unsigned int file, strnr; - - close(f_in); - - for( file=0; file strings long */ - for( strnr=0; strnr < count; ++strnr ) { while( f[off++] ) {}; f[off-1] = '\n'; } - write( outfiles[file], f + p[file+1], off - p[file+1] ); - } - } - for( i=0; i<64; ++i ) close( outfiles[i] ); - return 0; -} -- cgit v1.2.3