diff options
| -rwxr-xr-x | src/makecolumns.sh | 46 | ||||
| -rw-r--r-- | src/splitold.c | 27 |
2 files changed, 39 insertions, 34 deletions
diff --git a/src/makecolumns.sh b/src/makecolumns.sh index d70f564..ff8a1e4 100755 --- a/src/makecolumns.sh +++ b/src/makecolumns.sh | |||
| @@ -112,21 +112,21 @@ handle_old_format() { | |||
| 112 | 112 | ||
| 113 | # Now loop over all files and dump them | 113 | # Now loop over all files and dump them |
| 114 | printf "Splitting decompressed nname chunks into their columns ... " | 114 | printf "Splitting decompressed nname chunks into their columns ... " |
| 115 | for file in `jot -w %0${filename_len}d - ${nname_file} $(( number_of_files - 1 )) 3`; do | 115 | jot -w %0${filename_len}d - ${nname_file} $(( number_of_files - 1 )) 3 | splitold 1 1 |
| 116 | set -- `hexdump -n 8 -v -e '" " 1/4 "%u"' ${file}` | 116 | # set -- `hexdump -n 8 -v -e '" " 1/4 "%u"' ${file}` |
| 117 | tail -c +$(( $2 + 1 )) ${file} | 117 | # tail -c +$(( $2 + 1 )) ${file} |
| 118 | done | tr '\n\0' '\t\n' > 01_02_Flags_Nachname | 118 | # done | tr '\n\0' '\t\n' > 01_02_Flags_Nachname |
| 119 | cut -c 1 < 01_02_Flags_Nachname > 01_Flags | 119 | cut -c 1 < 01_unknown > 01_Flags |
| 120 | cut -c 2- < 01_02_Flags_Nachname > 02_Nachname | 120 | cut -c 2- < 01_unknown > 02_Nachname |
| 121 | rm 01_02_Flags_Nachname | 121 | rm 01_unknown |
| 122 | printf "done.\n" | 122 | printf "done.\n" |
| 123 | 123 | ||
| 124 | printf "Splitting decompress vname chunks into their columns ... " | 124 | printf "Splitting decompress vname chunks into their columns ... " |
| 125 | jot -w "%0${filename_len}d" - ${vname_file} $(( number_of_files - 1 )) 3 | xargs cat | tr '\n\0' '\t\n' > 03_Vorname | 125 | jot -w "%0${filename_len}d" - ${vname_file} $(( number_of_files - 1 )) 3 | xargs cat | tr '\n\0' '\t\n' | tr -d '\377' > 03_Vorname |
| 126 | printf "done.\n" | 126 | printf "done.\n" |
| 127 | 127 | ||
| 128 | printf "Splitting decompress table file chunks into their columns ... " | 128 | printf "Splitting decompress table file chunks into their columns ... " |
| 129 | jot -w %0${filename_len}d - ${table_file} $(( number_of_files - 1 )) 3 | splitold | 129 | jot -w %0${filename_len}d - ${table_file} $(( number_of_files - 1 )) 3 | splitold 4 0 |
| 130 | # for file in `jot -w %0${filename_len}d - ${table_file} $(( number_of_files - 1 )) 3`; do | 130 | # for file in `jot -w %0${filename_len}d - ${table_file} $(( number_of_files - 1 )) 3`; do |
| 131 | # # Offset into first table entry tells us how many | 131 | # # Offset into first table entry tells us how many |
| 132 | # # fields are in table file | 132 | # # fields are in table file |
| @@ -148,17 +148,17 @@ handle_old_format() { | |||
| 148 | 148 | ||
| 149 | # rename our columns extracted from the table file | 149 | # rename our columns extracted from the table file |
| 150 | printf "Converting string terminators to line newlines ... " | 150 | printf "Converting string terminators to line newlines ... " |
| 151 | tr '\0' '\n' < 04_unknown > 04_Namenszusatz | 151 | mv 04_unknown 04_Namenszusatz |
| 152 | tr '\0' '\n' < 05_unknown > 05_Adresszusatz | 152 | mv 05_unknown 05_Adresszusatz |
| 153 | tr '\0' '\n' < 06_unknown > 06_Ortszusatz | 153 | mv 06_unknown 06_Ortszusatz |
| 154 | tr '\0' '\n' < 08_unknown > 08_Hausnummer | 154 | mv 08_unknown 08_Hausnummer |
| 155 | tr '\0' '\n' < 09_unknown > 09_Verweise | 155 | mv 09_unknown 09_Verweise |
| 156 | tr '\0' '\n' < 10_unknown > 10_Postleitzahl | 156 | mv 10_unknown 10_Postleitzahl |
| 157 | tr '\0' '\n' < 11_unknown > 11_Ort | 157 | mv 11_unknown 11_Ort |
| 158 | tr '\0' '\n' < 12_unknown > 12_Vorwahl | 158 | mv 12_unknown 12_Vorwahl |
| 159 | tr '\0' '\n' < 13_unknown > 13_Rufnummer | 159 | mv 13_unknown 13_Rufnummer |
| 160 | [ -f 14_unknown ] && tr '\0' '\n' < 14_unknown > 14_Email | 160 | [ -f 14_unknown ] && mv 14_unknown 14_Email |
| 161 | [ -f 15_unknown ] && tr '\0' '\n' < 15_unknown > 15_Webadresse | 161 | [ -f 15_unknown ] && mv 15_unknown 15_Webadresse |
| 162 | printf "done.\n" | 162 | printf "done.\n" |
| 163 | 163 | ||
| 164 | # If street names come in an extra file, extract | 164 | # If street names come in an extra file, extract |
| @@ -169,12 +169,12 @@ handle_old_format() { | |||
| 169 | # extract street names if 07_unknown contains street indexes | 169 | # extract street names if 07_unknown contains street indexes |
| 170 | # instead of street names | 170 | # instead of street names |
| 171 | if [ -f 99_Strassenname ]; then | 171 | if [ -f 99_Strassenname ]; then |
| 172 | tr '\0' '\n' < 07_unknown > 07_Strassenindex | 172 | mv 07_unknown 07_Strassenindex |
| 173 | printf "Looking up street names from indexes ... " | 173 | printf "Looking up street names from indexes ... " |
| 174 | cut -d ';' -f 1 07_Strassenindex | ${EL} -0x 99_Strassenname > 07_Strasse | 174 | cut -d ';' -f 1 07_Strassenindex | ${EL} -0x 99_Strassenname > 07_Strasse |
| 175 | printf "done.\n" | 175 | printf "done.\n" |
| 176 | else | 176 | else |
| 177 | tr '\0' '\n' < 07_unknown > 07_Strasse | 177 | mv 07_unknown 07_Strasse |
| 178 | fi | 178 | fi |
| 179 | rm ??_unknown | 179 | rm ??_unknown |
| 180 | 180 | ||
diff --git a/src/splitold.c b/src/splitold.c index 847eb2c..bd85775 100644 --- a/src/splitold.c +++ b/src/splitold.c | |||
| @@ -1,4 +1,3 @@ | |||
| 1 | #include "mystdlib.h" | ||
| 2 | #include <stdint.h> | 1 | #include <stdint.h> |
| 3 | #include <stdio.h> | 2 | #include <stdio.h> |
| 4 | #include <unistd.h> | 3 | #include <unistd.h> |
| @@ -6,31 +5,37 @@ | |||
| 6 | #include <stdlib.h> | 5 | #include <stdlib.h> |
| 7 | #include <string.h> | 6 | #include <string.h> |
| 8 | 7 | ||
| 9 | int main() { | 8 | int main( int argc, char **args ) { |
| 10 | char table[64]; | 9 | char table[64], f[1024*1024*16]; |
| 11 | int outfiles[64], i, off; | 10 | int outfiles[64], i, off, base = 0; |
| 11 | uint32_t fixed_columns = 0; | ||
| 12 | |||
| 13 | if( argc > 1 ) base = atol( args[1] ); | ||
| 14 | if( argc > 2 ) fixed_columns = atol( args[2] ); | ||
| 12 | 15 | ||
| 13 | for( i=0; i<64; ++i ) outfiles[i] = -1; | 16 | for( i=0; i<64; ++i ) outfiles[i] = -1; |
| 14 | while( fgets( table, sizeof(table), stdin ) ) { | 17 | while( fgets( table, sizeof(table), stdin ) ) { |
| 15 | int off = ( table[strlen(table)-1] = 0 ); /* fgets sucks */ | 18 | int off = ( table[strlen(table)-1] = 0 ); /* fgets sucks */ |
| 16 | MAP f = map_file( table, 1 ); | 19 | int f_in = open( table, O_RDONLY ); |
| 17 | uint32_t *p = (uint32_t*)(f->addr); | 20 | size_t s_in = read( f_in, f, sizeof(f)); |
| 18 | uint32_t count = p[0], columns = p[1] / 4 - 1; | 21 | uint32_t *p = (uint32_t*)f; |
| 22 | uint32_t count = p[0], columns = fixed_columns ? fixed_columns : p[1] / 4 - 1; | ||
| 19 | unsigned int file, strnr; | 23 | unsigned int file, strnr; |
| 20 | 24 | ||
| 25 | close(f_in); | ||
| 26 | |||
| 21 | for( file=0; file<columns; ++file ) { | 27 | for( file=0; file<columns; ++file ) { |
| 22 | /* Create outfile, if it is not yet there */ | 28 | /* Create outfile, if it is not yet there */ |
| 23 | if( outfiles[file] == -1 ) { | 29 | if( outfiles[file] == -1 ) { |
| 24 | sprintf( table, "%02d_unknown", file+4 ); | 30 | sprintf( table, "%02d_unknown", file+base ); |
| 25 | outfiles[file] = open( table, O_WRONLY | O_APPEND | O_CREAT, 0644 ); | 31 | outfiles[file] = open( table, O_WRONLY | O_APPEND | O_CREAT, 0644 ); |
| 26 | if ( outfiles[file] == -1 ) exit(1); | 32 | if ( outfiles[file] == -1 ) exit(1); |
| 27 | } | 33 | } |
| 28 | off = p[file+1]; | 34 | off = p[file+1]; |
| 29 | /* Look for end of this chunk, which is <count> strings long */ | 35 | /* Look for end of this chunk, which is <count> strings long */ |
| 30 | for( strnr=0; strnr < count; ++strnr ) while( f->addr[off++] ); | 36 | for( strnr=0; strnr < count; ++strnr ) { while( f[off++] ) {}; f[off-1] = '\n'; } |
| 31 | write( outfiles[file], f->addr + p[file+1], off - p[file+1] ); | 37 | write( outfiles[file], f + p[file+1], off - p[file+1] ); |
| 32 | } | 38 | } |
| 33 | unmap_file(&f); | ||
| 34 | } | 39 | } |
| 35 | for( i=0; i<64; ++i ) close( outfiles[i] ); | 40 | for( i=0; i<64; ++i ) close( outfiles[i] ); |
| 36 | return 0; | 41 | return 0; |
