diff options
-rwxr-xr-x | src/makecolumns.sh | 46 | ||||
-rw-r--r-- | src/splitold.c | 27 |
2 files changed, 39 insertions, 34 deletions
diff --git a/src/makecolumns.sh b/src/makecolumns.sh index d70f564..ff8a1e4 100755 --- a/src/makecolumns.sh +++ b/src/makecolumns.sh | |||
@@ -112,21 +112,21 @@ handle_old_format() { | |||
112 | 112 | ||
113 | # Now loop over all files and dump them | 113 | # Now loop over all files and dump them |
114 | printf "Splitting decompressed nname chunks into their columns ... " | 114 | printf "Splitting decompressed nname chunks into their columns ... " |
115 | for file in `jot -w %0${filename_len}d - ${nname_file} $(( number_of_files - 1 )) 3`; do | 115 | jot -w %0${filename_len}d - ${nname_file} $(( number_of_files - 1 )) 3 | splitold 1 1 |
116 | set -- `hexdump -n 8 -v -e '" " 1/4 "%u"' ${file}` | 116 | # set -- `hexdump -n 8 -v -e '" " 1/4 "%u"' ${file}` |
117 | tail -c +$(( $2 + 1 )) ${file} | 117 | # tail -c +$(( $2 + 1 )) ${file} |
118 | done | tr '\n\0' '\t\n' > 01_02_Flags_Nachname | 118 | # done | tr '\n\0' '\t\n' > 01_02_Flags_Nachname |
119 | cut -c 1 < 01_02_Flags_Nachname > 01_Flags | 119 | cut -c 1 < 01_unknown > 01_Flags |
120 | cut -c 2- < 01_02_Flags_Nachname > 02_Nachname | 120 | cut -c 2- < 01_unknown > 02_Nachname |
121 | rm 01_02_Flags_Nachname | 121 | rm 01_unknown |
122 | printf "done.\n" | 122 | printf "done.\n" |
123 | 123 | ||
124 | printf "Splitting decompress vname chunks into their columns ... " | 124 | printf "Splitting decompress vname chunks into their columns ... " |
125 | jot -w "%0${filename_len}d" - ${vname_file} $(( number_of_files - 1 )) 3 | xargs cat | tr '\n\0' '\t\n' > 03_Vorname | 125 | jot -w "%0${filename_len}d" - ${vname_file} $(( number_of_files - 1 )) 3 | xargs cat | tr '\n\0' '\t\n' | tr -d '\377' > 03_Vorname |
126 | printf "done.\n" | 126 | printf "done.\n" |
127 | 127 | ||
128 | printf "Splitting decompress table file chunks into their columns ... " | 128 | printf "Splitting decompress table file chunks into their columns ... " |
129 | jot -w %0${filename_len}d - ${table_file} $(( number_of_files - 1 )) 3 | splitold | 129 | jot -w %0${filename_len}d - ${table_file} $(( number_of_files - 1 )) 3 | splitold 4 0 |
130 | # for file in `jot -w %0${filename_len}d - ${table_file} $(( number_of_files - 1 )) 3`; do | 130 | # for file in `jot -w %0${filename_len}d - ${table_file} $(( number_of_files - 1 )) 3`; do |
131 | # # Offset into first table entry tells us how many | 131 | # # Offset into first table entry tells us how many |
132 | # # fields are in table file | 132 | # # fields are in table file |
@@ -148,17 +148,17 @@ handle_old_format() { | |||
148 | 148 | ||
149 | # rename our columns extracted from the table file | 149 | # rename our columns extracted from the table file |
150 | printf "Converting string terminators to line newlines ... " | 150 | printf "Converting string terminators to line newlines ... " |
151 | tr '\0' '\n' < 04_unknown > 04_Namenszusatz | 151 | mv 04_unknown 04_Namenszusatz |
152 | tr '\0' '\n' < 05_unknown > 05_Adresszusatz | 152 | mv 05_unknown 05_Adresszusatz |
153 | tr '\0' '\n' < 06_unknown > 06_Ortszusatz | 153 | mv 06_unknown 06_Ortszusatz |
154 | tr '\0' '\n' < 08_unknown > 08_Hausnummer | 154 | mv 08_unknown 08_Hausnummer |
155 | tr '\0' '\n' < 09_unknown > 09_Verweise | 155 | mv 09_unknown 09_Verweise |
156 | tr '\0' '\n' < 10_unknown > 10_Postleitzahl | 156 | mv 10_unknown 10_Postleitzahl |
157 | tr '\0' '\n' < 11_unknown > 11_Ort | 157 | mv 11_unknown 11_Ort |
158 | tr '\0' '\n' < 12_unknown > 12_Vorwahl | 158 | mv 12_unknown 12_Vorwahl |
159 | tr '\0' '\n' < 13_unknown > 13_Rufnummer | 159 | mv 13_unknown 13_Rufnummer |
160 | [ -f 14_unknown ] && tr '\0' '\n' < 14_unknown > 14_Email | 160 | [ -f 14_unknown ] && mv 14_unknown 14_Email |
161 | [ -f 15_unknown ] && tr '\0' '\n' < 15_unknown > 15_Webadresse | 161 | [ -f 15_unknown ] && mv 15_unknown 15_Webadresse |
162 | printf "done.\n" | 162 | printf "done.\n" |
163 | 163 | ||
164 | # If street names come in an extra file, extract | 164 | # If street names come in an extra file, extract |
@@ -169,12 +169,12 @@ handle_old_format() { | |||
169 | # extract street names if 07_unknown contains street indexes | 169 | # extract street names if 07_unknown contains street indexes |
170 | # instead of street names | 170 | # instead of street names |
171 | if [ -f 99_Strassenname ]; then | 171 | if [ -f 99_Strassenname ]; then |
172 | tr '\0' '\n' < 07_unknown > 07_Strassenindex | 172 | mv 07_unknown 07_Strassenindex |
173 | printf "Looking up street names from indexes ... " | 173 | printf "Looking up street names from indexes ... " |
174 | cut -d ';' -f 1 07_Strassenindex | ${EL} -0x 99_Strassenname > 07_Strasse | 174 | cut -d ';' -f 1 07_Strassenindex | ${EL} -0x 99_Strassenname > 07_Strasse |
175 | printf "done.\n" | 175 | printf "done.\n" |
176 | else | 176 | else |
177 | tr '\0' '\n' < 07_unknown > 07_Strasse | 177 | mv 07_unknown 07_Strasse |
178 | fi | 178 | fi |
179 | rm ??_unknown | 179 | rm ??_unknown |
180 | 180 | ||
diff --git a/src/splitold.c b/src/splitold.c index 847eb2c..bd85775 100644 --- a/src/splitold.c +++ b/src/splitold.c | |||
@@ -1,4 +1,3 @@ | |||
1 | #include "mystdlib.h" | ||
2 | #include <stdint.h> | 1 | #include <stdint.h> |
3 | #include <stdio.h> | 2 | #include <stdio.h> |
4 | #include <unistd.h> | 3 | #include <unistd.h> |
@@ -6,31 +5,37 @@ | |||
6 | #include <stdlib.h> | 5 | #include <stdlib.h> |
7 | #include <string.h> | 6 | #include <string.h> |
8 | 7 | ||
9 | int main() { | 8 | int main( int argc, char **args ) { |
10 | char table[64]; | 9 | char table[64], f[1024*1024*16]; |
11 | int outfiles[64], i, off; | 10 | int outfiles[64], i, off, base = 0; |
11 | uint32_t fixed_columns = 0; | ||
12 | |||
13 | if( argc > 1 ) base = atol( args[1] ); | ||
14 | if( argc > 2 ) fixed_columns = atol( args[2] ); | ||
12 | 15 | ||
13 | for( i=0; i<64; ++i ) outfiles[i] = -1; | 16 | for( i=0; i<64; ++i ) outfiles[i] = -1; |
14 | while( fgets( table, sizeof(table), stdin ) ) { | 17 | while( fgets( table, sizeof(table), stdin ) ) { |
15 | int off = ( table[strlen(table)-1] = 0 ); /* fgets sucks */ | 18 | int off = ( table[strlen(table)-1] = 0 ); /* fgets sucks */ |
16 | MAP f = map_file( table, 1 ); | 19 | int f_in = open( table, O_RDONLY ); |
17 | uint32_t *p = (uint32_t*)(f->addr); | 20 | size_t s_in = read( f_in, f, sizeof(f)); |
18 | uint32_t count = p[0], columns = p[1] / 4 - 1; | 21 | uint32_t *p = (uint32_t*)f; |
22 | uint32_t count = p[0], columns = fixed_columns ? fixed_columns : p[1] / 4 - 1; | ||
19 | unsigned int file, strnr; | 23 | unsigned int file, strnr; |
20 | 24 | ||
25 | close(f_in); | ||
26 | |||
21 | for( file=0; file<columns; ++file ) { | 27 | for( file=0; file<columns; ++file ) { |
22 | /* Create outfile, if it is not yet there */ | 28 | /* Create outfile, if it is not yet there */ |
23 | if( outfiles[file] == -1 ) { | 29 | if( outfiles[file] == -1 ) { |
24 | sprintf( table, "%02d_unknown", file+4 ); | 30 | sprintf( table, "%02d_unknown", file+base ); |
25 | outfiles[file] = open( table, O_WRONLY | O_APPEND | O_CREAT, 0644 ); | 31 | outfiles[file] = open( table, O_WRONLY | O_APPEND | O_CREAT, 0644 ); |
26 | if ( outfiles[file] == -1 ) exit(1); | 32 | if ( outfiles[file] == -1 ) exit(1); |
27 | } | 33 | } |
28 | off = p[file+1]; | 34 | off = p[file+1]; |
29 | /* Look for end of this chunk, which is <count> strings long */ | 35 | /* Look for end of this chunk, which is <count> strings long */ |
30 | for( strnr=0; strnr < count; ++strnr ) while( f->addr[off++] ); | 36 | for( strnr=0; strnr < count; ++strnr ) { while( f[off++] ) {}; f[off-1] = '\n'; } |
31 | write( outfiles[file], f->addr + p[file+1], off - p[file+1] ); | 37 | write( outfiles[file], f + p[file+1], off - p[file+1] ); |
32 | } | 38 | } |
33 | unmap_file(&f); | ||
34 | } | 39 | } |
35 | for( i=0; i<64; ++i ) close( outfiles[i] ); | 40 | for( i=0; i<64; ++i ) close( outfiles[i] ); |
36 | return 0; | 41 | return 0; |