diff options
| -rw-r--r-- | Makefile | 5 | ||||
| -rwxr-xr-x | makecolumns.sh | 13 | ||||
| -rw-r--r-- | parasort.sh | 17 | ||||
| -rw-r--r-- | postprocess.sh | 2 | ||||
| -rw-r--r-- | src/export/extract_version_1.c | 16 | ||||
| -rw-r--r-- | src/export/mystdlib.c | 4 | ||||
| -rw-r--r-- | src/postprocess/map_plz.c | 4 | ||||
| -rw-r--r-- | src/postprocess/merge_entries.c | 5 | ||||
| -rw-r--r-- | src/postprocess/postprocess-1992.sh | 15 |
9 files changed, 49 insertions, 32 deletions
| @@ -1,4 +1,4 @@ | |||
| 1 | BINARIES=bin/extract_version_1 bin/extract_version_2 bin/extract_version_3 bin/extract_version_4 bin/split_version_2 bin/split_version_3 bin/map_coords bin/map_branches_v3 bin/map_branches_v4 bin/convert_coords bin/merge_entries bin/sort_plz # bin/join | 1 | BINARIES=bin/extract_version_1 bin/extract_version_2 bin/extract_version_3 bin/extract_version_4 bin/split_version_2 bin/split_version_3 bin/map_coords bin/map_branches_v3 bin/map_branches_v4 bin/convert_coords bin/merge_entries bin/sort_plz bin/map_plz # bin/join |
| 2 | CFLAGS += -W -Wall -Wextra -O3 -I src/export # -Weverything -Wno-cast-align -Wno-padded | 2 | CFLAGS += -W -Wall -Wextra -O3 -I src/export # -Weverything -Wno-cast-align -Wno-padded |
| 3 | 3 | ||
| 4 | all: $(BINARIES) | 4 | all: $(BINARIES) |
| @@ -42,6 +42,9 @@ bin/merge_entries: src/postprocess/merge_entries.c src/export/mystdlib.c src/pos | |||
| 42 | bin/sort_plz: src/postprocess/sort_plz.c | 42 | bin/sort_plz: src/postprocess/sort_plz.c |
| 43 | $(CC) $(CFLAGS) -o $@ src/postprocess/sort_plz.c | 43 | $(CC) $(CFLAGS) -o $@ src/postprocess/sort_plz.c |
| 44 | 44 | ||
| 45 | bin/map_plz: src/postprocess/map_plz.c src/export/mystdlib.c | ||
| 46 | $(CC) $(CFLAGS) -o $@ src/postprocess/map_plz.c src/export/mystdlib.c | ||
| 47 | |||
| 45 | .PHONY: clean | 48 | .PHONY: clean |
| 46 | clean: | 49 | clean: |
| 47 | @rm -f $(BINARIES) | 50 | @rm -f $(BINARIES) |
diff --git a/makecolumns.sh b/makecolumns.sh index 3f05a61..a505d31 100755 --- a/makecolumns.sh +++ b/makecolumns.sh | |||
| @@ -115,10 +115,17 @@ handle_format_version_1() { | |||
| 115 | mv 11_unknown 12_Vorwahl | 115 | mv 11_unknown 12_Vorwahl |
| 116 | mv 12_unknown 13_Rufnummer | 116 | mv 12_unknown 13_Rufnummer |
| 117 | mv 13_unknown 11_Ort | 117 | mv 13_unknown 11_Ort |
| 118 | mv 14_unknown 10_Postleitzahl | 118 | mv 14_unknown 10_Postleitzahl_West |
| 119 | mv 15_unknown 12_Vorwahl_block | ||
| 120 | |||
| 121 | printf "Splitting appartement to zusaetze ... " | ||
| 122 | paste 07_Strasse 08_Hausnummer 09_unknown | sed -E $'s:^(.*)\;([0-9]+.*)\t(.*)\t.*$:\\1\t\\2\tWohnung \\3:;s:^(.*)tr(\t.*\t.*)$:\\1tr.\\2:' > tm_unknown | ||
| 123 | cut -f 1 tm_unknown > 07_Strasse | ||
| 124 | cut -f 2 tm_unknown > 08_Hausnummer | ||
| 125 | printf "done.\n" | ||
| 119 | 126 | ||
| 120 | printf "Normalizing zusaetze ... " | 127 | printf "Normalizing zusaetze ... " |
| 121 | sed -E -e 's:^, +:u. :' 09_unknown > 04_Namenszusatz | 128 | cut -f 3 tm_unknown | sed -E -e 's:^, +:u. :' > 04_Namenszusatz |
| 122 | sed -E -e 's:^, +:u. :' 04_unknown > 05_Adresszusatz | 129 | sed -E -e 's:^, +:u. :' 04_unknown > 05_Adresszusatz |
| 123 | paste 04_Namenszusatz 05_Adresszusatz | awk '{$1=$1};1' > 04_Zusaetze | 130 | paste 04_Namenszusatz 05_Adresszusatz | awk '{$1=$1};1' > 04_Zusaetze |
| 124 | printf "done.\n" | 131 | printf "done.\n" |
| @@ -501,7 +508,7 @@ tidy_columns () { | |||
| 501 | printf "Tidying up streetnames ... " | 508 | printf "Tidying up streetnames ... " |
| 502 | # Replace any dots at end of line by a single one | 509 | # Replace any dots at end of line by a single one |
| 503 | # finish any str abbreviation without a period with a period | 510 | # finish any str abbreviation without a period with a period |
| 504 | sed -E 's/\.+/./g;s/(S|s)tr( |:)?$/\1tr./;s/(.*)-(.*) -/\1-\2-Str./;s/ -$/ Str./;s/-$/str./' 07_Strasse | iconv -f iso-8859-15 -t utf-8 > 07_Strasse.new | 511 | sed -E 's/\.+/./g;s/(S|s)tr( |:)?$/\1tr./;s/(.*)-(.*) -/\1-\2-Str./;s/ -$/ Str./;s/-$/str./;s/^(.*-.*) Str\.?$/\1-Str./' 07_Strasse | iconv -f iso-8859-15 -t utf-8 > 07_Strasse.new |
| 505 | mv 07_Strasse.new 07_Strasse | 512 | mv 07_Strasse.new 07_Strasse |
| 506 | printf "done.\n" | 513 | printf "done.\n" |
| 507 | 514 | ||
diff --git a/parasort.sh b/parasort.sh index 6565d61..b593bb9 100644 --- a/parasort.sh +++ b/parasort.sh | |||
| @@ -18,11 +18,16 @@ export PATH=${PATH}:`pwd -P`/bin/ | |||
| 18 | mkdir -p work/sorted | 18 | mkdir -p work/sorted |
| 19 | cd work/output || exit 1 | 19 | cd work/output || exit 1 |
| 20 | 20 | ||
| 21 | for a in *[05]; do echo "$a" >&2; merge_entries $a > ../sorted/$a; done & | 21 | starttime=`date +%s` |
| 22 | for a in *[16]; do echo " $a" >&2; merge_entries $a > ../sorted/$a; done & | 22 | |
| 23 | for a in *[27]; do echo " $a" >&2; merge_entries $a > ../sorted/$a; done & | 23 | for a in ???[05]?; do echo "$a" >&2; merge_entries $a > ../sorted/$a; done & |
| 24 | for a in *[38]; do echo " $a" >&2; merge_entries $a > ../sorted/$a; done & | 24 | for a in ???[16]?; do echo " $a" >&2; merge_entries $a > ../sorted/$a; done & |
| 25 | for a in *[49]; do echo " $a" >&2; merge_entries $a > ../sorted/$a; done & | 25 | for a in ???[27]?; do echo " $a" >&2; merge_entries $a > ../sorted/$a; done & |
| 26 | for a in brken *_; do echo " $a" >&2; merge_entries $a > ../sorted/$a; done & | 26 | for a in ???[38]?; do echo " $a" >&2; merge_entries $a > ../sorted/$a; done & |
| 27 | for a in ???[49]?; do echo " $a" >&2; merge_entries $a > ../sorted/$a; done & | ||
| 28 | for a in brken ???_?; do echo " $a" >&2; merge_entries $a > ../sorted/$a; done & | ||
| 27 | 29 | ||
| 28 | wait | 30 | wait |
| 31 | |||
| 32 | elapsed=$(( `date +%s` - starttime )) | ||
| 33 | printf "Finished in %d seconds (%d minutes)\n" ${elapsed} $(( elapsed / 60 )) | ||
diff --git a/postprocess.sh b/postprocess.sh index 9316357..bc70492 100644 --- a/postprocess.sh +++ b/postprocess.sh | |||
| @@ -26,4 +26,4 @@ fi | |||
| 26 | 26 | ||
| 27 | cd work || exit 1 | 27 | cd work || exit 1 |
| 28 | 28 | ||
| 29 | for a in 199[56789]_Q? 20*_Q?; do ./sort_plz $a; done | 29 | for a in ????_Q?; do ./sort_plz $a; done |
diff --git a/src/export/extract_version_1.c b/src/export/extract_version_1.c index 8ec199e..6d67a5d 100644 --- a/src/export/extract_version_1.c +++ b/src/export/extract_version_1.c | |||
| @@ -39,9 +39,9 @@ static uint8_t cp437_to_iso8859_1_table[] = { | |||
| 39 | }; | 39 | }; |
| 40 | 40 | ||
| 41 | static struct { | 41 | static struct { |
| 42 | int outfiles[15]; | 42 | int outfiles[16]; |
| 43 | uint8_t *outbuf[15]; | 43 | uint8_t *outbuf[16]; |
| 44 | size_t outfill[15]; | 44 | size_t outfill[16]; |
| 45 | char * vorwahl; | 45 | char * vorwahl; |
| 46 | char ort[1024]; | 46 | char ort[1024]; |
| 47 | size_t ort_len; | 47 | size_t ort_len; |
| @@ -101,11 +101,13 @@ static void split_to_files( uint8_t *entries, int num_entries ) | |||
| 101 | *( g_state.outbuf[0] + g_state.outfill[0]++ ) = num_entries > 1 ? '1' : '0'; | 101 | *( g_state.outbuf[0] + g_state.outfill[0]++ ) = num_entries > 1 ? '1' : '0'; |
| 102 | memcpy( g_state.outbuf[12] + g_state.outfill[12], g_state.ort, g_state.ort_len ); g_state.outfill[12] += g_state.ort_len; | 102 | memcpy( g_state.outbuf[12] + g_state.outfill[12], g_state.ort, g_state.ort_len ); g_state.outfill[12] += g_state.ort_len; |
| 103 | memcpy( g_state.outbuf[13] + g_state.outfill[13], g_state.zip, g_state.zip_len ); g_state.outfill[13] += g_state.zip_len; | 103 | memcpy( g_state.outbuf[13] + g_state.outfill[13], g_state.zip, g_state.zip_len ); g_state.outfill[13] += g_state.zip_len; |
| 104 | strcpy( g_state.outbuf[14] + g_state.outfill[14], g_state.vorwahl); g_state.outfill[14] += strlen(g_state.vorwahl); | ||
| 104 | } | 105 | } |
| 105 | 106 | ||
| 106 | *( g_state.outbuf[0 ] + g_state.outfill[0 ]++ ) = '\n'; | 107 | *( g_state.outbuf[0 ] + g_state.outfill[0 ]++ ) = '\n'; |
| 107 | *( g_state.outbuf[12] + g_state.outfill[12]++ ) = '\n'; | 108 | *( g_state.outbuf[12] + g_state.outfill[12]++ ) = '\n'; |
| 108 | *( g_state.outbuf[13] + g_state.outfill[13]++ ) = '\n'; | 109 | *( g_state.outbuf[13] + g_state.outfill[13]++ ) = '\n'; |
| 110 | *( g_state.outbuf[14] + g_state.outfill[14]++ ) = '\n'; | ||
| 109 | if( !end ) | 111 | if( !end ) |
| 110 | return; | 112 | return; |
| 111 | } | 113 | } |
| @@ -178,7 +180,7 @@ static void act_on_file( uint8_t *file ) | |||
| 178 | g_state.zip_len = snprintf( g_state.zip, sizeof(g_state.zip), "%s", zip ); | 180 | g_state.zip_len = snprintf( g_state.zip, sizeof(g_state.zip), "%s", zip ); |
| 179 | g_state.vorwahl = vorwahl; | 181 | g_state.vorwahl = vorwahl; |
| 180 | 182 | ||
| 181 | /* printf( "Working on a %04d page and %06d records file, city: %4s %-32s with prefix %s\n", num_pages, num_records, zip, ort, vorwahl ); */ | 183 | printf( "Working on a %04d page and %06d records file, city: %4s %-32s with prefix %s\n", num_pages, num_records, zip, ort, vorwahl ); |
| 182 | (void)num_records; /* silence warning about unused variable */ | 184 | (void)num_records; /* silence warning about unused variable */ |
| 183 | 185 | ||
| 184 | for( page = 0; page < num_pages; ++page ) | 186 | for( page = 0; page < num_pages; ++page ) |
| @@ -193,7 +195,7 @@ int main( ) | |||
| 193 | ssize_t temp = 0; | 195 | ssize_t temp = 0; |
| 194 | int i; | 196 | int i; |
| 195 | 197 | ||
| 196 | for( i=0; i<14; ++i ) | 198 | for( i=0; i<15; ++i ) |
| 197 | { | 199 | { |
| 198 | sprintf( filename, "%02d_unknown", i+1 ); | 200 | sprintf( filename, "%02d_unknown", i+1 ); |
| 199 | g_state.outfiles[i] = open( filename, O_WRONLY | O_APPEND | O_CREAT, 0644 ); | 201 | g_state.outfiles[i] = open( filename, O_WRONLY | O_APPEND | O_CREAT, 0644 ); |
| @@ -209,14 +211,14 @@ int main( ) | |||
| 209 | unmap_file( &f ); | 211 | unmap_file( &f ); |
| 210 | 212 | ||
| 211 | /* Write out results */ | 213 | /* Write out results */ |
| 212 | for( i=0; i<14; ++i ) { | 214 | for( i=0; i<15; ++i ) { |
| 213 | /* if( g_state.outfill[i] > 1024*1024*6 ) printf( "Large: %s %zd\n", g_state.ort, g_state.outfill[i] ); */ | 215 | /* if( g_state.outfill[i] > 1024*1024*6 ) printf( "Large: %s %zd\n", g_state.ort, g_state.outfill[i] ); */ |
| 214 | temp += write( g_state.outfiles[i], g_state.outbuf[i], g_state.outfill[i] ); | 216 | temp += write( g_state.outfiles[i], g_state.outbuf[i], g_state.outfill[i] ); |
| 215 | g_state.outfill[i] = 0; | 217 | g_state.outfill[i] = 0; |
| 216 | } | 218 | } |
| 217 | } | 219 | } |
| 218 | 220 | ||
| 219 | for( i=0; i<14; ++i ) { | 221 | for( i=0; i<15; ++i ) { |
| 220 | temp += write( g_state.outfiles[i], g_state.outbuf[i], g_state.outfill[i] ); | 222 | temp += write( g_state.outfiles[i], g_state.outbuf[i], g_state.outfill[i] ); |
| 221 | close( g_state.outfiles[i] ); | 223 | close( g_state.outfiles[i] ); |
| 222 | } | 224 | } |
diff --git a/src/export/mystdlib.c b/src/export/mystdlib.c index b65f63d..31c991e 100644 --- a/src/export/mystdlib.c +++ b/src/export/mystdlib.c | |||
| @@ -21,7 +21,9 @@ MAP map_file( char *filename, int readonly ) | |||
| 21 | if( ( map->fh = open( filename, readonly ? O_RDONLY : O_RDWR ) ) >= 0 ) | 21 | if( ( map->fh = open( filename, readonly ? O_RDONLY : O_RDWR ) ) >= 0 ) |
| 22 | { | 22 | { |
| 23 | fstat( map->fh, &fstatus ); | 23 | fstat( map->fh, &fstatus ); |
| 24 | if( ( map->addr = mmap( NULL, map->size = (size_t)fstatus.st_size, | 24 | map->size = (size_t)fstatus.st_size; |
| 25 | if (!map->size) return map; | ||
| 26 | if( ( map->addr = mmap( NULL, map->size, | ||
| 25 | PROT_READ | ( readonly ? 0 : PROT_WRITE), (readonly ? MAP_PRIVATE : MAP_SHARED), map->fh, 0) ) == MAP_FAILED ) | 27 | PROT_READ | ( readonly ? 0 : PROT_WRITE), (readonly ? MAP_PRIVATE : MAP_SHARED), map->fh, 0) ) == MAP_FAILED ) |
| 26 | { | 28 | { |
| 27 | fprintf( stderr, "Mapping file '%s' failed\n", filename ); | 29 | fprintf( stderr, "Mapping file '%s' failed\n", filename ); |
diff --git a/src/postprocess/map_plz.c b/src/postprocess/map_plz.c index ab0db71..9dec6bb 100644 --- a/src/postprocess/map_plz.c +++ b/src/postprocess/map_plz.c | |||
| @@ -18,7 +18,7 @@ int main(int argc, char **args) { | |||
| 18 | FILE *bfile, *streetfile_out; | 18 | FILE *bfile, *streetfile_out; |
| 19 | char *ptr, *input = malloc(65335); | 19 | char *ptr, *input = malloc(65335); |
| 20 | char *ort = malloc(65335), vorwahl_block[16]; | 20 | char *ort = malloc(65335), vorwahl_block[16]; |
| 21 | int i, brutes_count = 0, report = 0; | 21 | unsigned int i, brutes_count = 0, report = 0; |
| 22 | brute_t *brutes = malloc(200000*sizeof(brute_t)); | 22 | brute_t *brutes = malloc(200000*sizeof(brute_t)); |
| 23 | 23 | ||
| 24 | /* prepare io */ | 24 | /* prepare io */ |
| @@ -63,7 +63,7 @@ int main(int argc, char **args) { | |||
| 63 | g_book_by_name = (entry_t*)malloc(g_book_size * sizeof(entry_t)); | 63 | g_book_by_name = (entry_t*)malloc(g_book_size * sizeof(entry_t)); |
| 64 | 64 | ||
| 65 | /* Split pointers into input files into our arrays */ | 65 | /* Split pointers into input files into our arrays */ |
| 66 | for (i = 0, ptr = (char*)tbuch->addr; i < g_book_size; ++i) { | 66 | for (i=0, ptr=(char*)tbuch->addr; i<g_book_size; ++i) { |
| 67 | g_book[i].vorwahl = ptr; ptr += strlen(ptr) + 1; | 67 | g_book[i].vorwahl = ptr; ptr += strlen(ptr) + 1; |
| 68 | g_book[i].ort = ptr; ptr += strlen(ptr) + 1; | 68 | g_book[i].ort = ptr; ptr += strlen(ptr) + 1; |
| 69 | g_book[i].strasse = ptr; ptr += strlen(ptr) + 1; | 69 | g_book[i].strasse = ptr; ptr += strlen(ptr) + 1; |
diff --git a/src/postprocess/merge_entries.c b/src/postprocess/merge_entries.c index f9ee67d..1dd7d50 100644 --- a/src/postprocess/merge_entries.c +++ b/src/postprocess/merge_entries.c | |||
| @@ -148,9 +148,10 @@ static int sort_me(const void *f_a, const void *f_b) { | |||
| 148 | outvec_t *oa_row = oa + row * COLUMNS; | 148 | outvec_t *oa_row = oa + row * COLUMNS; |
| 149 | outvec_t *ob_row = ob + row * COLUMNS; | 149 | outvec_t *ob_row = ob + row * COLUMNS; |
| 150 | 150 | ||
| 151 | if ((res = STRCMP_n(oa_row[ 2].ptr, ob_row[ 2].ptr))) return res; /* PLZ */ | ||
| 152 | if ((res = STRCMP_n(oa_row[ 9].ptr, ob_row[ 9].ptr))) return res; /* Ort */ | ||
| 151 | if ((res = STRCMP_n(oa_row[10].ptr, ob_row[10].ptr))) return res; /* Vorwahl */ | 153 | if ((res = STRCMP_n(oa_row[10].ptr, ob_row[10].ptr))) return res; /* Vorwahl */ |
| 152 | if ((res = STRCMP_n(oa_row[11].ptr, ob_row[11].ptr))) return res; /* Rufnummer */ | 154 | if ((res = STRCMP_n(oa_row[11].ptr, ob_row[11].ptr))) return res; /* Rufnummer */ |
| 153 | if ((res = STRCMP_n(oa_row[ 2].ptr, ob_row[ 2].ptr))) return res; /* PLZ */ | ||
| 154 | if ((res = STRCMP_n(oa_row[ 6].ptr, ob_row[ 6].ptr))) return res; /* Strasse */ | 155 | if ((res = STRCMP_n(oa_row[ 6].ptr, ob_row[ 6].ptr))) return res; /* Strasse */ |
| 155 | if ((res = STRCMP_n(oa_row[ 7].ptr, ob_row[ 7].ptr))) return res; /* Hausnummer */ | 156 | if ((res = STRCMP_n(oa_row[ 7].ptr, ob_row[ 7].ptr))) return res; /* Hausnummer */ |
| 156 | if ((res = STRCMP_n(oa_row[ 3].ptr, ob_row[ 3].ptr))) return res; /* Nachname */ | 157 | if ((res = STRCMP_n(oa_row[ 3].ptr, ob_row[ 3].ptr))) return res; /* Nachname */ |
| @@ -200,7 +201,7 @@ int main(int argc, char **args) { | |||
| 200 | unsigned long current = 0, i, flag; | 201 | unsigned long current = 0, i, flag; |
| 201 | uint64_t year_list = 0, revflag_list = 0, bizflag_list = 0; | 202 | uint64_t year_list = 0, revflag_list = 0, bizflag_list = 0; |
| 202 | 203 | ||
| 203 | if (argc != 1) exit(1); | 204 | if (argc != 2) exit(1); |
| 204 | tbuch = map_file(args[1], 1); | 205 | tbuch = map_file(args[1], 1); |
| 205 | 206 | ||
| 206 | /* Estimate upper bound for amount of lines */ | 207 | /* Estimate upper bound for amount of lines */ |
diff --git a/src/postprocess/postprocess-1992.sh b/src/postprocess/postprocess-1992.sh index 1e685d2..6720991 100644 --- a/src/postprocess/postprocess-1992.sh +++ b/src/postprocess/postprocess-1992.sh | |||
| @@ -4,19 +4,16 @@ paste 1992_Q2/{01_Flags,12_Vorwahl,12_Vorwahl_block,11_Ort,07_Strasse,08_Hausnum | |||
| 4 | # Generate lookup file from 1995 | 4 | # Generate lookup file from 1995 |
| 5 | paste 1995_Q0/{12_Vorwahl,11_Ort,07_Strasse,08_Hausnummer,02_Nachname,03_Vorname,13_Rufnummer,10_Postleitzahl} | tr '\n\t' '\0' > 1995-voshnvrp.bin | 5 | paste 1995_Q0/{12_Vorwahl,11_Ort,07_Strasse,08_Hausnummer,02_Nachname,03_Vorname,13_Rufnummer,10_Postleitzahl} | tr '\n\t' '\0' > 1995-voshnvrp.bin |
| 6 | 6 | ||
| 7 | # To debug in lldb | ||
| 8 | process launch -i 1992_testfile.txt -- 1995-vorwahl-ort-strasse-hnr-name-vorname-rufnummer-plz.bin | ||
| 9 | |||
| 10 | # Compile plz mapper | 7 | # Compile plz mapper |
| 11 | cc -O3 -o map_plz map_plz.c -I ../src/export/ ../src/export/mystdlib.c | 8 | cc -O3 -o map_plz map_plz.c -I ../src/export/ ../src/export/mystdlib.c |
| 12 | 9 | ||
| 10 | # To debug in lldb | ||
| 11 | # cc -O0 -g -o map_plz map_plz.c -I ../src/export/ ../src/export/mystdlib.c | ||
| 12 | # process launch -i 1992_testfile.txt -- 1995-vorwahl-ort-strasse-hnr-name-vorname-rufnummer-plz.bin | ||
| 13 | |||
| 13 | # outputs mapped plz, generates brutemap.txt | 14 | # outputs mapped plz, generates brutemap.txt |
| 14 | touch brutemap_input.bin zip_simple_map.bin | 15 | touch brutemap_input.bin zip_simple_map.bin |
| 15 | ./map_plz 1995-voshnvrp.bin zip_simple_map.bin brutemap_input.bin < 1992-fvvoshnvrpp.txt > 10_Postleitzahl | 16 | ./map_plz 1995-voshnvrp.bin zip_simple_map.bin brutemap_input.bin < 1992-fvvoshnvrpp.txt > 1992_Q2/10_Postleitzahl |
| 16 | |||
| 17 | # generate street name translation table from brutemap, | ||
| 18 | # only taking into account similar street names | ||
| 19 | # cut -f 3,4 brutemap.txt | tr '[:upper:]' '[:lower:]' | paste brutemap.txt - | cut -f 1-4,6,7 | ./jaro | cut -f 1-5 > brutemap_filtered.txt | ||
| 20 | 17 | ||
| 21 | # generate street name translation table from brutemap, | 18 | # generate street name translation table from brutemap, |
| 22 | # only taking into account similar street names, new style | 19 | # only taking into account similar street names, new style |
| @@ -29,4 +26,4 @@ cut -f 1-5 brutemap_simifiltered.txt | sort | uniq -c | sed -E $'s:^ *([[:digit: | |||
| 29 | sort -u zip_mapfile.txt | tr '\n' '\0' > zip_simple_map.bin | 26 | sort -u zip_mapfile.txt | tr '\n' '\0' > zip_simple_map.bin |
| 30 | 27 | ||
| 31 | # Redo the mapping with the data from brutemap and zipmap | 28 | # Redo the mapping with the data from brutemap and zipmap |
| 32 | ./map_plz 1995-voshnvrp.bin zip_simple_map.bin brutemap_input.bin < 1992-fvvoshnvrpp.txt > 10_Postleitzahl | 29 | ./map_plz 1995-voshnvrp.bin zip_simple_map.bin brutemap_input.bin < 1992-fvvoshnvrpp.txt > 1992_Q2/10_Postleitzahl |
