From bf154653c1c49eafcf5c47dcded2bf5946aea3d7 Mon Sep 17 00:00:00 2001 From: User Erdgeist Date: Thu, 13 Feb 2014 21:16:42 +0000 Subject: make use of new splitold tool to speedup extraction by factor 6 --- src/makecolumns.sh | 46 +++++++++++++++++++++++----------------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/src/makecolumns.sh b/src/makecolumns.sh index 5d2d90b..f5803c6 100755 --- a/src/makecolumns.sh +++ b/src/makecolumns.sh @@ -112,21 +112,21 @@ handle_old_format() { # Now loop over all files and dump them printf "Splitting decompressed nname chunks into their columns ... " - for file in `jot -w %0${filename_len}d - ${nname_file} $(( number_of_files - 1 )) 3`; do - set -- `hexdump -n 8 -v -e '" " 1/4 "%u"' ${file}` - tail -c +$(( $2 + 1 )) ${file} - done | tr '\n\0' '\t\n' > 01_02_Flags_Nachname - cut -c 1 < 01_02_Flags_Nachname > 01_Flags - cut -c 2- < 01_02_Flags_Nachname > 02_Nachname - rm 01_02_Flags_Nachname + jot -w %0${filename_len}d - ${nname_file} $(( number_of_files - 1 )) 3 | splitold 1 1 +# set -- `hexdump -n 8 -v -e '" " 1/4 "%u"' ${file}` +# tail -c +$(( $2 + 1 )) ${file} +# done | tr '\n\0' '\t\n' > 01_02_Flags_Nachname + cut -c 1 < 01_unknown > 01_Flags + cut -c 2- < 01_unknown > 02_Nachname + rm 01_unknown printf "done.\n" printf "Splitting decompress vname chunks into their columns ... " - jot -w "%0${filename_len}d" - ${vname_file} $(( number_of_files - 1 )) 3 | xargs cat | tr '\n\0' '\t\n' > 03_Vorname + jot -w "%0${filename_len}d" - ${vname_file} $(( number_of_files - 1 )) 3 | xargs cat | tr '\n\0' '\t\n' | tr -d '\377' > 03_Vorname printf "done.\n" - + printf "Splitting decompress table file chunks into their columns ... " - jot -w %0${filename_len}d - ${table_file} $(( number_of_files - 1 )) 3 | splitold + jot -w %0${filename_len}d - ${table_file} $(( number_of_files - 1 )) 3 | splitold 4 0 # for file in `jot -w %0${filename_len}d - ${table_file} $(( number_of_files - 1 )) 3`; do # # Offset into first table entry tells us how many # # fields are in table file @@ -148,17 +148,17 @@ handle_old_format() { # rename our columns extracted from the table file printf "Converting string terminators to line newlines ... " - tr '\0' '\n' < 04_unknown > 04_Namenszusatz - tr '\0' '\n' < 05_unknown > 05_Adresszusatz - tr '\0' '\n' < 06_unknown > 06_Ortszusatz - tr '\0' '\n' < 08_unknown > 08_Hausnummer - tr '\0' '\n' < 09_unknown > 09_Verweise - tr '\0' '\n' < 10_unknown > 10_Postleitzahl - tr '\0' '\n' < 11_unknown > 11_Ort - tr '\0' '\n' < 12_unknown > 12_Vorwahl - tr '\0' '\n' < 13_unknown > 13_Rufnummer - [ -f 14_unknown ] && tr '\0' '\n' < 14_unknown > 14_Email - [ -f 15_unknown ] && tr '\0' '\n' < 15_unknown > 15_Webadresse + mv 04_unknown 04_Namenszusatz + mv 05_unknown 05_Adresszusatz + mv 06_unknown 06_Ortszusatz + mv 08_unknown 08_Hausnummer + mv 09_unknown 09_Verweise + mv 10_unknown 10_Postleitzahl + mv 11_unknown 11_Ort + mv 12_unknown 12_Vorwahl + mv 13_unknown 13_Rufnummer + [ -f 14_unknown ] && mv 14_unknown 14_Email + [ -f 15_unknown ] && mv 15_unknown 15_Webadresse printf "done.\n" # If street names come in an extra file, extract @@ -169,12 +169,12 @@ handle_old_format() { # extract street names if 07_unknown contains street indexes # instead of street names if [ -f 99_Strassenname ]; then - tr '\0' '\n' < 07_unknown > 07_Strassenindex + mv 07_unknown 07_Strassenindex printf "Looking up street names from indexes ... " cut -d ';' -f 1 07_Strassenindex | ${EL} -0x 99_Strassenname > 07_Strasse printf "done.\n" else - tr '\0' '\n' < 07_unknown > 07_Strasse + mv 07_unknown 07_Strasse fi rm ??_unknown -- cgit v1.2.3