diff options
| author | User Erdgeist <erdgeist@avon.ccc.de> | 2014-02-13 21:16:42 +0000 |
|---|---|---|
| committer | User Erdgeist <erdgeist@avon.ccc.de> | 2014-02-13 21:16:42 +0000 |
| commit | bf154653c1c49eafcf5c47dcded2bf5946aea3d7 (patch) | |
| tree | 6a162296c435f9428bd0b26e4f9a09d1b06c1756 /src | |
| parent | 28f818ad8313da4bec3d0bf1abfbc93da3df4f70 (diff) | |
Make use of the new splitold tool to speed up extraction by a factor of 6
Diffstat (limited to 'src')
| -rwxr-xr-x | src/makecolumns.sh | 46 |
1 files changed, 23 insertions, 23 deletions
diff --git a/src/makecolumns.sh b/src/makecolumns.sh index 5d2d90b..f5803c6 100755 --- a/src/makecolumns.sh +++ b/src/makecolumns.sh | |||
| @@ -112,21 +112,21 @@ handle_old_format() { | |||
| 112 | 112 | ||
| 113 | # Now loop over all files and dump them | 113 | # Now loop over all files and dump them |
| 114 | printf "Splitting decompressed nname chunks into their columns ... " | 114 | printf "Splitting decompressed nname chunks into their columns ... " |
| 115 | for file in `jot -w %0${filename_len}d - ${nname_file} $(( number_of_files - 1 )) 3`; do | 115 | jot -w %0${filename_len}d - ${nname_file} $(( number_of_files - 1 )) 3 | splitold 1 1 |
| 116 | set -- `hexdump -n 8 -v -e '" " 1/4 "%u"' ${file}` | 116 | # set -- `hexdump -n 8 -v -e '" " 1/4 "%u"' ${file}` |
| 117 | tail -c +$(( $2 + 1 )) ${file} | 117 | # tail -c +$(( $2 + 1 )) ${file} |
| 118 | done | tr '\n\0' '\t\n' > 01_02_Flags_Nachname | 118 | # done | tr '\n\0' '\t\n' > 01_02_Flags_Nachname |
| 119 | cut -c 1 < 01_02_Flags_Nachname > 01_Flags | 119 | cut -c 1 < 01_unknown > 01_Flags |
| 120 | cut -c 2- < 01_02_Flags_Nachname > 02_Nachname | 120 | cut -c 2- < 01_unknown > 02_Nachname |
| 121 | rm 01_02_Flags_Nachname | 121 | rm 01_unknown |
| 122 | printf "done.\n" | 122 | printf "done.\n" |
| 123 | 123 | ||
| 124 | printf "Splitting decompress vname chunks into their columns ... " | 124 | printf "Splitting decompress vname chunks into their columns ... " |
| 125 | jot -w "%0${filename_len}d" - ${vname_file} $(( number_of_files - 1 )) 3 | xargs cat | tr '\n\0' '\t\n' > 03_Vorname | 125 | jot -w "%0${filename_len}d" - ${vname_file} $(( number_of_files - 1 )) 3 | xargs cat | tr '\n\0' '\t\n' | tr -d '\377' > 03_Vorname |
| 126 | printf "done.\n" | 126 | printf "done.\n" |
| 127 | 127 | ||
| 128 | printf "Splitting decompress table file chunks into their columns ... " | 128 | printf "Splitting decompress table file chunks into their columns ... " |
| 129 | jot -w %0${filename_len}d - ${table_file} $(( number_of_files - 1 )) 3 | splitold | 129 | jot -w %0${filename_len}d - ${table_file} $(( number_of_files - 1 )) 3 | splitold 4 0 |
| 130 | # for file in `jot -w %0${filename_len}d - ${table_file} $(( number_of_files - 1 )) 3`; do | 130 | # for file in `jot -w %0${filename_len}d - ${table_file} $(( number_of_files - 1 )) 3`; do |
| 131 | # # Offset into first table entry tells us how many | 131 | # # Offset into first table entry tells us how many |
| 132 | # # fields are in table file | 132 | # # fields are in table file |
| @@ -148,17 +148,17 @@ handle_old_format() { | |||
| 148 | 148 | ||
| 149 | # rename our columns extracted from the table file | 149 | # rename our columns extracted from the table file |
| 150 | printf "Converting string terminators to line newlines ... " | 150 | printf "Converting string terminators to line newlines ... " |
| 151 | tr '\0' '\n' < 04_unknown > 04_Namenszusatz | 151 | mv 04_unknown 04_Namenszusatz |
| 152 | tr '\0' '\n' < 05_unknown > 05_Adresszusatz | 152 | mv 05_unknown 05_Adresszusatz |
| 153 | tr '\0' '\n' < 06_unknown > 06_Ortszusatz | 153 | mv 06_unknown 06_Ortszusatz |
| 154 | tr '\0' '\n' < 08_unknown > 08_Hausnummer | 154 | mv 08_unknown 08_Hausnummer |
| 155 | tr '\0' '\n' < 09_unknown > 09_Verweise | 155 | mv 09_unknown 09_Verweise |
| 156 | tr '\0' '\n' < 10_unknown > 10_Postleitzahl | 156 | mv 10_unknown 10_Postleitzahl |
| 157 | tr '\0' '\n' < 11_unknown > 11_Ort | 157 | mv 11_unknown 11_Ort |
| 158 | tr '\0' '\n' < 12_unknown > 12_Vorwahl | 158 | mv 12_unknown 12_Vorwahl |
| 159 | tr '\0' '\n' < 13_unknown > 13_Rufnummer | 159 | mv 13_unknown 13_Rufnummer |
| 160 | [ -f 14_unknown ] && tr '\0' '\n' < 14_unknown > 14_Email | 160 | [ -f 14_unknown ] && mv 14_unknown 14_Email |
| 161 | [ -f 15_unknown ] && tr '\0' '\n' < 15_unknown > 15_Webadresse | 161 | [ -f 15_unknown ] && mv 15_unknown 15_Webadresse |
| 162 | printf "done.\n" | 162 | printf "done.\n" |
| 163 | 163 | ||
| 164 | # If street names come in an extra file, extract | 164 | # If street names come in an extra file, extract |
| @@ -169,12 +169,12 @@ handle_old_format() { | |||
| 169 | # extract street names if 07_unknown contains street indexes | 169 | # extract street names if 07_unknown contains street indexes |
| 170 | # instead of street names | 170 | # instead of street names |
| 171 | if [ -f 99_Strassenname ]; then | 171 | if [ -f 99_Strassenname ]; then |
| 172 | tr '\0' '\n' < 07_unknown > 07_Strassenindex | 172 | mv 07_unknown 07_Strassenindex |
| 173 | printf "Looking up street names from indexes ... " | 173 | printf "Looking up street names from indexes ... " |
| 174 | cut -d ';' -f 1 07_Strassenindex | ${EL} -0x 99_Strassenname > 07_Strasse | 174 | cut -d ';' -f 1 07_Strassenindex | ${EL} -0x 99_Strassenname > 07_Strasse |
| 175 | printf "done.\n" | 175 | printf "done.\n" |
| 176 | else | 176 | else |
| 177 | tr '\0' '\n' < 07_unknown > 07_Strasse | 177 | mv 07_unknown 07_Strasse |
| 178 | fi | 178 | fi |
| 179 | rm ??_unknown | 179 | rm ??_unknown |
| 180 | 180 | ||
