summary refs log tree commit diff
diff options
context:
space:
mode:
author User Erdgeist <erdgeist@avon.ccc.de> 2014-02-13 21:16:42 +0000
committer User Erdgeist <erdgeist@avon.ccc.de> 2014-02-13 21:16:42 +0000
commit bf154653c1c49eafcf5c47dcded2bf5946aea3d7 (patch)
tree 6a162296c435f9428bd0b26e4f9a09d1b06c1756
parent 28f818ad8313da4bec3d0bf1abfbc93da3df4f70 (diff)
make use of new splitold tool to speedup extraction by factor 6
-rwxr-xr-x src/makecolumns.sh 46
1 files changed, 23 insertions, 23 deletions
diff --git a/src/makecolumns.sh b/src/makecolumns.sh
index 5d2d90b..f5803c6 100755
--- a/src/makecolumns.sh
+++ b/src/makecolumns.sh
@@ -112,21 +112,21 @@ handle_old_format() {
112 112
113 # Now loop over all files and dump them 113 # Now loop over all files and dump them
114 printf "Splitting decompressed nname chunks into their columns ... " 114 printf "Splitting decompressed nname chunks into their columns ... "
115 for file in `jot -w %0${filename_len}d - ${nname_file} $(( number_of_files - 1 )) 3`; do 115 jot -w %0${filename_len}d - ${nname_file} $(( number_of_files - 1 )) 3 | splitold 1 1
116 set -- `hexdump -n 8 -v -e '" " 1/4 "%u"' ${file}` 116# set -- `hexdump -n 8 -v -e '" " 1/4 "%u"' ${file}`
117 tail -c +$(( $2 + 1 )) ${file} 117# tail -c +$(( $2 + 1 )) ${file}
118 done | tr '\n\0' '\t\n' > 01_02_Flags_Nachname 118# done | tr '\n\0' '\t\n' > 01_02_Flags_Nachname
119 cut -c 1 < 01_02_Flags_Nachname > 01_Flags 119 cut -c 1 < 01_unknown > 01_Flags
120 cut -c 2- < 01_02_Flags_Nachname > 02_Nachname 120 cut -c 2- < 01_unknown > 02_Nachname
121 rm 01_02_Flags_Nachname 121 rm 01_unknown
122 printf "done.\n" 122 printf "done.\n"
123 123
124 printf "Splitting decompress vname chunks into their columns ... " 124 printf "Splitting decompress vname chunks into their columns ... "
125 jot -w "%0${filename_len}d" - ${vname_file} $(( number_of_files - 1 )) 3 | xargs cat | tr '\n\0' '\t\n' > 03_Vorname 125 jot -w "%0${filename_len}d" - ${vname_file} $(( number_of_files - 1 )) 3 | xargs cat | tr '\n\0' '\t\n' | tr -d '\377' > 03_Vorname
126 printf "done.\n" 126 printf "done.\n"
127 127
128 printf "Splitting decompress table file chunks into their columns ... " 128 printf "Splitting decompress table file chunks into their columns ... "
129 jot -w %0${filename_len}d - ${table_file} $(( number_of_files - 1 )) 3 | splitold 129 jot -w %0${filename_len}d - ${table_file} $(( number_of_files - 1 )) 3 | splitold 4 0
130# for file in `jot -w %0${filename_len}d - ${table_file} $(( number_of_files - 1 )) 3`; do 130# for file in `jot -w %0${filename_len}d - ${table_file} $(( number_of_files - 1 )) 3`; do
131# # Offset into first table entry tells us how many 131# # Offset into first table entry tells us how many
132# # fields are in table file 132# # fields are in table file
@@ -148,17 +148,17 @@ handle_old_format() {
148 148
149 # rename our columns extracted from the table file 149 # rename our columns extracted from the table file
150 printf "Converting string terminators to line newlines ... " 150 printf "Converting string terminators to line newlines ... "
151 tr '\0' '\n' < 04_unknown > 04_Namenszusatz 151 mv 04_unknown 04_Namenszusatz
152 tr '\0' '\n' < 05_unknown > 05_Adresszusatz 152 mv 05_unknown 05_Adresszusatz
153 tr '\0' '\n' < 06_unknown > 06_Ortszusatz 153 mv 06_unknown 06_Ortszusatz
154 tr '\0' '\n' < 08_unknown > 08_Hausnummer 154 mv 08_unknown 08_Hausnummer
155 tr '\0' '\n' < 09_unknown > 09_Verweise 155 mv 09_unknown 09_Verweise
156 tr '\0' '\n' < 10_unknown > 10_Postleitzahl 156 mv 10_unknown 10_Postleitzahl
157 tr '\0' '\n' < 11_unknown > 11_Ort 157 mv 11_unknown 11_Ort
158 tr '\0' '\n' < 12_unknown > 12_Vorwahl 158 mv 12_unknown 12_Vorwahl
159 tr '\0' '\n' < 13_unknown > 13_Rufnummer 159 mv 13_unknown 13_Rufnummer
160 [ -f 14_unknown ] && tr '\0' '\n' < 14_unknown > 14_Email 160 [ -f 14_unknown ] && mv 14_unknown 14_Email
161 [ -f 15_unknown ] && tr '\0' '\n' < 15_unknown > 15_Webadresse 161 [ -f 15_unknown ] && mv 15_unknown 15_Webadresse
162 printf "done.\n" 162 printf "done.\n"
163 163
164 # If street names come in an extra file, extract 164 # If street names come in an extra file, extract
@@ -169,12 +169,12 @@ handle_old_format() {
169 # extract street names if 07_unknown contains street indexes 169 # extract street names if 07_unknown contains street indexes
170 # instead of street names 170 # instead of street names
171 if [ -f 99_Strassenname ]; then 171 if [ -f 99_Strassenname ]; then
172 tr '\0' '\n' < 07_unknown > 07_Strassenindex 172 mv 07_unknown 07_Strassenindex
173 printf "Looking up street names from indexes ... " 173 printf "Looking up street names from indexes ... "
174 cut -d ';' -f 1 07_Strassenindex | ${EL} -0x 99_Strassenname > 07_Strasse 174 cut -d ';' -f 1 07_Strassenindex | ${EL} -0x 99_Strassenname > 07_Strasse
175 printf "done.\n" 175 printf "done.\n"
176 else 176 else
177 tr '\0' '\n' < 07_unknown > 07_Strasse 177 mv 07_unknown 07_Strasse
178 fi 178 fi
179 rm ??_unknown 179 rm ??_unknown
180 180