diff options
Diffstat (limited to 'src/makecolumns.sh')
-rwxr-xr-x | src/makecolumns.sh | 42 |
1 files changed, 21 insertions, 21 deletions
diff --git a/src/makecolumns.sh b/src/makecolumns.sh index 04806fb..28dd0a8 100755 --- a/src/makecolumns.sh +++ b/src/makecolumns.sh | |||
@@ -20,7 +20,7 @@ main() { | |||
20 | fi | 20 | fi |
21 | 21 | ||
22 | # Compile all the binaries | 22 | # Compile all the binaries |
23 | make all | 23 | make binaries |
24 | 24 | ||
25 | printf "Cleaning up old working directory ... " | 25 | printf "Cleaning up old working directory ... " |
26 | rm -rf ../work_`basename ${1#white_}` | 26 | rm -rf ../work_`basename ${1#white_}` |
@@ -29,18 +29,18 @@ main() { | |||
29 | cd ../work_`basename ${1#white_}` || exit 1 | 29 | cd ../work_`basename ${1#white_}` || exit 1 |
30 | 30 | ||
31 | if [ -f "$1/phonebook.db" ]; then | 31 | if [ -f "$1/phonebook.db" ]; then |
32 | handle_new_format $1 | 32 | handle_format_version_3 $1 |
33 | elif [ -f $1/[Dd][Aa][Tt]/[Tt][Ee][Ii][Ll][Nn].[Dd][Aa][Tt] ]; then | 33 | elif [ -f $1/[Dd][Aa][Tt]/[Tt][Ee][Ii][Ll][Nn].[Dd][Aa][Tt] ]; then |
34 | handle_old_format $1 | 34 | handle_format_version_2 $1 |
35 | else | 35 | else |
36 | echo "Not a recognized Telefonbuch folder" | 36 | echo "Not a recognized Telefonbuch folder" |
37 | fi | 37 | fi |
38 | cd .. | 38 | cd .. |
39 | } | 39 | } |
40 | 40 | ||
41 | do_decompress_old() { | 41 | do_decompress_version_2() { |
42 | printf "Extracting $2 chunks ... " | 42 | printf "Extracting $2 chunks ... " |
43 | extractblocks "${1}" | 43 | extract_version_2 "${1}" |
44 | printf "done.\n" | 44 | printf "done.\n" |
45 | 45 | ||
46 | printf "Decompressing $2 chunks ... " | 46 | printf "Decompressing $2 chunks ... " |
@@ -55,10 +55,10 @@ do_decompress_old() { | |||
55 | printf "done.\n" | 55 | printf "done.\n" |
56 | } | 56 | } |
57 | 57 | ||
58 | do_processfile_old() { | 58 | do_processfile_version_2() { |
59 | working_on=`basename ${1}` | 59 | working_on=`basename ${1}` |
60 | mkdir $working_on && cd ${working_on} | 60 | mkdir $working_on && cd ${working_on} |
61 | do_decompress_old "${1}" "${2}" | 61 | do_decompress_version_2 "${1}" "${2}" |
62 | cd .. | 62 | cd .. |
63 | 63 | ||
64 | printf "Combining $2 into single file ... " | 64 | printf "Combining $2 into single file ... " |
@@ -81,10 +81,10 @@ get_dword() { | |||
81 | hexdump -n 4 -v -e '" " 1/4 "%u"' `printf %0${filename_len}d ${1}` | 81 | hexdump -n 4 -v -e '" " 1/4 "%u"' `printf %0${filename_len}d ${1}` |
82 | } | 82 | } |
83 | 83 | ||
84 | handle_old_format() { | 84 | handle_format_version_2() { |
85 | echo "Working on $1. Detected pre-2004 Telefonbuch version." | 85 | echo "Working on $1. Detected pre-2004 Telefonbuch version." |
86 | # Extract teiln.dat | 86 | # Extract teiln.dat |
87 | do_decompress_old $1/[Dd][Aa][Tt]/[Tt][Ee][Ii][Ll][Nn].[Dd][Aa][Tt] "teiln.dat" | 87 | do_decompress_version_2 $1/[Dd][Aa][Tt]/[Tt][Ee][Ii][Ll][Nn].[Dd][Aa][Tt] "teiln.dat" |
88 | 88 | ||
89 | # See how long each filename is | 89 | # See how long each filename is |
90 | export filename_len=$(( `ls | head -n 1 | wc -c` - 1 )) | 90 | export filename_len=$(( `ls | head -n 1 | wc -c` - 1 )) |
@@ -112,7 +112,7 @@ handle_old_format() { | |||
112 | 112 | ||
113 | # Now loop over all files and dump them | 113 | # Now loop over all files and dump them |
114 | printf "Splitting decompressed nname chunks into their columns ... " | 114 | printf "Splitting decompressed nname chunks into their columns ... " |
115 | jot -w %0${filename_len}d - ${nname_file} $(( number_of_files - 1 )) 3 | splitold 1 1 | 115 | jot -w %0${filename_len}d - ${nname_file} $(( number_of_files - 1 )) 3 | split_version_2 1 1 |
116 | # set -- `hexdump -n 8 -v -e '" " 1/4 "%u"' ${file}` | 116 | # set -- `hexdump -n 8 -v -e '" " 1/4 "%u"' ${file}` |
117 | # tail -c +$(( $2 + 1 )) ${file} | 117 | # tail -c +$(( $2 + 1 )) ${file} |
118 | # done | tr '\n\0' '\t\n' > 01_02_Flags_Nachname | 118 | # done | tr '\n\0' '\t\n' > 01_02_Flags_Nachname |
@@ -126,7 +126,7 @@ handle_old_format() { | |||
126 | printf "done.\n" | 126 | printf "done.\n" |
127 | 127 | ||
128 | printf "Splitting decompress table file chunks into their columns ... " | 128 | printf "Splitting decompress table file chunks into their columns ... " |
129 | jot -w %0${filename_len}d - ${table_file} $(( number_of_files - 1 )) 3 | splitold 4 0 | 129 | jot -w %0${filename_len}d - ${table_file} $(( number_of_files - 1 )) 3 | split_version_2 4 0 |
130 | # for file in `jot -w %0${filename_len}d - ${table_file} $(( number_of_files - 1 )) 3`; do | 130 | # for file in `jot -w %0${filename_len}d - ${table_file} $(( number_of_files - 1 )) 3`; do |
131 | # # Offset into first table entry tells us how many | 131 | # # Offset into first table entry tells us how many |
132 | # # fields are in table file | 132 | # # fields are in table file |
@@ -162,7 +162,7 @@ handle_old_format() { | |||
162 | # If street names come in an extra file, extract | 162 | # If street names come in an extra file, extract |
163 | # street names first | 163 | # street names first |
164 | streets=$1/[Dd][Aa][Tt]/[Ss][Tt][Rr][Aa][Ss][Ss][Ee][Nn].[Dd][Aa][Tt] | 164 | streets=$1/[Dd][Aa][Tt]/[Ss][Tt][Rr][Aa][Ss][Ss][Ee][Nn].[Dd][Aa][Tt] |
165 | [ -f ${streets} ] && do_processfile_old ${streets} "street name" 99_Strassenname convert_zeros | 165 | [ -f ${streets} ] && do_processfile_version_2 ${streets} "street name" 99_Strassenname convert_zeros |
166 | 166 | ||
167 | # extract street names if 07_unknown contains street indexes | 167 | # extract street names if 07_unknown contains street indexes |
168 | # instead of street names | 168 | # instead of street names |
@@ -177,27 +177,27 @@ handle_old_format() { | |||
177 | 177 | ||
178 | karto=$1/[Dd][Aa][Tt]/[Kk][Aa][Rr][Tt][Oo].[Dd][Aa][Tt] | 178 | karto=$1/[Dd][Aa][Tt]/[Kk][Aa][Rr][Tt][Oo].[Dd][Aa][Tt] |
179 | if [ -f ${karto} ]; then | 179 | if [ -f ${karto} ]; then |
180 | do_processfile_old ${karto} "geo coordinates" 90_Geokoordinaten_hnr_raw | 180 | do_processfile_version_2 ${karto} "geo coordinates" 90_Geokoordinaten_hnr_raw |
181 | 181 | ||
182 | printf "Looking up geo coordinates for each phonebook entry ... " | 182 | printf "Looking up geo coordinates for each phonebook entry ... " |
183 | tr '\0' '\n' < 90_Geokoordinaten_hnr_raw | tr ';' '\t' | cut -f "1,2,3,4,6,7" | tr '\n' '\0' > 90_Geokoordinaten_hnr | 183 | tr '\0' '\n' < 90_Geokoordinaten_hnr_raw | tr ';' '\t' | cut -f "1,2,3,4,6,7" | tr '\n' '\0' > 90_Geokoordinaten_hnr |
184 | rm 90_Geokoordinaten_hnr_raw | 184 | rm 90_Geokoordinaten_hnr_raw |
185 | lam 10_Postleitzahl -s $'\t' 11_Ort -s $'\t' 07_Strasse -s $'\t' 08_Hausnummer | mapcoords 90_Geokoordinaten_hnr | convertcoords > 16_Koordinaten | 185 | lam 10_Postleitzahl -s $'\t' 11_Ort -s $'\t' 07_Strasse -s $'\t' 08_Hausnummer | map_coords 90_Geokoordinaten_hnr | convert_coords > 16_Koordinaten |
186 | printf "done.\n" | 186 | printf "done.\n" |
187 | fi | 187 | fi |
188 | } | 188 | } |
189 | 189 | ||
190 | handle_new_format() { | 190 | handle_format_version_3() { |
191 | echo "Working on $1. Detected post-2003 Telefonbuch version." | 191 | echo "Working on $1. Detected post-2003 Telefonbuch version." |
192 | printf "Extracting street names ... " | 192 | printf "Extracting street names ... " |
193 | decompress $1/streets.tl | 193 | extract_version_3 $1/streets.tl |
194 | 194 | ||
195 | cat file_* | tr '\n\0' '\t\n' > 99_Strassenname | 195 | cat file_* | tr '\n\0' '\t\n' > 99_Strassenname |
196 | rm file_* | 196 | rm file_* |
197 | printf "done.\n" | 197 | printf "done.\n" |
198 | 198 | ||
199 | printf "Extracting phonebook.db ... " | 199 | printf "Extracting phonebook.db ... " |
200 | decompress $1/phonebook.db | 200 | extract_version_3 $1/phonebook.db |
201 | 201 | ||
202 | rows=`find . -name file_\* | wc -l` | 202 | rows=`find . -name file_\* | wc -l` |
203 | printf "done.\n" | 203 | printf "done.\n" |
@@ -237,19 +237,19 @@ handle_new_format() { | |||
237 | 237 | ||
238 | if [ -f $1/zip-streets-hn-geo.tl ]; then | 238 | if [ -f $1/zip-streets-hn-geo.tl ]; then |
239 | printf "Extracting geo coordinates (precision: house number) ... " | 239 | printf "Extracting geo coordinates (precision: house number) ... " |
240 | decompress $1/zip-streets-hn-geo.tl | 240 | extract_version_3 $1/zip-streets-hn-geo.tl |
241 | cat file_* > 90_Geokoordinaten_hnr | 241 | cat file_* > 90_Geokoordinaten_hnr |
242 | printf "done.\n" | 242 | printf "done.\n" |
243 | printf "Looking up geo coordinates for each phonebook entry ... " | 243 | printf "Looking up geo coordinates for each phonebook entry ... " |
244 | lam 10_Postleitzahl -s $'\t' 07_Strasse -s $'\t' 08_Hausnummer | mapcoords 90_Geokoordinaten_hnr | convertcoords > 16_Koordinaten | 244 | lam 10_Postleitzahl -s $'\t' 07_Strasse -s $'\t' 08_Hausnummer | map_coords 90_Geokoordinaten_hnr | convert_coords > 16_Koordinaten |
245 | printf "done.\n" | 245 | printf "done.\n" |
246 | elif [ -f $1/zip-streets-geo.tl ]; then | 246 | elif [ -f $1/zip-streets-geo.tl ]; then |
247 | printf "Extracting geo coordinates (precision: street) ... " | 247 | printf "Extracting geo coordinates (precision: street) ... " |
248 | decompress $1/zip-streets-geo.tl | 248 | extract_version_3 $1/zip-streets-geo.tl |
249 | cat file_* > 91_Geokoordinaten_str | 249 | cat file_* > 91_Geokoordinaten_str |
250 | printf "done.\n" | 250 | printf "done.\n" |
251 | printf "Looking up geo coordinates for each phonebook entry ... " | 251 | printf "Looking up geo coordinates for each phonebook entry ... " |
252 | lam 10_Postleitzahl -s $'\t' 07_Strasse | mapcoords 91_Geokoordinaten_str | convertcoords > 16_Koordinaten | 252 | lam 10_Postleitzahl -s $'\t' 07_Strasse | map_coords 91_Geokoordinaten_str | convert_coords > 16_Koordinaten |
253 | printf "done.\n" | 253 | printf "done.\n" |
254 | fi | 254 | fi |
255 | rm file_* | 255 | rm file_* |