diff options
author | Dirk Engling <erdgeist@erdgeist.org> | 2019-01-30 18:12:18 +0100 |
---|---|---|
committer | Dirk Engling <erdgeist@erdgeist.org> | 2019-01-30 18:12:18 +0100 |
commit | a187241f4e4cf8a592e0a3cc0b61f949e6184a9e (patch) | |
tree | ee6adb8733dd81698f4a50bf75aeadbd30f68464 /makecolumns.sh | |
parent | 0150806fbf0cc64e60984f8a99aa45ca734e0735 (diff) |
Add branch name mapper code for v3
Diffstat (limited to 'makecolumns.sh')
-rwxr-xr-x | makecolumns.sh | 44 |
1 files changed, 30 insertions, 14 deletions
diff --git a/makecolumns.sh b/makecolumns.sh index edd965c..4f4bebc 100755 --- a/makecolumns.sh +++ b/makecolumns.sh | |||
@@ -171,9 +171,21 @@ handle_format_version_2() { | |||
171 | } | 171 | } |
172 | 172 | ||
173 | handle_format_version_3() { | 173 | handle_format_version_3() { |
174 | echo "Working on $1. Detected pre-2004 Telefonbuch version." | 174 | # glob |
175 | teiln=`printf "%s" "$1"/[Dd][Aa][Tt]/[Tt][Ee][Ii][Ll][Nn].[Dd][Aa][Tt]` | ||
176 | braid=`printf "%s" "$1"/[Dd][Aa][Tt]/[Bb][Rr][Aa][Ii][Dd].[Dd][Aa][Tt]` | ||
177 | streets=`printf "%s" "$1"/[Dd][Aa][Tt]/[Ss][Tt][Rr][Aa][Ss][Ss][Ee][Nn].[Dd][Aa][Tt]` | ||
178 | karto=`printf "%s" "$1"/[Dd][Aa][Tt]/[Kk][Aa][Rr][Tt][Oo].[Dd][Aa][Tt]` | ||
179 | |||
180 | if [ -f "${braid}" ]; then | ||
181 | echo "Working on $1. Detected pre-2004 Yellow Pages version." | ||
182 | is_yp=true | ||
183 | else | ||
184 | echo "Working on $1. Detected pre-2004 Telefonbuch version." | ||
185 | unset is_yp | ||
186 | fi | ||
175 | # Extract teiln.dat | 187 | # Extract teiln.dat |
176 | do_decompress_version_3 "$1"/[Dd][Aa][Tt]/[Tt][Ee][Ii][Ll][Nn].[Dd][Aa][Tt] "teiln.dat" | 188 | do_decompress_version_3 "${teiln}" "teiln.dat" |
177 | 189 | ||
178 | # See how long each filename is | 190 | # See how long each filename is |
179 | export filename_len=$(( `ls | head -n 1 | wc -c` - 1 )) | 191 | export filename_len=$(( `ls | head -n 1 | wc -c` - 1 )) |
@@ -205,8 +217,14 @@ handle_format_version_3() { | |||
205 | # set -- `hexdump -n 8 -v -e '" " 1/4 "%u"' ${file}` | 217 | # set -- `hexdump -n 8 -v -e '" " 1/4 "%u"' ${file}` |
206 | # tail -c +$(( $2 + 1 )) ${file} | 218 | # tail -c +$(( $2 + 1 )) ${file} |
207 | # done | tr '\n\0' '\t\n' > 01_02_Flags_Nachname | 219 | # done | tr '\n\0' '\t\n' > 01_02_Flags_Nachname |
208 | cut -c 1 < 01_unknown > 01_Flags | 220 | if [ "${is_yp}" ]; then |
209 | cut -c 2- < 01_unknown > 02_Nachname | 221 | cut -c 1 < 01_unknown > 01_Flags |
222 | cut -c 2-7 < 01_unknown > 09_Branchenindex | ||
223 | cut -c 8- < 01_unknown > 02_Nachname | ||
224 | else | ||
225 | cut -c 1 < 01_unknown > 01_Flags | ||
226 | cut -c 2- < 01_unknown > 02_Nachname | ||
227 | fi | ||
210 | rm 01_unknown | 228 | rm 01_unknown |
211 | printf "done.\n" | 229 | printf "done.\n" |
212 | 230 | ||
@@ -259,7 +277,6 @@ handle_format_version_3() { | |||
259 | 277 | ||
260 | # If street names come in an extra file, extract | 278 | # If street names come in an extra file, extract |
261 | # street names first | 279 | # street names first |
262 | streets="$1"/[Dd][Aa][Tt]/[Ss][Tt][Rr][Aa][Ss][Ss][Ee][Nn].[Dd][Aa][Tt] | ||
263 | [ -f "${streets}" ] && do_processfile_version_3 "${streets}" "street name" 99_Strassenname convert_zeros | 280 | [ -f "${streets}" ] && do_processfile_version_3 "${streets}" "street name" 99_Strassenname convert_zeros |
264 | 281 | ||
265 | # extract street names if 07_unknown contains street indexes | 282 | # extract street names if 07_unknown contains street indexes |
@@ -280,7 +297,6 @@ handle_format_version_3() { | |||
280 | tidy_streetnames 07_Strasse | 297 | tidy_streetnames 07_Strasse |
281 | fi | 298 | fi |
282 | 299 | ||
283 | karto="$1"/[Dd][Aa][Tt]/[Kk][Aa][Rr][Tt][Oo].[Dd][Aa][Tt] | ||
284 | if [ -f "${karto}" ]; then | 300 | if [ -f "${karto}" ]; then |
285 | do_processfile_version_3 "${karto}" "geo coordinates" 90_Geokoordinaten_hnr_raw | 301 | do_processfile_version_3 "${karto}" "geo coordinates" 90_Geokoordinaten_hnr_raw |
286 | 302 | ||
@@ -290,6 +306,14 @@ handle_format_version_3() { | |||
290 | paste 10_Postleitzahl 11_Ort 07_Strasse 08_Hausnummer | map_coords 90_Geokoordinaten_hnr | convert_coords > 16_Koordinaten | 306 | paste 10_Postleitzahl 11_Ort 07_Strasse 08_Hausnummer | map_coords 90_Geokoordinaten_hnr | convert_coords > 16_Koordinaten |
291 | printf "done.\n" | 307 | printf "done.\n" |
292 | fi | 308 | fi |
309 | |||
310 | if [ -f "${braid}" ]; then | ||
311 | do_processfile_version_3 "${braid}" "branchen name index" 97_Branchenname convert_zeros | ||
312 | |||
313 | printf "Looking up branch names from codes ... " | ||
314 | map_branches_v3 97_Branchenname < 09_Branchenindex > 09_Branchen | ||
315 | printf "done.\n" | ||
316 | fi | ||
293 | } | 317 | } |
294 | 318 | ||
295 | handle_format_version_4() { | 319 | handle_format_version_4() { |
@@ -387,17 +411,9 @@ handle_format_version_4() { | |||
387 | rm file_* | 411 | rm file_* |
388 | printf "done.\n" | 412 | printf "done.\n" |
389 | 413 | ||
390 | printf "Generating branch name index ... " | ||
391 | mkdir branchcodes/ | ||
392 | while read index name; do | ||
393 | printf $name > branchcodes/${index} | ||
394 | done < 97_Branchenname | ||
395 | printf "done.\n" | ||
396 | |||
397 | printf "Looking up branch names from codes ... " | 414 | printf "Looking up branch names from codes ... " |
398 | map_branches 97_Branchenname < 09_Verweise > 09_Branchen | 415 | map_branches 97_Branchenname < 09_Verweise > 09_Branchen |
399 | printf "done.\n" | 416 | printf "done.\n" |
400 | rm -r branchcodes | ||
401 | fi | 417 | fi |
402 | } | 418 | } |
403 | 419 | ||