From 00a8ae93bc88d8cdf1aecc7f3c410359af987c3c Mon Sep 17 00:00:00 2001 From: User Erdgeist Date: Fri, 7 Feb 2014 17:15:43 +0000 Subject: Mute the invoked tools Be more verbose what we do when parsing the older format Factor out common code to avoid redundant implementation Export geo coordinates for the older format --- src/makecolumns.sh | 90 ++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 57 insertions(+), 33 deletions(-) mode change 100644 => 100755 src/makecolumns.sh diff --git a/src/makecolumns.sh b/src/makecolumns.sh old mode 100644 new mode 100755 index 4986157..95c9d08 --- a/src/makecolumns.sh +++ b/src/makecolumns.sh @@ -3,6 +3,7 @@ export LANG=C export LC_CTYPE=C export LC_ALL=C +export PATH=${PATH}:`pwd`/../bin/ main() { [ -f /usr/local/bin/el ] && EL=/usr/local/bin/el @@ -28,11 +29,8 @@ main() { if [ -f "$1/phonebook.db" ]; then handle_new_format $1 - elif [ -f "$1/DAT/TEILN.DAT" ]; then - handle_old_format "$1/DAT/TEILN.DAT" "${strassen}" - elif [ -f "$1/dat/teiln.dat" ]; then - echo handle_old_format "$1/dat/teiln.dat" "${strassen}" - handle_old_format "$1/dat/teiln.dat" "${strassen}" + elif [ -f $1/[Dd][Aa][Tt]/[Tt][Ee][Ii][Ll][Nn].[Dd][Aa][Tt] ]; then + handle_old_format $1 else echo "Not a recognized Telefonbuch folder" fi @@ -40,40 +38,56 @@ main() { } get_dword() { - # $1 file, $2 offset - set -- `od -tu4 -N4 -j$(( 4*${2:-0} )) ${1}` - printf "%d\n" $2 + # $1 file, $2 offset + set -- `od -tu4 -N4 -j$(( 4*${2:-0} )) ${1}` + printf "%d\n" $2 +} + +do_decompress_old() { + printf "Extracting $2 chunks ... " + extractblocks "${1}" | grep -v appropriate + printf "done.\n" + + printf "Decompressing $2 chunks ... " + for archive in *.lha; do + lha x ${archive} > /dev/null + rm ${archive} + done + printf "done.\n" +} + +do_processfile_old() { + working_on=`basename ${1}` + mkdir $working_on && cd ${working_on} + do_decompress_old "${1}" "${2}" + cd .. + + printf "Combining $2 into single file ... " + if [ "${4}" = "convert_zeros" ]; then + cat ${working_on}/* | tr '\n\0' '\t\n' > $3 + else + cat ${working_on}/* > $3 + fi + printf "done.\n" + + rm -rf ${working_on} } handle_old_format() { # Clear old files. Be very careful, we could # have ended up in an unexpected directory, after all. + printf "Cleaning up old temporary files ... " find -E . -depth 1 -regex '^\./[0123456789]+' -delete - rm -f ??_* + rm -rf ??_* coords strassen + printf "done.\n" # If street names come in an extra file, extract # street names first - if [ "$2" ]; then - mkdir strassen - cd strassen/ - - ../../bin/extractblocks $2 - - # This should leave us with a bunch of .lha files - for archive in *.lha; do lha x ${archive}; done - find . -name \*.lha -delete - cd .. - - cat strassen/* | tr '\n\0' '\t\n' > 99_Strassenname - rm -r strassen/ - fi + streets=$1/[Dd][Aa][Tt]/[Ss][Tt][Rr][Aa][Ss][Ss][Ee][Nn].[Dd][Aa][Tt] + [ -f ${streets} ] && do_processfile_old "${streets}" "street name" 99_Strassenname convert_zeros # Then extract teiln.dat - ../bin/extractblocks $1 - - # This should leave us with a bunch of .lha files - for archive in *.lha; do lha x ${archive}; done - find . -name \*.lha -delete + do_decompress_old $1/[Dd][Aa][Tt]/[Tt][Ee][Ii][Ll][Nn].[Dd][Aa][Tt] "teiln.dat" # See how long each filename is filename_len=$(( `ls | head -n 1 | wc -c` - 1 )) @@ -100,6 +114,7 @@ handle_old_format() { fi # Now loop over all files and dump them + printf "Splitting decompressed chunks into their columns ... " while [ -f ${nname_file} ]; do # Get number of entries in this round count=`get_dword ${nname_file}` @@ -134,9 +149,12 @@ handle_old_format() { table_file=`printf %0${filename_len}d ${table_file}` done + printf "done.\n" # wipe all temporary extracted files + printf "Cleaning up decompressed chunks ... " find -E . -depth 1 -regex '^\./[0123456789]+' -delete + printf "done.\n" # rename our columns extracted from the table file mv 04_unknown 04_Namenszusatz @@ -155,23 +173,29 @@ handle_old_format() { # instead of street names if [ -f 99_Strassenname ]; then mv 07_unknown 07_Strassenindex + printf "Looking up street names from indexes ... " cut -d ';' -f 1 07_Strassenindex | ${EL} -0x 99_Strassenname > 07_Strasse + printf "done.\n" else mv 07_unknown 07_Strasse fi + + karto=$1/[Dd][Aa][Tt]/[Kk][Aa][Rr][Tt][Oo].[Dd][Aa][Tt] + printf "%s\n" $karto + [ -f ${karto} ] && do_processfile_old ${karto} "geo coordinates" 90_Geokoordinaten_hnr } handle_new_format() { echo "Working on $1. Detected post-2003 Telefonbuch version." printf "Extracting street names ... " - ../bin/decompress $1/streets.tl + decompress $1/streets.tl cat file_* | tr '\n\0' '\t\n' > 99_Strassenname rm file_* printf "done.\n" printf "Extracting phonebook.db ... " - ../bin/decompress $1/phonebook.db | grep -v appropriate + decompress $1/phonebook.db | grep -v appropriate numfiles=`find . -name file_\* | wc -l` printf "done.\n" @@ -206,7 +230,7 @@ handle_new_format() { mv column_9 13_Rufnummer mv column_10 14_15_Email_Webadresse - printf "Looing up street names from indexes ... " + printf "Looking up street names from indexes ... " cut -f 1 07_08_Strassenindex_Hausnummer | ${EL} -0 99_Strassenname > 07_Strasse printf "done.\n" @@ -216,7 +240,7 @@ handle_new_format() { if [ -f $1/zip-streets-hn-geo.tl ]; then printf "Extracting geo coordinates (precision: house number) ... " - ../bin/decompress $1/zip-streets-hn-geo.tl + decompress $1/zip-streets-hn-geo.tl cat file_* > 90_Geokoordinaten_hnr printf "done.\n" printf "Looking up geo coordinates for each phonebook entry ... " @@ -224,7 +248,7 @@ handle_new_format() { printf "done.\n" elif [ -f $1/zip-streets-geo.tl ]; then printf "Extracting geo coordinates (precision: street) ... " - ../bin/decompress $1/zip-streets-geo.tl + decompress $1/zip-streets-geo.tl cat file_* > 91_Geokoordinaten_str printf "done.\n" printf "Looking up geo coordinates for each phonebook entry ... " -- cgit v1.2.3