diff options
| -rwxr-xr-x[-rw-r--r--] | src/makecolumns.sh | 90 |
1 files changed, 57 insertions, 33 deletions
diff --git a/src/makecolumns.sh b/src/makecolumns.sh index 4986157..95c9d08 100644..100755 --- a/src/makecolumns.sh +++ b/src/makecolumns.sh | |||
| @@ -3,6 +3,7 @@ | |||
| 3 | export LANG=C | 3 | export LANG=C |
| 4 | export LC_CTYPE=C | 4 | export LC_CTYPE=C |
| 5 | export LC_ALL=C | 5 | export LC_ALL=C |
| 6 | export PATH=${PATH}:`pwd`/../bin/ | ||
| 6 | 7 | ||
| 7 | main() { | 8 | main() { |
| 8 | [ -f /usr/local/bin/el ] && EL=/usr/local/bin/el | 9 | [ -f /usr/local/bin/el ] && EL=/usr/local/bin/el |
| @@ -28,11 +29,8 @@ main() { | |||
| 28 | 29 | ||
| 29 | if [ -f "$1/phonebook.db" ]; then | 30 | if [ -f "$1/phonebook.db" ]; then |
| 30 | handle_new_format $1 | 31 | handle_new_format $1 |
| 31 | elif [ -f "$1/DAT/TEILN.DAT" ]; then | 32 | elif [ -f $1/[Dd][Aa][Tt]/[Tt][Ee][Ii][Ll][Nn].[Dd][Aa][Tt] ]; then |
| 32 | handle_old_format "$1/DAT/TEILN.DAT" "${strassen}" | 33 | handle_old_format $1 |
| 33 | elif [ -f "$1/dat/teiln.dat" ]; then | ||
| 34 | echo handle_old_format "$1/dat/teiln.dat" "${strassen}" | ||
| 35 | handle_old_format "$1/dat/teiln.dat" "${strassen}" | ||
| 36 | else | 34 | else |
| 37 | echo "Not a recognized Telefonbuch folder" | 35 | echo "Not a recognized Telefonbuch folder" |
| 38 | fi | 36 | fi |
| @@ -40,40 +38,56 @@ main() { | |||
| 40 | } | 38 | } |
| 41 | 39 | ||
| 42 | get_dword() { | 40 | get_dword() { |
| 43 | # $1 file, $2 offset | 41 | # $1 file, $2 offset |
| 44 | set -- `od -tu4 -N4 -j$(( 4*${2:-0} )) ${1}` | 42 | set -- `od -tu4 -N4 -j$(( 4*${2:-0} )) ${1}` |
| 45 | printf "%d\n" $2 | 43 | printf "%d\n" $2 |
| 44 | } | ||
| 45 | |||
| 46 | do_decompress_old() { | ||
| 47 | printf "Extracting $2 chunks ... " | ||
| 48 | extractblocks "${1}" | grep -v appropriate | ||
| 49 | printf "done.\n" | ||
| 50 | |||
| 51 | printf "Decompressing $2 chunks ... " | ||
| 52 | for archive in *.lha; do | ||
| 53 | lha x ${archive} > /dev/null | ||
| 54 | rm ${archive} | ||
| 55 | done | ||
| 56 | printf "done.\n" | ||
| 57 | } | ||
| 58 | |||
| 59 | do_processfile_old() { | ||
| 60 | working_on=`basename ${1}` | ||
| 61 | mkdir $working_on && cd ${working_on} | ||
| 62 | do_decompress_old "${1}" "${2}" | ||
| 63 | cd .. | ||
| 64 | |||
| 65 | printf "Combining $2 into single file ... " | ||
| 66 | if [ "${4}" = "convert_zeros" ]; then | ||
| 67 | cat ${working_on}/* | tr '\n\0' '\t\n' > $3 | ||
| 68 | else | ||
| 69 | cat ${working_on}/* > $3 | ||
| 70 | fi | ||
| 71 | printf "done.\n" | ||
| 72 | |||
| 73 | rm -rf ${working_on} | ||
| 46 | } | 74 | } |
| 47 | 75 | ||
| 48 | handle_old_format() { | 76 | handle_old_format() { |
| 49 | # Clear old files. Be very careful, we could | 77 | # Clear old files. Be very careful, we could |
| 50 | # have ended up in an unexpected directory, after all. | 78 | # have ended up in an unexpected directory, after all. |
| 79 | printf "Cleaning up old temporary files ... " | ||
| 51 | find -E . -depth 1 -regex '^\./[0123456789]+' -delete | 80 | find -E . -depth 1 -regex '^\./[0123456789]+' -delete |
| 52 | rm -f ??_* | 81 | rm -rf ??_* coords strassen |
| 82 | printf "done.\n" | ||
| 53 | 83 | ||
| 54 | # If street names come in an extra file, extract | 84 | # If street names come in an extra file, extract |
| 55 | # street names first | 85 | # street names first |
| 56 | if [ "$2" ]; then | 86 | streets=$1/[Dd][Aa][Tt]/[Ss][Tt][Rr][Aa][Ss][Ss][Ee][Nn].[Dd][Aa][Tt] |
| 57 | mkdir strassen | 87 | [ -f ${streets} ] && do_processfile_old "${streets}" "street name" 99_Strassenname convert_zeros |
| 58 | cd strassen/ | ||
| 59 | |||
| 60 | ../../bin/extractblocks $2 | ||
| 61 | |||
| 62 | # This should leave us with a bunch of .lha files | ||
| 63 | for archive in *.lha; do lha x ${archive}; done | ||
| 64 | find . -name \*.lha -delete | ||
| 65 | cd .. | ||
| 66 | |||
| 67 | cat strassen/* | tr '\n\0' '\t\n' > 99_Strassenname | ||
| 68 | rm -r strassen/ | ||
| 69 | fi | ||
| 70 | 88 | ||
| 71 | # Then extract teiln.dat | 89 | # Then extract teiln.dat |
| 72 | ../bin/extractblocks $1 | 90 | do_decompress_old $1/[Dd][Aa][Tt]/[Tt][Ee][Ii][Ll][Nn].[Dd][Aa][Tt] "teiln.dat" |
| 73 | |||
| 74 | # This should leave us with a bunch of .lha files | ||
| 75 | for archive in *.lha; do lha x ${archive}; done | ||
| 76 | find . -name \*.lha -delete | ||
| 77 | 91 | ||
| 78 | # See how long each filename is | 92 | # See how long each filename is |
| 79 | filename_len=$(( `ls | head -n 1 | wc -c` - 1 )) | 93 | filename_len=$(( `ls | head -n 1 | wc -c` - 1 )) |
| @@ -100,6 +114,7 @@ handle_old_format() { | |||
| 100 | fi | 114 | fi |
| 101 | 115 | ||
| 102 | # Now loop over all files and dump them | 116 | # Now loop over all files and dump them |
| 117 | printf "Splitting decompressed chunks into their columns ... " | ||
| 103 | while [ -f ${nname_file} ]; do | 118 | while [ -f ${nname_file} ]; do |
| 104 | # Get number of entries in this round | 119 | # Get number of entries in this round |
| 105 | count=`get_dword ${nname_file}` | 120 | count=`get_dword ${nname_file}` |
| @@ -134,9 +149,12 @@ handle_old_format() { | |||
| 134 | table_file=`printf %0${filename_len}d ${table_file}` | 149 | table_file=`printf %0${filename_len}d ${table_file}` |
| 135 | 150 | ||
| 136 | done | 151 | done |
| 152 | printf "done.\n" | ||
| 137 | 153 | ||
| 138 | # wipe all temporary extracted files | 154 | # wipe all temporary extracted files |
| 155 | printf "Cleaning up decompressed chunks ... " | ||
| 139 | find -E . -depth 1 -regex '^\./[0123456789]+' -delete | 156 | find -E . -depth 1 -regex '^\./[0123456789]+' -delete |
| 157 | printf "done.\n" | ||
| 140 | 158 | ||
| 141 | # rename our columns extracted from the table file | 159 | # rename our columns extracted from the table file |
| 142 | mv 04_unknown 04_Namenszusatz | 160 | mv 04_unknown 04_Namenszusatz |
| @@ -155,23 +173,29 @@ handle_old_format() { | |||
| 155 | # instead of street names | 173 | # instead of street names |
| 156 | if [ -f 99_Strassenname ]; then | 174 | if [ -f 99_Strassenname ]; then |
| 157 | mv 07_unknown 07_Strassenindex | 175 | mv 07_unknown 07_Strassenindex |
| 176 | printf "Looking up street names from indexes ... " | ||
| 158 | cut -d ';' -f 1 07_Strassenindex | ${EL} -0x 99_Strassenname > 07_Strasse | 177 | cut -d ';' -f 1 07_Strassenindex | ${EL} -0x 99_Strassenname > 07_Strasse |
| 178 | printf "done.\n" | ||
| 159 | else | 179 | else |
| 160 | mv 07_unknown 07_Strasse | 180 | mv 07_unknown 07_Strasse |
| 161 | fi | 181 | fi |
| 182 | |||
| 183 | karto=$1/[Dd][Aa][Tt]/[Kk][Aa][Rr][Tt][Oo].[Dd][Aa][Tt] | ||
| 184 | printf "%s\n" $karto | ||
| 185 | [ -f ${karto} ] && do_processfile_old ${karto} "geo coordinates" 90_Geokoordinaten_hnr | ||
| 162 | } | 186 | } |
| 163 | 187 | ||
| 164 | handle_new_format() { | 188 | handle_new_format() { |
| 165 | echo "Working on $1. Detected post-2003 Telefonbuch version." | 189 | echo "Working on $1. Detected post-2003 Telefonbuch version." |
| 166 | printf "Extracting street names ... " | 190 | printf "Extracting street names ... " |
| 167 | ../bin/decompress $1/streets.tl | 191 | decompress $1/streets.tl |
| 168 | 192 | ||
| 169 | cat file_* | tr '\n\0' '\t\n' > 99_Strassenname | 193 | cat file_* | tr '\n\0' '\t\n' > 99_Strassenname |
| 170 | rm file_* | 194 | rm file_* |
| 171 | printf "done.\n" | 195 | printf "done.\n" |
| 172 | 196 | ||
| 173 | printf "Extracting phonebook.db ... " | 197 | printf "Extracting phonebook.db ... " |
| 174 | ../bin/decompress $1/phonebook.db | grep -v appropriate | 198 | decompress $1/phonebook.db | grep -v appropriate |
| 175 | 199 | ||
| 176 | numfiles=`find . -name file_\* | wc -l` | 200 | numfiles=`find . -name file_\* | wc -l` |
| 177 | printf "done.\n" | 201 | printf "done.\n" |
| @@ -206,7 +230,7 @@ handle_new_format() { | |||
| 206 | mv column_9 13_Rufnummer | 230 | mv column_9 13_Rufnummer |
| 207 | mv column_10 14_15_Email_Webadresse | 231 | mv column_10 14_15_Email_Webadresse |
| 208 | 232 | ||
| 209 | printf "Looing up street names from indexes ... " | 233 | printf "Looking up street names from indexes ... " |
| 210 | cut -f 1 07_08_Strassenindex_Hausnummer | ${EL} -0 99_Strassenname > 07_Strasse | 234 | cut -f 1 07_08_Strassenindex_Hausnummer | ${EL} -0 99_Strassenname > 07_Strasse |
| 211 | printf "done.\n" | 235 | printf "done.\n" |
| 212 | 236 | ||
| @@ -216,7 +240,7 @@ handle_new_format() { | |||
| 216 | 240 | ||
| 217 | if [ -f $1/zip-streets-hn-geo.tl ]; then | 241 | if [ -f $1/zip-streets-hn-geo.tl ]; then |
| 218 | printf "Extracting geo coordinates (precision: house number) ... " | 242 | printf "Extracting geo coordinates (precision: house number) ... " |
| 219 | ../bin/decompress $1/zip-streets-hn-geo.tl | 243 | decompress $1/zip-streets-hn-geo.tl |
| 220 | cat file_* > 90_Geokoordinaten_hnr | 244 | cat file_* > 90_Geokoordinaten_hnr |
| 221 | printf "done.\n" | 245 | printf "done.\n" |
| 222 | printf "Looking up geo coordinates for each phonebook entry ... " | 246 | printf "Looking up geo coordinates for each phonebook entry ... " |
| @@ -224,7 +248,7 @@ handle_new_format() { | |||
| 224 | printf "done.\n" | 248 | printf "done.\n" |
| 225 | elif [ -f $1/zip-streets-geo.tl ]; then | 249 | elif [ -f $1/zip-streets-geo.tl ]; then |
| 226 | printf "Extracting geo coordinates (precision: street) ... " | 250 | printf "Extracting geo coordinates (precision: street) ... " |
| 227 | ../bin/decompress $1/zip-streets-geo.tl | 251 | decompress $1/zip-streets-geo.tl |
| 228 | cat file_* > 91_Geokoordinaten_str | 252 | cat file_* > 91_Geokoordinaten_str |
| 229 | printf "done.\n" | 253 | printf "done.\n" |
| 230 | printf "Looking up geo coordinates for each phonebook entry ... " | 254 | printf "Looking up geo coordinates for each phonebook entry ... " |
