diff options
author | User Erdgeist <erdgeist@avon.ccc.de> | 2014-02-07 17:15:43 +0000 |
---|---|---|
committer | User Erdgeist <erdgeist@avon.ccc.de> | 2014-02-07 17:15:43 +0000 |
commit | 00a8ae93bc88d8cdf1aecc7f3c410359af987c3c (patch) | |
tree | 7b9c766b31e76e642e096b87f8ecf0df05e420cd /src | |
parent | 0d9bc8f4efd22460a3dfea679d4b84b2c5da7cfa (diff) |
Mute the invoked tools
Be more verbose about what we do when parsing the older format
Factor out common code to avoid redundant implementation
Export geo coordinates for the older format
Diffstat (limited to 'src')
-rwxr-xr-x[-rw-r--r--] | src/makecolumns.sh | 90 |
1 files changed, 57 insertions, 33 deletions
diff --git a/src/makecolumns.sh b/src/makecolumns.sh index 4986157..95c9d08 100644..100755 --- a/src/makecolumns.sh +++ b/src/makecolumns.sh | |||
@@ -3,6 +3,7 @@ | |||
3 | export LANG=C | 3 | export LANG=C |
4 | export LC_CTYPE=C | 4 | export LC_CTYPE=C |
5 | export LC_ALL=C | 5 | export LC_ALL=C |
6 | export PATH=${PATH}:`pwd`/../bin/ | ||
6 | 7 | ||
7 | main() { | 8 | main() { |
8 | [ -f /usr/local/bin/el ] && EL=/usr/local/bin/el | 9 | [ -f /usr/local/bin/el ] && EL=/usr/local/bin/el |
@@ -28,11 +29,8 @@ main() { | |||
28 | 29 | ||
29 | if [ -f "$1/phonebook.db" ]; then | 30 | if [ -f "$1/phonebook.db" ]; then |
30 | handle_new_format $1 | 31 | handle_new_format $1 |
31 | elif [ -f "$1/DAT/TEILN.DAT" ]; then | 32 | elif [ -f $1/[Dd][Aa][Tt]/[Tt][Ee][Ii][Ll][Nn].[Dd][Aa][Tt] ]; then |
32 | handle_old_format "$1/DAT/TEILN.DAT" "${strassen}" | 33 | handle_old_format $1 |
33 | elif [ -f "$1/dat/teiln.dat" ]; then | ||
34 | echo handle_old_format "$1/dat/teiln.dat" "${strassen}" | ||
35 | handle_old_format "$1/dat/teiln.dat" "${strassen}" | ||
36 | else | 34 | else |
37 | echo "Not a recognized Telefonbuch folder" | 35 | echo "Not a recognized Telefonbuch folder" |
38 | fi | 36 | fi |
@@ -40,40 +38,56 @@ main() { | |||
40 | } | 38 | } |
41 | 39 | ||
42 | get_dword() { | 40 | get_dword() { |
43 | # $1 file, $2 offset | 41 | # $1 file, $2 offset |
44 | set -- `od -tu4 -N4 -j$(( 4*${2:-0} )) ${1}` | 42 | set -- `od -tu4 -N4 -j$(( 4*${2:-0} )) ${1}` |
45 | printf "%d\n" $2 | 43 | printf "%d\n" $2 |
44 | } | ||
45 | |||
46 | do_decompress_old() { | ||
47 | printf "Extracting $2 chunks ... " | ||
48 | extractblocks "${1}" | grep -v appropriate | ||
49 | printf "done.\n" | ||
50 | |||
51 | printf "Decompressing $2 chunks ... " | ||
52 | for archive in *.lha; do | ||
53 | lha x ${archive} > /dev/null | ||
54 | rm ${archive} | ||
55 | done | ||
56 | printf "done.\n" | ||
57 | } | ||
58 | |||
59 | do_processfile_old() { | ||
60 | working_on=`basename ${1}` | ||
61 | mkdir $working_on && cd ${working_on} | ||
62 | do_decompress_old "${1}" "${2}" | ||
63 | cd .. | ||
64 | |||
65 | printf "Combining $2 into single file ... " | ||
66 | if [ "${4}" = "convert_zeros" ]; then | ||
67 | cat ${working_on}/* | tr '\n\0' '\t\n' > $3 | ||
68 | else | ||
69 | cat ${working_on}/* > $3 | ||
70 | fi | ||
71 | printf "done.\n" | ||
72 | |||
73 | rm -rf ${working_on} | ||
46 | } | 74 | } |
47 | 75 | ||
48 | handle_old_format() { | 76 | handle_old_format() { |
49 | # Clear old files. Be very careful, we could | 77 | # Clear old files. Be very careful, we could |
50 | # have ended up in an unexpected directory, after all. | 78 | # have ended up in an unexpected directory, after all. |
79 | printf "Cleaning up old temporary files ... " | ||
51 | find -E . -depth 1 -regex '^\./[0123456789]+' -delete | 80 | find -E . -depth 1 -regex '^\./[0123456789]+' -delete |
52 | rm -f ??_* | 81 | rm -rf ??_* coords strassen |
82 | printf "done.\n" | ||
53 | 83 | ||
54 | # If street names come in an extra file, extract | 84 | # If street names come in an extra file, extract |
55 | # street names first | 85 | # street names first |
56 | if [ "$2" ]; then | 86 | streets=$1/[Dd][Aa][Tt]/[Ss][Tt][Rr][Aa][Ss][Ss][Ee][Nn].[Dd][Aa][Tt] |
57 | mkdir strassen | 87 | [ -f ${streets} ] && do_processfile_old "${streets}" "street name" 99_Strassenname convert_zeros |
58 | cd strassen/ | ||
59 | |||
60 | ../../bin/extractblocks $2 | ||
61 | |||
62 | # This should leave us with a bunch of .lha files | ||
63 | for archive in *.lha; do lha x ${archive}; done | ||
64 | find . -name \*.lha -delete | ||
65 | cd .. | ||
66 | |||
67 | cat strassen/* | tr '\n\0' '\t\n' > 99_Strassenname | ||
68 | rm -r strassen/ | ||
69 | fi | ||
70 | 88 | ||
71 | # Then extract teiln.dat | 89 | # Then extract teiln.dat |
72 | ../bin/extractblocks $1 | 90 | do_decompress_old $1/[Dd][Aa][Tt]/[Tt][Ee][Ii][Ll][Nn].[Dd][Aa][Tt] "teiln.dat" |
73 | |||
74 | # This should leave us with a bunch of .lha files | ||
75 | for archive in *.lha; do lha x ${archive}; done | ||
76 | find . -name \*.lha -delete | ||
77 | 91 | ||
78 | # See how long each filename is | 92 | # See how long each filename is |
79 | filename_len=$(( `ls | head -n 1 | wc -c` - 1 )) | 93 | filename_len=$(( `ls | head -n 1 | wc -c` - 1 )) |
@@ -100,6 +114,7 @@ handle_old_format() { | |||
100 | fi | 114 | fi |
101 | 115 | ||
102 | # Now loop over all files and dump them | 116 | # Now loop over all files and dump them |
117 | printf "Splitting decompressed chunks into their columns ... " | ||
103 | while [ -f ${nname_file} ]; do | 118 | while [ -f ${nname_file} ]; do |
104 | # Get number of entries in this round | 119 | # Get number of entries in this round |
105 | count=`get_dword ${nname_file}` | 120 | count=`get_dword ${nname_file}` |
@@ -134,9 +149,12 @@ handle_old_format() { | |||
134 | table_file=`printf %0${filename_len}d ${table_file}` | 149 | table_file=`printf %0${filename_len}d ${table_file}` |
135 | 150 | ||
136 | done | 151 | done |
152 | printf "done.\n" | ||
137 | 153 | ||
138 | # wipe all temporary extracted files | 154 | # wipe all temporary extracted files |
155 | printf "Cleaning up decompressed chunks ... " | ||
139 | find -E . -depth 1 -regex '^\./[0123456789]+' -delete | 156 | find -E . -depth 1 -regex '^\./[0123456789]+' -delete |
157 | printf "done.\n" | ||
140 | 158 | ||
141 | # rename our columns extracted from the table file | 159 | # rename our columns extracted from the table file |
142 | mv 04_unknown 04_Namenszusatz | 160 | mv 04_unknown 04_Namenszusatz |
@@ -155,23 +173,29 @@ handle_old_format() { | |||
155 | # instead of street names | 173 | # instead of street names |
156 | if [ -f 99_Strassenname ]; then | 174 | if [ -f 99_Strassenname ]; then |
157 | mv 07_unknown 07_Strassenindex | 175 | mv 07_unknown 07_Strassenindex |
176 | printf "Looking up street names from indexes ... " | ||
158 | cut -d ';' -f 1 07_Strassenindex | ${EL} -0x 99_Strassenname > 07_Strasse | 177 | cut -d ';' -f 1 07_Strassenindex | ${EL} -0x 99_Strassenname > 07_Strasse |
178 | printf "done.\n" | ||
159 | else | 179 | else |
160 | mv 07_unknown 07_Strasse | 180 | mv 07_unknown 07_Strasse |
161 | fi | 181 | fi |
182 | |||
183 | karto=$1/[Dd][Aa][Tt]/[Kk][Aa][Rr][Tt][Oo].[Dd][Aa][Tt] | ||
184 | printf "%s\n" $karto | ||
185 | [ -f ${karto} ] && do_processfile_old ${karto} "geo coordinates" 90_Geokoordinaten_hnr | ||
162 | } | 186 | } |
163 | 187 | ||
164 | handle_new_format() { | 188 | handle_new_format() { |
165 | echo "Working on $1. Detected post-2003 Telefonbuch version." | 189 | echo "Working on $1. Detected post-2003 Telefonbuch version." |
166 | printf "Extracting street names ... " | 190 | printf "Extracting street names ... " |
167 | ../bin/decompress $1/streets.tl | 191 | decompress $1/streets.tl |
168 | 192 | ||
169 | cat file_* | tr '\n\0' '\t\n' > 99_Strassenname | 193 | cat file_* | tr '\n\0' '\t\n' > 99_Strassenname |
170 | rm file_* | 194 | rm file_* |
171 | printf "done.\n" | 195 | printf "done.\n" |
172 | 196 | ||
173 | printf "Extracting phonebook.db ... " | 197 | printf "Extracting phonebook.db ... " |
174 | ../bin/decompress $1/phonebook.db | grep -v appropriate | 198 | decompress $1/phonebook.db | grep -v appropriate |
175 | 199 | ||
176 | numfiles=`find . -name file_\* | wc -l` | 200 | numfiles=`find . -name file_\* | wc -l` |
177 | printf "done.\n" | 201 | printf "done.\n" |
@@ -206,7 +230,7 @@ handle_new_format() { | |||
206 | mv column_9 13_Rufnummer | 230 | mv column_9 13_Rufnummer |
207 | mv column_10 14_15_Email_Webadresse | 231 | mv column_10 14_15_Email_Webadresse |
208 | 232 | ||
209 | printf "Looing up street names from indexes ... " | 233 | printf "Looking up street names from indexes ... " |
210 | cut -f 1 07_08_Strassenindex_Hausnummer | ${EL} -0 99_Strassenname > 07_Strasse | 234 | cut -f 1 07_08_Strassenindex_Hausnummer | ${EL} -0 99_Strassenname > 07_Strasse |
211 | printf "done.\n" | 235 | printf "done.\n" |
212 | 236 | ||
@@ -216,7 +240,7 @@ handle_new_format() { | |||
216 | 240 | ||
217 | if [ -f $1/zip-streets-hn-geo.tl ]; then | 241 | if [ -f $1/zip-streets-hn-geo.tl ]; then |
218 | printf "Extracting geo coordinates (precision: house number) ... " | 242 | printf "Extracting geo coordinates (precision: house number) ... " |
219 | ../bin/decompress $1/zip-streets-hn-geo.tl | 243 | decompress $1/zip-streets-hn-geo.tl |
220 | cat file_* > 90_Geokoordinaten_hnr | 244 | cat file_* > 90_Geokoordinaten_hnr |
221 | printf "done.\n" | 245 | printf "done.\n" |
222 | printf "Looking up geo coordinates for each phonebook entry ... " | 246 | printf "Looking up geo coordinates for each phonebook entry ... " |
@@ -224,7 +248,7 @@ handle_new_format() { | |||
224 | printf "done.\n" | 248 | printf "done.\n" |
225 | elif [ -f $1/zip-streets-geo.tl ]; then | 249 | elif [ -f $1/zip-streets-geo.tl ]; then |
226 | printf "Extracting geo coordinates (precision: street) ... " | 250 | printf "Extracting geo coordinates (precision: street) ... " |
227 | ../bin/decompress $1/zip-streets-geo.tl | 251 | decompress $1/zip-streets-geo.tl |
228 | cat file_* > 91_Geokoordinaten_str | 252 | cat file_* > 91_Geokoordinaten_str |
229 | printf "done.\n" | 253 | printf "done.\n" |
230 | printf "Looking up geo coordinates for each phonebook entry ... " | 254 | printf "Looking up geo coordinates for each phonebook entry ... " |