diff options
author | Dirk Engling <erdgeist@erdgeist.org> | 2014-01-18 06:47:38 +0100 |
---|---|---|
committer | Dirk Engling <erdgeist@erdgeist.org> | 2014-01-18 06:47:38 +0100 |
commit | 4e33872678d38319e3bb6bd98584dcb78aae5940 (patch) | |
tree | 86c8ac9f12d67e295be0504eb5ce0bd6628d9e71 | |
parent | d381a0c3b24c686df7d2abc5f69d0e81e9065479 (diff) |
Implement street name extraction
-rw-r--r-- | src/makecolumns.sh | 38 |
1 file changed, 32 insertions, 6 deletions
diff --git a/src/makecolumns.sh b/src/makecolumns.sh index c4b5d24..450cb5d 100644 --- a/src/makecolumns.sh +++ b/src/makecolumns.sh | |||
@@ -24,12 +24,15 @@ main() { | |||
24 | mkdir -p ../work_`basename $1` | 24 | mkdir -p ../work_`basename $1` |
25 | cd ../work_`basename $1` || exit 1 | 25 | cd ../work_`basename $1` || exit 1 |
26 | 26 | ||
27 | unset strassen; [ -f $1/dat/strassen.dat ] && strassen=$1/dat/strassen.dat | ||
28 | |||
27 | if [ -f "$1/phonebook.db" ]; then | 29 | if [ -f "$1/phonebook.db" ]; then |
28 | handle_new_format $1 | 30 | handle_new_format $1 |
29 | elif [ -f "$1/DAT/TEILN.DAT" ]; then | 31 | elif [ -f "$1/DAT/TEILN.DAT" ]; then |
30 | handle_old_format "$1/DAT/TEILN.DAT" | 32 | handle_old_format "$1/DAT/TEILN.DAT" "${strassen}" |
31 | elif [ -f "$1/dat/teiln.dat" ]; then | 33 | elif [ -f "$1/dat/teiln.dat" ]; then |
32 | handle_old_format "$1/dat/teiln.dat" | 34 | echo handle_old_format "$1/dat/teiln.dat" "${strassen}" |
35 | handle_old_format "$1/dat/teiln.dat" "${strassen}" | ||
33 | else | 36 | else |
34 | echo "Not a recognized Telefonbuch folder" | 37 | echo "Not a recognized Telefonbuch folder" |
35 | fi | 38 | fi |
@@ -44,10 +47,27 @@ get_dword() { | |||
44 | handle_old_format() { | 47 | handle_old_format() { |
45 | # Clear old files. Be very careful, we could | 48 | # Clear old files. Be very careful, we could |
46 | # have ended up in an unexpected directory, after all. | 49 | # have ended up in an unexpected directory, after all. |
47 | find -depth 1 -regex ^[0-9]+$ -delete | 50 | find -E . -depth 1 -regex '^\./[0123456789]+' -delete |
48 | rm ??_* | 51 | rm ??_* |
49 | 52 | ||
50 | # First the simple stuff, extract teiln.dat | 53 | # If street names come in an extra file, extract |
54 | # street names first | ||
55 | if [ "$2" ]; then | ||
56 | mkdir strassen | ||
57 | cd strassen/ | ||
58 | |||
59 | ../../bin/extractblocks $2 | ||
60 | |||
61 | # This should leave us with a bunch of .lha files | ||
62 | for archive in *.lha; do lha x ${archive}; done | ||
63 | find . -name \*.lha -delete | ||
64 | cd .. | ||
65 | |||
66 | cat strassen/* | tr '\n\0' '\t\n' > 99_Strassenname | ||
67 | rm -r strassen/ | ||
68 | fi | ||
69 | |||
70 | # Then extract teiln.dat | ||
51 | ../bin/extractblocks $1 | 71 | ../bin/extractblocks $1 |
52 | 72 | ||
53 | # This should leave us with a bunch of .lha files | 73 | # This should leave us with a bunch of .lha files |
@@ -115,7 +135,7 @@ handle_old_format() { | |||
115 | done | 135 | done |
116 | 136 | ||
117 | # wipe all temporary extracted files | 137 | # wipe all temporary extracted files |
118 | find -depth 1 -regex ^[0-9]+$ -delete | 138 | find -E . -depth 1 -regex '^\./[0123456789]+' -delete |
119 | 139 | ||
120 | # rename our columns extracted from the table file | 140 | # rename our columns extracted from the table file |
121 | mv 04_unknown 04_Namenszusatz | 141 | mv 04_unknown 04_Namenszusatz |
@@ -130,8 +150,14 @@ handle_old_format() { | |||
130 | [ -f 14_unknown ] && mv 14_unknown 14_Email | 150 | [ -f 14_unknown ] && mv 14_unknown 14_Email |
131 | [ -f 15_unknown ] && mv 15_unknown 15_Webadresse | 151 | [ -f 15_unknown ] && mv 15_unknown 15_Webadresse |
132 | 152 | ||
133 | # TODO: extract street names if 07_unknown contains street indexes | 153 | # extract street names if 07_unknown contains street indexes |
134 | # instead of street names | 154 | # instead of street names |
155 | if [ -f 99_Strassenname ]; then | ||
156 | mv 07_unknown 07_Strassenindex | ||
157 | cut -d ';' -f 1 07_Strassenindex | ${EL} -0x 99_Strassenname > 07_Strasse | ||
158 | else | ||
159 | mv 07_unknown 07_Strasse | ||
160 | fi | ||
135 | } | 161 | } |
136 | 162 | ||
137 | handle_new_format() { | 163 | handle_new_format() { |