diff options
author | Dirk Engling <erdgeist@erdgeist.org> | 2014-01-18 03:58:54 +0100 |
---|---|---|
committer | Dirk Engling <erdgeist@erdgeist.org> | 2014-01-18 03:58:54 +0100 |
commit | 7f66b38b3dd8602b1532d0e6cda645f771474f94 (patch) | |
tree | 26ea137620c35df68612e5fdc7f322522005dfec /src | |
parent | f2df25ca202300fe04672703ea7fb30d8431b302 (diff) |
Remove the hexout tool and use xxd instead. Also automate the export of old issues of Das Telefonbuch in the shell script.
Diffstat (limited to 'src')
-rw-r--r-- | src/Makefile | 6 | ||||
-rw-r--r-- | src/makecolumns.sh | 219 |
2 files changed, 170 insertions, 55 deletions
diff --git a/src/Makefile b/src/Makefile index 8de6d8b..8d1a66d 100644 --- a/src/Makefile +++ b/src/Makefile | |||
@@ -1,7 +1,7 @@ | |||
1 | all: decompress hexout | 1 | all: decompress extractblocks |
2 | 2 | ||
3 | decompress: decompress.c mystdlib.c | 3 | decompress: decompress.c mystdlib.c |
4 | gcc -O2 -o ../bin/decompress decompress.c mystdlib.c -lz | 4 | gcc -O2 -o ../bin/decompress decompress.c mystdlib.c -lz |
5 | 5 | ||
6 | hexout: hexout.c | 6 | extractblocks: extractblocks_new.c mystdlib.c |
7 | gcc -O2 -o ../bin/hexout hexout.c | 7 | gcc -o ../bin/extractblocks extractblocks_new.c mystdlib.c |
diff --git a/src/makecolumns.sh b/src/makecolumns.sh index cd26091..c4b5d24 100644 --- a/src/makecolumns.sh +++ b/src/makecolumns.sh | |||
@@ -4,64 +4,179 @@ export LANG=C | |||
4 | export LC_CTYPE=C | 4 | export LC_CTYPE=C |
5 | export LC_ALL=C | 5 | export LC_ALL=C |
6 | 6 | ||
7 | [ -f /usr/local/bin/el ] && EL=/usr/local/bin/el | 7 | main() { |
8 | [ -f `dirname $0`/../bin/el ] && EL=`dirname $0`/../bin/el | 8 | [ -f /usr/local/bin/el ] && EL=/usr/local/bin/el |
9 | [ -f `dirname $0`/../bin/el ] && EL=`dirname $0`/../bin/el | ||
9 | 10 | ||
10 | if [ -z "${EL}" ]; then | 11 | if [ -z "${EL}" ]; then |
11 | echo "el not found. Get it at 'cvs -d :pserver:anoncvs@cvs.erdgeist.org:/home/cvsroot co el'" | 12 | echo "el not found. Get it at 'cvs -d :pserver:anoncvs@cvs.erdgeist.org:/home/cvsroot co el'" |
12 | exit 1 | 13 | exit 1 |
13 | fi | 14 | fi |
14 | |||
15 | HO=`dirname $0`/../bin/hexout | ||
16 | |||
17 | if [ $# -ne 1 ]; then | ||
18 | echo "Syntax: $0 [phonebookdirectory]" | ||
19 | exit 1 | ||
20 | fi | ||
21 | |||
22 | make -f Makefile all | ||
23 | 15 | ||
24 | mkdir ../work | 16 | if [ $# -ne 1 ]; then |
25 | cd ../work | 17 | echo "Syntax: $0 [phonebookdirectory]" |
18 | exit 1 | ||
19 | fi | ||
26 | 20 | ||
27 | ../bin/decompress $1/streets.tl | 21 | # Compile all the binaries |
22 | make all | ||
28 | 23 | ||
29 | cat file_* > streets | 24 | mkdir -p ../work_`basename $1` |
30 | rm file_* | 25 | cd ../work_`basename $1` || exit 1 |
31 | 26 | ||
32 | ../bin/decompress $1/phonebook.db | 27 | if [ -f "$1/phonebook.db" ]; then |
28 | handle_new_format $1 | ||
29 | elif [ -f "$1/DAT/TEILN.DAT" ]; then | ||
30 | handle_old_format "$1/DAT/TEILN.DAT" | ||
31 | elif [ -f "$1/dat/teiln.dat" ]; then | ||
32 | handle_old_format "$1/dat/teiln.dat" | ||
33 | else | ||
34 | echo "Not a recognized Telefonbuch folder" | ||
35 | fi | ||
36 | } | ||
37 | |||
38 | get_dword() { | ||
39 | # $1 file, $2 offset | ||
40 | set -- `od -tu4 -N4 -j$(( 4*${2:-0} )) ${1}` | ||
41 | printf "%d\n" $2 | ||
42 | } | ||
43 | |||
44 | handle_old_format() { | ||
45 | # Clear old files. Be very careful, we could | ||
46 | # have ended up in an unexpected directory, after all. | ||
47 | find -depth 1 -regex ^[0-9]+$ -delete | ||
48 | rm ??_* | ||
49 | |||
50 | # First the simple stuff, extract teiln.dat | ||
51 | ../bin/extractblocks $1 | ||
52 | |||
53 | # This should leave us with a bunch of .lha files | ||
54 | for archive in *.lha; do lha x ${archive}; done | ||
55 | find . -name \*.lha -delete | ||
56 | |||
57 | # See how long each filename is | ||
58 | filename_len=$(( `ls | head -n 1 | wc -c` - 1 )) | ||
59 | |||
60 | # from 2000F on file 0+3*n is table, so make it default | ||
61 | table_file=`printf %0${filename_len}d 0` | ||
62 | vname_file=`printf %0${filename_len}d 2` | ||
63 | |||
64 | # if supposed vname file is larger than table file, | ||
65 | # we're having a pre-2000F layout, so switch accordingly | ||
66 | if [ `stat -f %z ${table_file}` -lt `stat -f %z ${vname_file}` ]; then | ||
67 | table_file=`printf %0${filename_len}d 2` | ||
68 | nname_file=`printf %0${filename_len}d 0` | ||
69 | vname_file=`printf %0${filename_len}d 1` | ||
70 | else | ||
71 | nname_file=`printf %0${filename_len}d 1` | ||
72 | fi | ||
33 | 73 | ||
34 | numfiles=`find . -name file_\* | wc -l` | 74 | # Table file has a table header with identical count |
75 | # to nname file's header. Verify this | ||
76 | if [ `get_dword ${nname_file}` -ne `get_dword ${table_file}` ]; then | ||
77 | echo "Unknown layout." | ||
78 | exit | ||
79 | fi | ||
35 | 80 | ||
36 | for column in `jot - 0 10 1`; do | 81 | # Now loop over all files and dump them |
37 | for file in `jot - ${column} ${numfiles} 11`; do | 82 | while [ -f ${nname_file} ]; do |
38 | acton=`printf file_%05X ${file}` | 83 | # Get number of entries in this round |
39 | if [ ${column} = 0 ]; then | 84 | count=`get_dword ${nname_file}` |
40 | ${HO} < ${acton} >> column_0; | 85 | |
86 | # Get offset into first nname | ||
87 | nname_off=$(( `get_dword ${nname_file} 1` + 1 )) | ||
88 | |||
89 | # Now get the flags before the nnames | ||
90 | tail -c +${nname_off} ${nname_file} | tr '\n\0' '\t\n' | head -n ${count} | cut -c -1 >> 01_Flags | ||
91 | tail -c +${nname_off} ${nname_file} | tr '\n\0' '\t\n' | head -n ${count} | cut -c 2- >> 02_Nachname | ||
92 | |||
93 | # Extract the vnames | ||
94 | tr '\n\0' '\t\n' < ${vname_file} | head -n ${count} >> 03_Vorname | ||
95 | |||
96 | # Offset into first table entry tells us how many | ||
97 | # fields are in table file | ||
98 | table_entries=$(( `get_dword ${table_file} 1` / 4 - 1 )) | ||
99 | |||
100 | # Now iterate over all entries in the table file | ||
101 | for table_index in `jot ${table_entries}`; do | ||
102 | table_off=`get_dword ${table_file} ${table_index}` | ||
103 | tail -c +$(( table_off + 1 )) ${table_file} | tr '\n\0' '\t\n' | head -n ${count} >> `printf %02d_unknown $(( table_index + 3 ))` | ||
104 | done | ||
105 | |||
106 | # Advance the filenames. Note, that we need bc because | ||
107 | # builtin arithmetic treats numbers with leading zeros as octals | ||
108 | nname_file=`printf "%s + 3\n" ${nname_file} | bc` | ||
109 | nname_file=`printf %0${filename_len}d ${nname_file}` | ||
110 | vname_file=`printf "%s + 3\n" ${vname_file} | bc` | ||
111 | vname_file=`printf %0${filename_len}d ${vname_file}` | ||
112 | table_file=`printf "%s + 3\n" ${table_file} | bc` | ||
113 | table_file=`printf %0${filename_len}d ${table_file}` | ||
114 | |||
115 | done | ||
116 | |||
117 | # wipe all temporary extracted files | ||
118 | find -depth 1 -regex ^[0-9]+$ -delete | ||
119 | |||
120 | # rename our columns extracted from the table file | ||
121 | mv 04_unknown 04_Namenszusatz | ||
122 | mv 05_unknown 05_Adresszusatz | ||
123 | mv 06_unknown 06_Ortszusatz | ||
124 | mv 08_unknown 08_Hausnummer | ||
125 | mv 09_unknown 09_Detail | ||
126 | mv 10_unknown 10_Postleitzahl | ||
127 | mv 11_unknown 11_Ort | ||
128 | mv 12_unknown 12_Vorwahl | ||
129 | mv 13_unknown 13_Rufnummer | ||
130 | [ -f 14_unknown ] && mv 14_unknown 14_Email | ||
131 | [ -f 15_unknown ] && mv 15_unknown 15_Webadresse | ||
132 | |||
133 | # TODO: extract street names if 07_unknown contains street indexes | ||
134 | # instead of street names | ||
135 | } | ||
136 | |||
137 | handle_new_format() { | ||
138 | ../bin/decompress $1/streets.tl | ||
139 | |||
140 | cat file_* > streets | ||
141 | rm file_* | ||
142 | |||
143 | ../bin/decompress $1/phonebook.db | ||
144 | |||
145 | numfiles=`find . -name file_\* | wc -l` | ||
146 | |||
147 | for column in `jot - 0 10 1`; do | ||
148 | for file in `jot - ${column} ${numfiles} 11`; do | ||
149 | acton=`printf file_%05X ${file}` | ||
150 | if [ ${column} = 0 ]; then | ||
151 | xxd -ps -c1 ${acton} >> column_0; | ||
152 | else | ||
153 | tr '\n\0' '\t\n' < ${acton} >> column_${column}; | ||
154 | fi | ||
155 | done; | ||
156 | done | ||
157 | |||
158 | mv column_0 01_Flags | ||
159 | mv column_1 02_Nachname | ||
160 | mv column_2 03_Vorname | ||
161 | mv column_3 04_05_Namenszusatz_Addresszusatz | ||
162 | mv column_4 09_Detail | ||
163 | mv column_5 07_08_Strassenindex_Hausnummer | ||
164 | mv column_6 12_Vorwahl | ||
165 | mv column_7 10_Postleitzahl | ||
166 | mv column_8 11_Ort | ||
167 | mv column_9 13_Rufnummer | ||
168 | mv column_10 14_15_Email_Webadresse | ||
169 | |||
170 | [ -e streets ] && tr '\n\0' '\t\n' < streets > 99_Strassenname && rm streets | ||
171 | |||
172 | if [ -e 99_Strassenname ]; then | ||
173 | cut -f 1 07_08_Strassenindex_Hausnummer | ${EL} -0 99_Strassenname > 07_Strasse | ||
41 | else | 174 | else |
42 | tr '\0' '\n' < ${acton} >> column_${column}; | 175 | echo Could not convert streetindexes |
43 | fi | 176 | fi |
44 | done; | 177 | |
45 | done | 178 | find . -name file_\* -delete |
46 | 179 | } | |
47 | mv column_0 01_Flags | 180 | |
48 | mv column_1 02_Nachname | 181 | # After function definitions, main() can use them |
49 | mv column_2 03_Vorname | 182 | main "$@" |
50 | mv column_3 04_05_Namenszusatz_Addresszusatz | ||
51 | mv column_4 09_Detail | ||
52 | mv column_5 07_08_Strassenindex_Hausnummer | ||
53 | mv column_6 12_Vorwahl | ||
54 | mv column_7 10_Postleitzahl | ||
55 | mv column_8 11_Ort | ||
56 | mv column_9 13_Rufnummer | ||
57 | mv column_10 14_15_Email_Webadresse | ||
58 | |||
59 | [ -e streets ] && tr '\0' '\n' < streets > 99_Strassenname && rm streets | ||
60 | |||
61 | if [ -e 99_Strassenname ]; then | ||
62 | cut -f 1 07_08_Strassenindex_Hausnummer | ${EL} -0 99_Strassenname > 07_Strasse | ||
63 | else | ||
64 | echo Could not convert streetindexes | ||
65 | fi | ||
66 | |||
67 | find . -name file_\* -delete | ||