diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/Makefile | 24 | ||||
| -rwxr-xr-x | src/makecolumns.sh | 286 |
2 files changed, 0 insertions, 310 deletions
diff --git a/src/Makefile b/src/Makefile deleted file mode 100644 index 6d40c6b..0000000 --- a/src/Makefile +++ /dev/null | |||
| @@ -1,24 +0,0 @@ | |||
# Builds the command-line helper tools from export/*.c into ../bin.
#
#   make binaries   build every tool listed in BINARIES (default goal)
#   make clean      remove the built tools

BINARIES=../bin/extract_version_1 ../bin/extract_version_2 ../bin/extract_version_3 ../bin/split_version_2 ../bin/map_coords ../bin/convert_coords

binaries: $(BINARIES)

# Neither goal creates a file of its own name. Declaring them phony keeps a
# stray file called "binaries" or "clean" from making the goal look up to date.
.PHONY: binaries clean

CFLAGS+=-Wextra

# Links against zlib (-lz); built with -O2 whereas the other tools use -O3.
../bin/extract_version_3: export/extract_version_3.c export/mystdlib.c
	@$(CC) $(CFLAGS) -O2 -o $@ export/extract_version_3.c export/mystdlib.c -lz -I export

../bin/extract_version_2: export/extract_version_2.c export/mystdlib.c
	@$(CC) $(CFLAGS) -O3 -o $@ export/extract_version_2.c export/mystdlib.c -I export

../bin/extract_version_1: export/extract_version_1.c export/mystdlib.c
	@$(CC) $(CFLAGS) -O3 -o $@ export/extract_version_1.c export/mystdlib.c -I export

../bin/split_version_2: export/split_version_2.c
	@$(CC) $(CFLAGS) -O3 -o $@ export/split_version_2.c

../bin/map_coords: export/map_coords.c export/mystdlib.c
	@$(CC) $(CFLAGS) -O3 -o $@ export/map_coords.c export/mystdlib.c -I export

# Links against the math library (-lm).
../bin/convert_coords: export/convert_coords.c
	@$(CC) $(CFLAGS) -O3 -o $@ export/convert_coords.c -lm

clean:
	@rm -f $(BINARIES)
diff --git a/src/makecolumns.sh b/src/makecolumns.sh deleted file mode 100755 index 0f9c5ba..0000000 --- a/src/makecolumns.sh +++ /dev/null | |||
| @@ -1,286 +0,0 @@ | |||
#!/bin/sh

# Pin every locale category to "C" (presumably so that tr/cut/sed below treat
# the extracted data as plain bytes -- confirm before relaxing this).
export LANG=C
export LC_CTYPE=C
export LC_ALL=C

# Make the helper binaries in ../bin callable by bare name. The directory is
# resolved relative to the current working directory at start-up. Assign first
# and export afterwards so a working directory containing whitespace is never
# word-split by shells that treat `export` arguments as ordinary words.
PATH="${PATH}:$(pwd)/../bin/"
export PATH
| 7 | |||
# Entry point: locate the `el` helper, build the binaries, recreate the work
# directory for the given phonebook directory and dispatch to the handler that
# matches the detected on-disk format.
#
# Arguments: $1 - phonebook directory
# Globals:   EL (written) - path of the `el` executable used by the handlers
# Exits:     1 when `el` is missing, the argument count is wrong or the work
#            directory cannot be entered
# Returns:   0 after a handler ran, 1 when the folder format is not recognized
#
# NOTE: the work directory is entered before the format checks run, so a
# relative $1 is resolved from inside ../work_<name>, not from the directory
# the script was started in.
main() {
  [ -f /usr/local/bin/el ] && EL=/usr/local/bin/el
  script_dir=$(dirname "$0")
  # A copy next to the script takes precedence over the system-wide one.
  [ -f "${script_dir}/../bin/el" ] && EL="${script_dir}/../bin/el"

  if [ -z "${EL}" ]; then
    echo "el not found. Get it at 'cvs -d :pserver:anoncvs@cvs.erdgeist.org:/home/cvsroot co el'"
    exit 1
  fi

  if [ $# -ne 1 ]; then
    echo "Syntax: $0 [phonebookdirectory]"
    exit 1
  fi

  # Compile all the binaries. A failed build is reported but not fatal, since
  # previously built binaries may still be available on PATH.
  make binaries || echo "warning: 'make binaries' failed, continuing with existing binaries" >&2

  work_dir="../work_$(basename "${1#white_}")"
  printf "Cleaning up old working directory ... "
  rm -rf "${work_dir}"
  printf "done.\n"
  mkdir -p "${work_dir}"
  cd "${work_dir}" || exit 1

  # The teiln.dat path is matched case-insensitively via a glob. The glob must
  # stay outside the quotes to be expanded; the loop keeps the first match.
  teiln_found=
  for candidate in "${1}"/[Dd][Aa][Tt]/[Tt][Ee][Ii][Ll][Nn].[Dd][Aa][Tt]; do
    if [ -f "${candidate}" ]; then
      teiln_found=yes
      break
    fi
  done

  status=0
  if [ -f "${1}/phonebook.db" ]; then
    handle_format_version_3 "${1}"
  elif [ -n "${teiln_found}" ]; then
    handle_format_version_2 "${1}"
  elif [ -n "$(find "${1}" -name dpr00000.005 -ls -quit)" ]; then
    handle_format_version_1 "${1}"
  else
    echo "Not a recognized Telefonbuch folder"
    status=1
  fi
  cd ..
  return "${status}"
}
| 42 | |||
# Extract the .lha chunks contained in a version-2 data file into the current
# directory, unpack each chunk with `lha` and delete the archive afterwards.
# Progress is printed in steps of 5%.
#
# Arguments: $1 - data file handed to extract_version_2
#            $2 - human-readable name used in the progress messages
# Globals:   numfiles, reported, processed, archive (written)
# Outputs:   progress messages on stdout
do_decompress_version_2() {
  printf "Extracting %s chunks ... " "${2}"
  extract_version_2 "${1}"
  printf "done.\n"

  printf "Decompressing %s chunks ... " "${2}"
  numfiles=$(find . -name '*.lha' | wc -l)
  reported=0; processed=0
  for archive in *.lha; do
    # An unmatched glob stays literal; skipping it also avoids dividing by a
    # zero file count below.
    [ -f "${archive}" ] || continue
    lha x "${archive}" > /dev/null
    rm -- "${archive}"
    processed=$(( processed + 1 ))
    # The progress is tracked in 20 steps; advance by at most one per archive.
    if [ $(( processed * 20 / numfiles )) -gt "${reported}" ]; then
      reported=$(( reported + 1 ))
      printf "%d%% " $(( reported * 5 ))
    fi
  done
  # Fewer than 20 archives never reach step 20 in the loop, so finish the
  # progress line explicitly.
  if [ "${reported}" -lt 20 ]; then
    printf "100%% "
  fi
  printf "done.\n"
}
| 59 | |||
# Decompress one version-2 data file inside a scratch directory named after the
# file, concatenate all decompressed chunks into a single output file and
# remove the scratch directory again.
#
# Arguments: $1 - data file to process
#            $2 - human-readable name used in the progress messages
#            $3 - output file, created in the current directory
#            $4 - optional; "convert_zeros" maps NUL to newline and newline to
#                 tab while combining
# Globals:   working_on (written)
# Returns:   1 when the scratch directory cannot be created or entered
do_processfile_version_2() {
  working_on=$(basename "${1}")
  # Bail out instead of decompressing into (and later leaving) the wrong
  # directory when the scratch directory is unavailable.
  mkdir "${working_on}" || return 1
  cd "${working_on}" || return 1
  do_decompress_version_2 "${1}" "${2}"
  cd .. || return 1

  printf "Combining %s into single file ... " "${2}"
  if [ "${4}" = "convert_zeros" ]; then
    cat "${working_on}"/* | tr '\n\0' '\t\n' > "${3}"
  else
    cat "${working_on}"/* > "${3}"
  fi
  printf "done.\n"

  rm -rf "${working_on}"
}
| 76 | |||
# Print the size in bytes of a numbered chunk file in the current directory.
#
# Arguments: $1 - chunk number; zero-padded to ${filename_len} digits to form
#                 the file name
# Globals:   filename_len (read), size_name, size_bytes (written)
# Outputs:   the byte count on stdout
# Returns:   1 when the file cannot be read
size() {
  size_name=$(printf "%0${filename_len}d" "${1}") || return 1
  # wc -c is specified by POSIX, whereas `stat -f %z` only works with BSD stat.
  size_bytes=$(wc -c < "${size_name}") || return 1
  # Some wc implementations pad the count with blanks; the arithmetic
  # expansion normalises it to a bare number.
  printf '%s\n' "$(( ${size_bytes} ))"
}
| 80 | |||
# Print the first four bytes of a numbered chunk file as one unsigned decimal
# number, preceded by a single space (as produced by the hexdump format).
#
# Arguments: $1 - chunk number; zero-padded to ${filename_len} digits to form
#                 the file name
# Globals:   filename_len (read), dword_name (written)
get_dword() {
  dword_name=$(printf "%0${filename_len}d" "${1}") || return 1
  hexdump -n 4 -v -e '" " 1/4 "%u"' "${dword_name}"
}
| 85 | |||
# Handler for the oldest layout (announced as "pre-02/1996"): feeds the list
# of dpr*.001 files found below $1 to extract_version_1 and then gives the
# resulting NN_unknown column files their final names.
#
# Arguments: $1 - phonebook directory
# Returns:   the status of the last rename
handle_format_version_1() {
  echo "Working on $1. Detected pre-02/1996 Telefonbuch version."

  # Extract all dpr database files
  printf "Extracting dpr databases ... "
  find "$1" -name dpr\*.001 | extract_version_1
  printf "done.\n"

  # Rename the extracted columns. The positional parameters are reused as a
  # list of "source destination" pairs, processed in the order given.
  set -- \
    01_unknown 01_Flags \
    02_unknown 02_Nachname \
    03_unknown 03_Vorname \
    04_unknown 05_Adresszusatz \
    05_unknown 06_Ortszusatz \
    06_unknown 10_Zustellamt_PLZOst \
    07_unknown 07_Strasse \
    08_unknown 08_Hausnummer \
    09_unknown 04_Namenszusatz \
    10_unknown 09_Fax_Verweise \
    11_unknown 12_Vorwahl \
    12_unknown 13_Rufnummer \
    13_unknown 11_Ort \
    14_unknown 10_Postleitzahl

  rename_status=0
  while [ $# -gt 0 ]; do
    mv "$1" "$2"
    rename_status=$?
    shift 2
  done
  return "${rename_status}"
}
| 109 | |||
| 110 | |||
# Handler for the layout announced as "pre-2004": decompresses teiln.dat into
# numbered chunk files, splits them into one file per column, renames the
# columns and, when present, resolves street names (strassen.dat) and geo
# coordinates (karto.dat).
#
# Arguments: $1 - phonebook directory
# Globals:   EL (read); filename_len (exported); tab, teiln, first_chunk,
#            number_of_files, table_file, vname_file, nname_file, streets,
#            karto (written)
# Exits:     1 when the chunk headers do not match the expected layout
handle_format_version_2() {
  echo "Working on $1. Detected pre-2004 Telefonbuch version."

  # A literal tab for lam's separators; $'\t' is not available in every
  # /bin/sh.
  tab=$(printf '\t')

  # Extract teiln.dat. The path is matched case-insensitively by the glob; the
  # loop only serves to pick the first match.
  for teiln in "${1}"/[Dd][Aa][Tt]/[Tt][Ee][Ii][Ll][Nn].[Dd][Aa][Tt]; do
    break
  done
  do_decompress_version_2 "${teiln}" "teiln.dat"

  # See how long each filename is (taken from the first directory entry)
  for first_chunk in *; do
    break
  done
  export filename_len="${#first_chunk}"

  # Get total amount of files, for reporting progress
  number_of_files=$(find -E . -depth 1 -regex '^\./[0123456789]+' | wc -l)

  # from 2000F on file 0+3*n is table, so make it default
  table_file=0; vname_file=2

  # if supposed vname file is larger than table file,
  # we're having a pre-2000F layout, so switch accordingly
  if [ "$(size "${table_file}")" -lt "$(size "${vname_file}")" ]; then
    table_file=2; nname_file=0; vname_file=1
  else
    nname_file=1
  fi

  # Table file has a table header with identical count
  # to nname file's header. Verify this
  if [ "$(get_dword "${nname_file}")" -ne "$(get_dword "${table_file}")" ]; then
    echo "Unknown layout."
    # A bare `exit` would report the successful comparison above as status 0.
    exit 1
  fi

  # Now loop over all files and dump them
  printf "Splitting decompressed nname chunks into their columns ... "
  jot -w "%0${filename_len}d" - "${nname_file}" $(( number_of_files - 1 )) 3 | split_version_2 1 1
  # set -- `hexdump -n 8 -v -e '" " 1/4 "%u"' ${file}`
  # tail -c +$(( $2 + 1 )) ${file}
  # done | tr '\n\0' '\t\n' > 01_02_Flags_Nachname
  # The first character of each line is the flag, the rest is the name.
  cut -c 1 < 01_unknown > 01_Flags
  cut -c 2- < 01_unknown > 02_Nachname
  rm 01_unknown
  printf "done.\n"

  printf "Splitting decompress vname chunks into their columns ... "
  jot -w "%0${filename_len}d" - "${vname_file}" $(( number_of_files - 1 )) 3 | xargs cat | tr '\n\0' '\t\n' | tr -d '\377' > 03_Vorname
  printf "done.\n"

  printf "Splitting decompress table file chunks into their columns ... "
  jot -w "%0${filename_len}d" - "${table_file}" $(( number_of_files - 1 )) 3 | split_version_2 4 0
  # for file in `jot -w %0${filename_len}d - ${table_file} $(( number_of_files - 1 )) 3`; do
  # # Offset into first table entry tells us how many
  # # fields are in table file
  # set -- `hexdump -n 64 -v -e '" " 1/4 "%u"' ${file}`
  # count=$1; table_entries=$(( $2 / 4 - 1 )); shift
  #
  # # Now iterate over all entries in the table file
  # for idx in `jot ${table_entries}`; do
  # tail -c +$(( $1 + 1 )) ${file} | tr '\n\0' '\t\n' | head -n ${count} >> `printf %02d_unknown $(( idx + 3 ))`
  # shift
  # done
  # done
  printf "done.\n"

  # wipe all temporary extracted files
  printf "Cleaning up decompressed chunks ... "
  find -E . -depth 1 -regex '^\./[0123456789]+' -delete
  printf "done.\n"

  # rename our columns extracted from the table file
  mv 04_unknown 04_Namenszusatz
  mv 05_unknown 05_Adresszusatz
  mv 06_unknown 06_Ortszusatz
  mv 08_unknown 08_Hausnummer
  mv 09_unknown 09_Verweise
  mv 10_unknown 10_Postleitzahl
  mv 11_unknown 11_Ort
  mv 12_unknown 12_Vorwahl
  mv 13_unknown 13_Rufnummer
  [ -f 14_unknown ] && mv 14_unknown 14_Email
  [ -f 15_unknown ] && mv 15_unknown 15_Webadresse

  # If street names come in an extra file, extract
  # street names first
  for streets in "${1}"/[Dd][Aa][Tt]/[Ss][Tt][Rr][Aa][Ss][Ss][Ee][Nn].[Dd][Aa][Tt]; do
    break
  done
  [ -f "${streets}" ] && do_processfile_version_2 "${streets}" "street name" 99_Strassenname convert_zeros

  # extract street names if 07_unknown contains street indexes
  # instead of street names
  if [ -f 99_Strassenname ]; then
    mv 07_unknown 07_Strassenindex
    printf "Looking up street names from indexes ... "
    cut -d ';' -f 1 07_Strassenindex | "${EL}" -0x 99_Strassenname > 07_Strasse
    printf "done.\n"
  else
    mv 07_unknown 07_Strasse
  fi

  for karto in "${1}"/[Dd][Aa][Tt]/[Kk][Aa][Rr][Tt][Oo].[Dd][Aa][Tt]; do
    break
  done
  if [ -f "${karto}" ]; then
    do_processfile_version_2 "${karto}" "geo coordinates" 90_Geokoordinaten_hnr_raw

    printf "Looking up geo coordinates for each phonebook entry ... "
    # Keep fields 1-4, 6 and 7 of every NUL-terminated, ';'-separated record.
    tr '\0' '\n' < 90_Geokoordinaten_hnr_raw | tr ';' '\t' | cut -f "1,2,3,4,6,7" | tr '\n' '\0' > 90_Geokoordinaten_hnr
    rm 90_Geokoordinaten_hnr_raw
    lam 10_Postleitzahl -s "${tab}" 11_Ort -s "${tab}" 07_Strasse -s "${tab}" 08_Hausnummer | map_coords 90_Geokoordinaten_hnr | convert_coords > 16_Koordinaten
    printf "done.\n"
  fi
}
| 216 | |||
# Handler for the layout announced as "post-2003": extracts streets.tl and
# phonebook.db into file_* chunks, regroups every 11th chunk into one column
# file, renames the columns, resolves street names through ${EL} and, when a
# geo table is present, looks up coordinates for every entry.
#
# Arguments: $1 - phonebook directory
# Globals:   EL (read); tab, rows, col (written)
handle_format_version_3() {
  echo "Working on $1. Detected post-2003 Telefonbuch version."

  # A literal tab for sed and lam; $'\t' is not available in every /bin/sh.
  tab=$(printf '\t')

  printf "Extracting street names ... "
  extract_version_3 "${1}/streets.tl"

  cat file_* | tr '\n\0' '\t\n' > 99_Strassenname
  rm file_*
  printf "done.\n"

  printf "Extracting phonebook.db ... "
  extract_version_3 "${1}/phonebook.db"

  rows=$(find . -name 'file_*' | wc -l)
  printf "done.\n"

  printf "Splitting decompressed chunks into their columns (11 total) ... 1, "
  # Column 0 is dumped as hex, one byte per line, instead of being converted
  # like the text columns below.
  jot -w "file_%05X" - 0 $(( rows - 1 )) 11 | xargs cat | xxd -ps -c1 > column_0

  for col in 1 2 3 4 5 6 7 8 9 10; do
    printf "%d, " $(( col + 1 ))
    jot -w "file_%05X" - "${col}" $(( rows - 1 )) 11 | xargs cat | tr '\n\0' '\t\n' > "column_${col}"
  done
  printf "done.\n"

  printf "Cleaning up decompressed chunks ... "
  find . -name 'file_*' -delete
  printf "done.\n"

  mv column_0 01_Flags
  mv column_1 02_Nachname
  mv column_2 03_Vorname
  mv column_3 04_05_Namenszusatz_Addresszusatz
  mv column_4 09_Verweise
  mv column_5 07_08_Strassenindex_Hausnummer
  mv column_6 12_Vorwahl
  mv column_7 10_Postleitzahl
  mv column_8 11_Ort
  mv column_9 13_Rufnummer
  mv column_10 14_15_Email_Webadresse

  printf "Looking up street names from indexes ... "
  cut -f 1 07_08_Strassenindex_Hausnummer | "${EL}" -0 99_Strassenname > 07_Strasse
  printf "done.\n"

  printf "Splitting house numbers ... "
  # Append a tab to every line so that lines without a house number yield an
  # empty second field instead of cut echoing the whole line.
  sed -E "s:\$:${tab}:" < 07_08_Strassenindex_Hausnummer | cut -f 2 > 08_Hausnummer
  printf "done.\n"

  if [ -f "${1}/zip-streets-hn-geo.tl" ]; then
    printf "Extracting geo coordinates (precision: house number) ... "
    extract_version_3 "${1}/zip-streets-hn-geo.tl"
    cat file_* > 90_Geokoordinaten_hnr
    printf "done.\n"
    printf "Looking up geo coordinates for each phonebook entry ... "
    lam 10_Postleitzahl -s "${tab}" 07_Strasse -s "${tab}" 08_Hausnummer | map_coords 90_Geokoordinaten_hnr | convert_coords > 16_Koordinaten
    printf "done.\n"
  elif [ -f "${1}/zip-streets-geo.tl" ]; then
    printf "Extracting geo coordinates (precision: street) ... "
    extract_version_3 "${1}/zip-streets-geo.tl"
    cat file_* > 91_Geokoordinaten_str
    printf "done.\n"
    printf "Looking up geo coordinates for each phonebook entry ... "
    lam 10_Postleitzahl -s "${tab}" 07_Strasse | map_coords 91_Geokoordinaten_str | convert_coords > 16_Koordinaten
    printf "done.\n"
  fi
  # When neither geo table exists there are no chunks left at this point;
  # -f keeps rm from reporting an error in that case.
  rm -f file_*
}
| 284 | |||
# Invoke main only here, at the very end of the file, so that every function
# it calls has already been defined; all command-line arguments are forwarded
# unchanged.
main "$@"
