From 2a185f889470f9bfa049b1610900536309aded5a Mon Sep 17 00:00:00 2001 From: Dirk Engling Date: Mon, 24 Feb 2014 03:14:53 +0100 Subject: Use make the way it was intended --- src/Makefile | 24 ----- src/makecolumns.sh | 286 ----------------------------------------------------- 2 files changed, 310 deletions(-) delete mode 100644 src/Makefile delete mode 100755 src/makecolumns.sh (limited to 'src') diff --git a/src/Makefile b/src/Makefile deleted file mode 100644 index 6d40c6b..0000000 --- a/src/Makefile +++ /dev/null @@ -1,24 +0,0 @@ -BINARIES=../bin/extract_version_1 ../bin/extract_version_2 ../bin/extract_version_3 ../bin/split_version_2 ../bin/map_coords ../bin/convert_coords -binaries: $(BINARIES) -CFLAGS+=-Wextra - -../bin/extract_version_3: export/extract_version_3.c export/mystdlib.c - @$(CC) $(CFLAGS) -O2 -o ../bin/extract_version_3 export/extract_version_3.c export/mystdlib.c -lz -I export - -../bin/extract_version_2: export/extract_version_2.c export/mystdlib.c - @$(CC) $(CFLAGS) -O3 -o ../bin/extract_version_2 export/extract_version_2.c export/mystdlib.c -I export - -../bin/extract_version_1: export/extract_version_1.c export/mystdlib.c - @$(CC) $(CFLAGS) -O3 -o ../bin/extract_version_1 export/extract_version_1.c export/mystdlib.c -I export - -../bin/split_version_2: export/split_version_2.c - @$(CC) $(CFLAGS) -O3 -o ../bin/split_version_2 export/split_version_2.c - -../bin/map_coords: export/map_coords.c export/mystdlib.c - @$(CC) $(CFLAGS) -O3 -o ../bin/map_coords export/map_coords.c export/mystdlib.c -I export - -../bin/convert_coords: export/convert_coords.c - @$(CC) $(CFLAGS) -O3 -o ../bin/convert_coords export/convert_coords.c -lm - -clean: - @rm -f $(BINARIES) diff --git a/src/makecolumns.sh b/src/makecolumns.sh deleted file mode 100755 index 0f9c5ba..0000000 --- a/src/makecolumns.sh +++ /dev/null @@ -1,286 +0,0 @@ -#!/bin/sh - -export LANG=C -export LC_CTYPE=C -export LC_ALL=C -export PATH=${PATH}:`pwd`/../bin/ - -main() { - [ -f /usr/local/bin/el ] && EL=/usr/local/bin/el - [ -f `dirname $0`/../bin/el ] && EL=`dirname $0`/../bin/el - - if [ -z "${EL}" ]; then - echo "el not found. Get it at 'cvs -d :pserver:anoncvs@cvs.erdgeist.org:/home/cvsroot co el'" - exit 1 - fi - - if [ $# -ne 1 ]; then - echo "Syntax: $0 [phonebookdirectory]" - exit 1 - fi - - # Compile all the binaries - make binaries - - printf "Cleaning up old working directory ... " - rm -rf ../work_`basename "${1#white_}"` - printf "done.\n" - mkdir -p ../work_`basename "${1#white_}"` - cd ../work_`basename "${1#white_}"` || exit 1 - - if [ -f "$1/phonebook.db" ]; then - handle_format_version_3 "${1}" - elif [ -f "${1}/[Dd][Aa][Tt]/[Tt][Ee][Ii][Ll][Nn].[Dd][Aa][Tt]" ]; then - handle_format_version_2 "${1}" - elif [ -n "`find "${1}" -name dpr00000.005 -ls -quit`" ]; then - handle_format_version_1 "${1}" - else - echo "Not a recognized Telefonbuch folder" - fi - cd .. -} - -do_decompress_version_2() { - printf "Extracting $2 chunks ... " - extract_version_2 "${1}" - printf "done.\n" - - printf "Decompressing $2 chunks ... " - numfiles=`find . -name \*.lha | wc -l` - reported=0; processed=0 - for archive in *.lha; do - lha x ${archive} > /dev/null - rm ${archive} - [ 1 -eq $(( ( ( (processed+=1) * 20 ) / numfiles ) > reported )) ] && printf "%d%% " $(( (reported+=1) * 5 )) - done - [ $reported -lt 10 ] && printf "100% " - printf "done.\n" -} - -do_processfile_version_2() { - working_on=`basename ${1}` - mkdir $working_on && cd ${working_on} - do_decompress_version_2 "${1}" "${2}" - cd .. - - printf "Combining $2 into single file ... " - if [ "${4}" = "convert_zeros" ]; then - cat ${working_on}/* | tr '\n\0' '\t\n' > $3 - else - cat ${working_on}/* > $3 - fi - printf "done.\n" - - rm -rf ${working_on} -} - -size() { - stat -f %z `printf %0${filename_len}d $1` -} - -get_dword() { - # $1 file - hexdump -n 4 -v -e '" " 1/4 "%u"' `printf %0${filename_len}d ${1}` -} - -handle_format_version_1() { - echo "Working on $1. Detected pre-02/1996 Telefonbuch version." - # Extract all dpr database files - printf "Extracting dpr databases ... " - find "$1" -name dpr\*.001 | extract_version_1 - printf "done.\n" - - # rename our extracted columns - mv 01_unknown 01_Flags - mv 02_unknown 02_Nachname - mv 03_unknown 03_Vorname - mv 04_unknown 05_Adresszusatz - mv 05_unknown 06_Ortszusatz - mv 06_unknown 10_Zustellamt_PLZOst - mv 07_unknown 07_Strasse - mv 08_unknown 08_Hausnummer - mv 09_unknown 04_Namenszusatz - mv 10_unknown 09_Fax_Verweise - mv 11_unknown 12_Vorwahl - mv 12_unknown 13_Rufnummer - mv 13_unknown 11_Ort - mv 14_unknown 10_Postleitzahl -} - - -handle_format_version_2() { - echo "Working on $1. Detected pre-2004 Telefonbuch version." - # Extract teiln.dat - do_decompress_version_2 $1/[Dd][Aa][Tt]/[Tt][Ee][Ii][Ll][Nn].[Dd][Aa][Tt] "teiln.dat" - - # See how long each filename is - export filename_len=$(( `ls | head -n 1 | wc -c` - 1 )) - - # Get total amount of files, for reporting progress - number_of_files=`find -E . -depth 1 -regex '^\./[0123456789]+' | wc -l` - - # from 2000F on file 0+3*n is table, so make it default - table_file=0; vname_file=2 - - # if supposed vname file is larger than table file, - # we're having a pre-2000F layout, so switch accordingly - if [ `size ${table_file}` -lt `size ${vname_file}` ]; then - table_file=2; nname_file=0; vname_file=1 - else - nname_file=1 - fi - - # Table file has a table header with identical count - # to nname file's header. Verify this - if [ `get_dword ${nname_file}` -ne `get_dword ${table_file}` ]; then - echo "Unknown layout." - exit - fi - - # Now loop over all files and dump them - printf "Splitting decompressed nname chunks into their columns ... " - jot -w %0${filename_len}d - ${nname_file} $(( number_of_files - 1 )) 3 | split_version_2 1 1 -# set -- `hexdump -n 8 -v -e '" " 1/4 "%u"' ${file}` -# tail -c +$(( $2 + 1 )) ${file} -# done | tr '\n\0' '\t\n' > 01_02_Flags_Nachname - cut -c 1 < 01_unknown > 01_Flags - cut -c 2- < 01_unknown > 02_Nachname - rm 01_unknown - printf "done.\n" - - printf "Splitting decompress vname chunks into their columns ... " - jot -w "%0${filename_len}d" - ${vname_file} $(( number_of_files - 1 )) 3 | xargs cat | tr '\n\0' '\t\n' | tr -d '\377' > 03_Vorname - printf "done.\n" - - printf "Splitting decompress table file chunks into their columns ... " - jot -w %0${filename_len}d - ${table_file} $(( number_of_files - 1 )) 3 | split_version_2 4 0 -# for file in `jot -w %0${filename_len}d - ${table_file} $(( number_of_files - 1 )) 3`; do -# # Offset into first table entry tells us how many -# # fields are in table file -# set -- `hexdump -n 64 -v -e '" " 1/4 "%u"' ${file}` -# count=$1; table_entries=$(( $2 / 4 - 1 )); shift -# -# # Now iterate over all entries in the table file -# for idx in `jot ${table_entries}`; do -# tail -c +$(( $1 + 1 )) ${file} | tr '\n\0' '\t\n' | head -n ${count} >> `printf %02d_unknown $(( idx + 3 ))` -# shift -# done -# done - printf "done.\n" - - # wipe all temporary extracted files - printf "Cleaning up decompressed chunks ... " - find -E . -depth 1 -regex '^\./[0123456789]+' -delete - printf "done.\n" - - # rename our columns extracted from the table file - mv 04_unknown 04_Namenszusatz - mv 05_unknown 05_Adresszusatz - mv 06_unknown 06_Ortszusatz - mv 08_unknown 08_Hausnummer - mv 09_unknown 09_Verweise - mv 10_unknown 10_Postleitzahl - mv 11_unknown 11_Ort - mv 12_unknown 12_Vorwahl - mv 13_unknown 13_Rufnummer - [ -f 14_unknown ] && mv 14_unknown 14_Email - [ -f 15_unknown ] && mv 15_unknown 15_Webadresse - - # If street names come in an extra file, extract - # street names first - streets=$1/[Dd][Aa][Tt]/[Ss][Tt][Rr][Aa][Ss][Ss][Ee][Nn].[Dd][Aa][Tt] - [ -f ${streets} ] && do_processfile_version_2 ${streets} "street name" 99_Strassenname convert_zeros - - # extract street names if 07_unknown contains street indexes - # instead of street names - if [ -f 99_Strassenname ]; then - mv 07_unknown 07_Strassenindex - printf "Looking up street names from indexes ... " - cut -d ';' -f 1 07_Strassenindex | ${EL} -0x 99_Strassenname > 07_Strasse - printf "done.\n" - else - mv 07_unknown 07_Strasse - fi - - karto=$1/[Dd][Aa][Tt]/[Kk][Aa][Rr][Tt][Oo].[Dd][Aa][Tt] - if [ -f ${karto} ]; then - do_processfile_version_2 ${karto} "geo coordinates" 90_Geokoordinaten_hnr_raw - - printf "Looking up geo coordinates for each phonebook entry ... " - tr '\0' '\n' < 90_Geokoordinaten_hnr_raw | tr ';' '\t' | cut -f "1,2,3,4,6,7" | tr '\n' '\0' > 90_Geokoordinaten_hnr - rm 90_Geokoordinaten_hnr_raw - lam 10_Postleitzahl -s $'\t' 11_Ort -s $'\t' 07_Strasse -s $'\t' 08_Hausnummer | map_coords 90_Geokoordinaten_hnr | convert_coords > 16_Koordinaten - printf "done.\n" - fi -} - -handle_format_version_3() { - echo "Working on $1. Detected post-2003 Telefonbuch version." - printf "Extracting street names ... " - extract_version_3 $1/streets.tl - - cat file_* | tr '\n\0' '\t\n' > 99_Strassenname - rm file_* - printf "done.\n" - - printf "Extracting phonebook.db ... " - extract_version_3 $1/phonebook.db - - rows=`find . -name file_\* | wc -l` - printf "done.\n" - - printf "Splitting decompressed chunks into their columns (11 total) ... 1, " - jot -w "file_%05X" - 0 $(( rows - 1 )) 11 | xargs cat | xxd -ps -c1 > column_0 - - for col in 1 2 3 4 5 6 7 8 9 10; do - printf "%d, " $(( col + 1 )) - jot -w "file_%05X" - ${col} $(( rows - 1 )) 11 | xargs cat | tr '\n\0' '\t\n' > column_${col} - done - printf "done.\n" - - printf "Cleaning up decompressed chunks ... " - find . -name file_\* -delete - printf "done.\n" - - mv column_0 01_Flags - mv column_1 02_Nachname - mv column_2 03_Vorname - mv column_3 04_05_Namenszusatz_Addresszusatz - mv column_4 09_Verweise - mv column_5 07_08_Strassenindex_Hausnummer - mv column_6 12_Vorwahl - mv column_7 10_Postleitzahl - mv column_8 11_Ort - mv column_9 13_Rufnummer - mv column_10 14_15_Email_Webadresse - - printf "Looking up street names from indexes ... " - cut -f 1 07_08_Strassenindex_Hausnummer | ${EL} -0 99_Strassenname > 07_Strasse - printf "done.\n" - - printf "Splitting house numbers ... " - sed -E $'s:$:\t:' < 07_08_Strassenindex_Hausnummer | cut -f 2 > 08_Hausnummer - printf "done.\n" - - if [ -f $1/zip-streets-hn-geo.tl ]; then - printf "Extracting geo coordinates (precision: house number) ... " - extract_version_3 $1/zip-streets-hn-geo.tl - cat file_* > 90_Geokoordinaten_hnr - printf "done.\n" - printf "Looking up geo coordinates for each phonebook entry ... " - lam 10_Postleitzahl -s $'\t' 07_Strasse -s $'\t' 08_Hausnummer | map_coords 90_Geokoordinaten_hnr | convert_coords > 16_Koordinaten - printf "done.\n" - elif [ -f $1/zip-streets-geo.tl ]; then - printf "Extracting geo coordinates (precision: street) ... " - extract_version_3 $1/zip-streets-geo.tl - cat file_* > 91_Geokoordinaten_str - printf "done.\n" - printf "Looking up geo coordinates for each phonebook entry ... " - lam 10_Postleitzahl -s $'\t' 07_Strasse | map_coords 91_Geokoordinaten_str | convert_coords > 16_Koordinaten - printf "done.\n" - fi - rm file_* -} - -# After function definitions, main() can use them -main "$@" -- cgit v1.2.3