From 7f66b38b3dd8602b1532d0e6cda645f771474f94 Mon Sep 17 00:00:00 2001
From: Dirk Engling
Date: Sat, 18 Jan 2014 03:58:54 +0100
Subject: remove hexout tool, use xxd. Also automatize export of old issues of Das Telefonbuch in the shell script

---
 src/Makefile       |   6 +-
 src/makecolumns.sh | 219 ++++++++++++++++++++++++++++++++++++++++-------------
 2 files changed, 170 insertions(+), 55 deletions(-)

diff --git a/src/Makefile b/src/Makefile
index 8de6d8b..8d1a66d 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -1,7 +1,7 @@
-all: decompress hexout
+all: decompress extractblocks
 
 decompress: decompress.c mystdlib.c
 	gcc -O2 -o ../bin/decompress decompress.c mystdlib.c -lz
 
-hexout: hexout.c
-	gcc -O2 -o ../bin/hexout hexout.c
+extractblocks: extractblocks_new.c mystdlib.c
+	gcc -o ../bin/extractblocks extractblocks_new.c mystdlib.c
diff --git a/src/makecolumns.sh b/src/makecolumns.sh
index cd26091..c4b5d24 100644
--- a/src/makecolumns.sh
+++ b/src/makecolumns.sh
@@ -4,64 +4,179 @@ export LANG=C
 export LC_CTYPE=C
 export LC_ALL=C
 
-[ -f /usr/local/bin/el ] && EL=/usr/local/bin/el
-[ -f `dirname $0`/../bin/el ] && EL=`dirname $0`/../bin/el
+main() {
+    [ -f /usr/local/bin/el ] && EL=/usr/local/bin/el
+    [ -f `dirname $0`/../bin/el ] && EL=`dirname $0`/../bin/el
 
-if [ -z "${EL}" ]; then
-  echo "el not found. Get it at 'cvs -d :pserver:anoncvs@cvs.erdgeist.org:/home/cvsroot co el'"
-  exit 1
-fi
-
-HO=`dirname $0`/../bin/hexout
-
-if [ $# -ne 1 ]; then
-  echo "Syntax: $0 [phonebookdirectory]"
-  exit 1
-fi
-
-make -f Makefile all
+    if [ -z "${EL}" ]; then
+        echo "el not found. Get it at 'cvs -d :pserver:anoncvs@cvs.erdgeist.org:/home/cvsroot co el'"
+        exit 1
+    fi
 
-mkdir ../work
-cd ../work
+    if [ $# -ne 1 ]; then
+        echo "Syntax: $0 [phonebookdirectory]"
+        exit 1
+    fi
 
-../bin/decompress $1/streets.tl
+    # Compile all the binaries
+    make all
 
-cat file_* > streets
-rm file_*
+    mkdir -p ../work_`basename $1`
+    cd ../work_`basename $1` || exit 1
 
-../bin/decompress $1/phonebook.db
+    if [ -f "$1/phonebook.db" ]; then
+        handle_new_format $1
+    elif [ -f "$1/DAT/TEILN.DAT" ]; then
+        handle_old_format "$1/DAT/TEILN.DAT"
+    elif [ -f "$1/dat/teiln.dat" ]; then
+        handle_old_format "$1/dat/teiln.dat"
+    else
+        echo "Not a recognized Telefonbuch folder"
+    fi
+}
+
+get_dword() {
+    # $1 file, $2 offset
+    set -- `od -tu4 -N4 -j$(( 4*${2:-0} )) ${1}`
+    printf "%d\n" $2
+}
+
+handle_old_format() {
+    # Clear old files. Be very careful, we could
+    # have ended up in an unexpected directory, after all.
+    find -depth 1 -regex ^[0-9]+$ -delete
+    rm ??_*
+
+    # First the simple stuff, extract teiln.dat
+    ../bin/extractblocks $1
+
+    # This should leave us with a bunch of .lha files
+    for archive in *.lha; do lha x ${archive}; done
+    find . -name \*.lha -delete
+
+    # See how long each filename is
+    filename_len=$(( `ls | head -n 1 | wc -c` - 1 ))
+
+    # from 2000F on file 0+3*n is table, so make it default
+    table_file=`printf %0${filename_len}d 0`
+    vname_file=`printf %0${filename_len}d 2`
+
+    # if supposed vname file is larger than table file,
+    # we're having a pre-2000F layout, so switch accordingly
+    if [ `stat -f %z ${table_file}` -lt `stat -f %z ${vname_file}` ]; then
+        table_file=`printf %0${filename_len}d 2`
+        nname_file=`printf %0${filename_len}d 0`
+        vname_file=`printf %0${filename_len}d 1`
+    else
+        nname_file=`printf %0${filename_len}d 1`
+    fi
 
-numfiles=`find . -name file_\* | wc -l`
+    # Table file has a table header with identical count
+    # to nname file's header. Verify this
+    if [ `get_dword ${nname_file}` -ne `get_dword ${table_file}` ]; then
+        echo "Unknown layout."
+        exit
+    fi
 
-for column in `jot - 0 10 1`; do
-  for file in `jot - ${column} ${numfiles} 11`; do
-    acton=`printf file_%05X ${file}`
-    if [ ${column} = 0 ]; then
-      ${HO} < ${acton} >> column_0;
+    # Now loop over all files and dump them
+    while [ -f ${nname_file} ]; do
+        # Get number of entries in this round
+        count=`get_dword ${nname_file}`
+
+        # Get offset into first nname
+        nname_off=$(( `get_dword ${nname_file} 1` + 1 ))
+
+        # Now get the flags before the nnames
+        tail -c +${nname_off} ${nname_file} | tr '\n\0' '\t\n' | head -n ${count} | cut -c -1 >> 01_Flags
+        tail -c +${nname_off} ${nname_file} | tr '\n\0' '\t\n' | head -n ${count} | cut -c 2- >> 02_Nachname
+
+        # Extract the vnames
+        tr '\n\0' '\t\n' < ${vname_file} | head -n ${count} >> 03_Vorname
+
+        # Offset into first table entry tells us how many
+        # fields are in table file
+        table_entries=$(( `get_dword ${table_file} 1` / 4 - 1 ))
+
+        # Now iterate over all entries in the table file
+        for table_index in `jot ${table_entries}`; do
+            table_off=`get_dword ${table_file} ${table_index}`
+            tail -c +$(( table_off + 1 )) ${table_file} | tr '\n\0' '\t\n' | head -n ${count} >> `printf %02d_unknown $(( table_index + 3 ))`
+        done
+
+        # Advance the filenames. Note, that we need bc because
+        # builtin arithmetic treats numbers with leading zeros as octals
+        nname_file=`printf "%s + 3\n" ${nname_file} | bc`
+        nname_file=`printf %0${filename_len}d ${nname_file}`
+        vname_file=`printf "%s + 3\n" ${vname_file} | bc`
+        vname_file=`printf %0${filename_len}d ${vname_file}`
+        table_file=`printf "%s + 3\n" ${table_file} | bc`
+        table_file=`printf %0${filename_len}d ${table_file}`
+
+    done
+
+    # wipe all temporary extracted files
+    find -depth 1 -regex ^[0-9]+$ -delete
+
+    # rename our columns extracted from the table file
+    mv 04_unknown 04_Namenszusatz
+    mv 05_unknown 05_Adresszusatz
+    mv 06_unknown 06_Ortszusatz
+    mv 08_unknown 08_Hausnummer
+    mv 09_unknown 09_Detail
+    mv 10_unknown 10_Postleitzahl
+    mv 11_unknown 11_Ort
+    mv 12_unknown 12_Vorwahl
+    mv 13_unknown 13_Rufnummer
+    [ -f 14_unknown ] && mv 14_unknown 14_Email
+    [ -f 15_unknown ] && mv 15_unknown 15_Webadresse
+
+    # TODO: extract street names if 07_unknown contains street indexes
+    # instead of street names
+}
+
+handle_new_format() {
+    ../bin/decompress $1/streets.tl
+
+    cat file_* > streets
+    rm file_*
+
+    ../bin/decompress $1/phonebook.db
+
+    numfiles=`find . -name file_\* | wc -l`
+
+    for column in `jot - 0 10 1`; do
+        for file in `jot - ${column} ${numfiles} 11`; do
+            acton=`printf file_%05X ${file}`
+            if [ ${column} = 0 ]; then
+                xxd -ps -c1 ${acton} >> column_0;
+            else
+                tr '\n\0' '\t\n' < ${acton} >> column_${column};
+            fi
+        done;
+    done
+
+    mv column_0 01_Flags
+    mv column_1 02_Nachname
+    mv column_2 03_Vorname
+    mv column_3 04_05_Namenszusatz_Addresszusatz
+    mv column_4 09_Detail
+    mv column_5 07_08_Strassenindex_Hausnummer
+    mv column_6 12_Vorwahl
+    mv column_7 10_Postleitzahl
+    mv column_8 11_Ort
+    mv column_9 13_Rufnummer
+    mv column_10 14_15_Email_Webadresse
+
+    [ -e streets ] && tr '\n\0' '\t\n' < streets > 99_Strassenname && rm streets
+
+    if [ -e 99_Strassenname ]; then
+        cut -f 1 07_08_Strassenindex_Hausnummer | ${EL} -0 99_Strassenname > 07_Strasse
     else
-      tr '\0' '\n' < ${acton} >> column_${column};
+        echo Could not convert streetindexes
     fi
-  done;
-done
-
-mv column_0 01_Flags
-mv column_1 02_Nachname
-mv column_2 03_Vorname
-mv column_3 04_05_Namenszusatz_Addresszusatz
-mv column_4 09_Detail
-mv column_5 07_08_Strassenindex_Hausnummer
-mv column_6 12_Vorwahl
-mv column_7 10_Postleitzahl
-mv column_8 11_Ort
-mv column_9 13_Rufnummer
-mv column_10 14_15_Email_Webadresse
-
-[ -e streets ] && tr '\0' '\n' < streets > 99_Strassenname && rm streets
-
-if [ -e 99_Strassenname ]; then
-  cut -f 1 07_08_Strassenindex_Hausnummer | ${EL} -0 99_Strassenname > 07_Strasse
-else
-  echo Could not convert streetindexes
-fi
-
-find . -name file_\* -delete
+
+    find . -name file_\* -delete
+}
+
+# After function definitions, main() can use them
+main "$@"
-- 
cgit v1.2.3