From 7f66b38b3dd8602b1532d0e6cda645f771474f94 Mon Sep 17 00:00:00 2001
From: Dirk Engling <erdgeist@erdgeist.org>
Date: Sat, 18 Jan 2014 03:58:54 +0100
Subject: remove hexout tool, use xxd. Also automatize export of old issues of
 Das Telefonbuch in the shell script

---
 src/Makefile       |   6 +-
 src/makecolumns.sh | 219 ++++++++++++++++++++++++++++++++++++++++-------------
 2 files changed, 170 insertions(+), 55 deletions(-)

diff --git a/src/Makefile b/src/Makefile
index 8de6d8b..8d1a66d 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -1,7 +1,7 @@
-all: decompress hexout
+all: decompress extractblocks
 
 decompress: decompress.c mystdlib.c
 	gcc -O2 -o ../bin/decompress decompress.c mystdlib.c -lz
 
-hexout: hexout.c
-	gcc -O2 -o ../bin/hexout hexout.c
+extractblocks: extractblocks_new.c mystdlib.c
+	gcc -o ../bin/extractblocks extractblocks_new.c mystdlib.c
diff --git a/src/makecolumns.sh b/src/makecolumns.sh
index cd26091..c4b5d24 100644
--- a/src/makecolumns.sh
+++ b/src/makecolumns.sh
@@ -4,64 +4,179 @@ export LANG=C
 export LC_CTYPE=C
 export LC_ALL=C
 
-[ -f /usr/local/bin/el ] && EL=/usr/local/bin/el
-[ -f `dirname $0`/../bin/el ] && EL=`dirname $0`/../bin/el
+main() { # Entry point; $1 is the phonebook directory to export.
+    [ -f /usr/local/bin/el ] && EL=/usr/local/bin/el
+    [ -f "$(dirname "$0")/../bin/el" ] && EL="$(dirname "$0")/../bin/el" # a copy next to the script wins
 
-if [ -z "${EL}" ]; then
-  echo "el not found. Get it at 'cvs -d :pserver:anoncvs@cvs.erdgeist.org:/home/cvsroot co el'"
-  exit 1
-fi
-
-HO=`dirname $0`/../bin/hexout
-
-if [ $# -ne 1 ]; then
-  echo "Syntax: $0 [phonebookdirectory]"
-  exit 1
-fi
-
-make -f Makefile all
+    if [ -z "${EL}" ]; then
+      echo "el not found. Get it at 'cvs -d :pserver:anoncvs@cvs.erdgeist.org:/home/cvsroot co el'"
+      exit 1
+    fi
 
-mkdir ../work
-cd ../work
+    if [ $# -ne 1 ]; then
+      echo "Syntax: $0 [phonebookdirectory]"
+      exit 1
+    fi
 
-../bin/decompress $1/streets.tl
+    # Compile all the binaries (uses the Makefile in the current directory)
+    make all
 
-cat file_* > streets
-rm file_*
+    mkdir -p "../work_$(basename "$1")" # one work directory per phonebook issue
+    cd "../work_$(basename "$1")" || exit 1
 
-../bin/decompress $1/phonebook.db
+    if [ -f "$1/phonebook.db" ]; then # pick the handler by which data file exists
+        handle_new_format "$1"
+    elif [ -f "$1/DAT/TEILN.DAT" ]; then
+        handle_old_format "$1/DAT/TEILN.DAT"
+    elif [ -f "$1/dat/teiln.dat" ]; then
+        handle_old_format "$1/dat/teiln.dat"
+    else
+        echo "Not a recognized Telefonbuch folder"; exit 1 # report the failure to the caller
+    fi
+}
+
+get_dword() {
+  # $1 file, $2 dword index (default 0, byte offset is 4*$2); prints the unsigned 32-bit value od reads there
+  set -- $(od -tu4 -N4 -j$(( 4*${2:-0} )) "${1}")
+  printf "%d\n" $2 # after set --, $1 is od's offset column and $2 the value
+}
+
+handle_old_format() { # $1: path to TEILN.DAT; writes one NN_<column> file per field into the cwd
+    # Clear old files. Be very careful, we could
+    # have ended up in an unexpected directory, after all.
+    find . ! -name . -prune -type f ! -name '*[!0-9]*' -exec rm -f {} + # all-digit names in cwd only
+    rm -f ??_*
+
+    # First the simple stuff, extract teiln.dat
+    ../bin/extractblocks "$1"
+
+    # This should leave us with a bunch of .lha files
+    for archive in *.lha; do lha x "${archive}"; done
+    find . -name \*.lha -delete
+
+    # See how long each filename is
+    filename_len=$(( $(ls | head -n 1 | wc -c) - 1 )) # minus the newline counted by wc
+
+    # from 2000F on file 0+3*n is table, so make it default
+    table_file=$(printf %0${filename_len}d 0)
+    vname_file=$(printf %0${filename_len}d 2)
+
+    # if supposed vname file is larger than table file,
+    # we're having a pre-2000F layout, so switch accordingly
+    if [ $(stat -f %z ${table_file}) -lt $(stat -f %z ${vname_file}) ]; then
+        table_file=$(printf %0${filename_len}d 2)
+        nname_file=$(printf %0${filename_len}d 0)
+        vname_file=$(printf %0${filename_len}d 1)
+    else
+        nname_file=$(printf %0${filename_len}d 1)
+    fi
 
-numfiles=`find . -name file_\* | wc -l`
+    # Table file has a table header with identical count
+    # to nname file's header. Verify this
+    if [ $(get_dword ${nname_file}) -ne $(get_dword ${table_file}) ]; then
+        echo "Unknown layout."
+        exit 1
+    fi
 
-for column in `jot - 0 10 1`; do
-  for file in `jot - ${column} ${numfiles} 11`; do
-    acton=`printf file_%05X ${file}`
-    if [ ${column} = 0 ]; then
-      ${HO} < ${acton} >> column_0;
+    # Now loop over all files and dump them
+    while [ -f ${nname_file} ]; do
+        # Get number of entries in this round
+        count=$(get_dword ${nname_file})
+
+        # Get offset into first nname
+        nname_off=$(( $(get_dword ${nname_file} 1) + 1 )) # +1 because tail -c +N counts from 1
+
+        # Now get the flags before the nnames
+        tail -c +${nname_off} ${nname_file} | tr '\n\0' '\t\n' | head -n ${count} | cut -c -1 >> 01_Flags
+        tail -c +${nname_off} ${nname_file} | tr '\n\0' '\t\n' | head -n ${count} | cut -c 2- >> 02_Nachname
+
+        # Extract the vnames
+        tr '\n\0' '\t\n' < ${vname_file} | head -n ${count} >> 03_Vorname
+
+        # Offset into first table entry tells us how many
+        # fields are in table file
+        table_entries=$(( $(get_dword ${table_file} 1) / 4 - 1 ))
+
+        # Now iterate over all entries in the table file
+        for table_index in $(jot ${table_entries}); do
+            table_off=$(get_dword ${table_file} ${table_index})
+            tail -c +$(( table_off + 1 )) ${table_file} | tr '\n\0' '\t\n' | head -n ${count} >> $(printf %02d_unknown $(( table_index + 3 )))
+        done
+
+        # Advance the filenames. Note that we need bc because
+        # builtin arithmetic treats numbers with leading zeros as octals
+        nname_file=$(printf "%s + 3\n" ${nname_file} | bc)
+        nname_file=$(printf %0${filename_len}d ${nname_file})
+        vname_file=$(printf "%s + 3\n" ${vname_file} | bc)
+        vname_file=$(printf %0${filename_len}d ${vname_file})
+        table_file=$(printf "%s + 3\n" ${table_file} | bc)
+        table_file=$(printf %0${filename_len}d ${table_file})
+
+    done
+
+    # wipe all temporary extracted files
+    find . ! -name . -prune -type f ! -name '*[!0-9]*' -exec rm -f {} + # all-digit names in cwd only
+
+    # rename our columns extracted from the table file
+    mv 04_unknown 04_Namenszusatz
+    mv 05_unknown 05_Adresszusatz
+    mv 06_unknown 06_Ortszusatz
+    mv 08_unknown 08_Hausnummer
+    mv 09_unknown 09_Detail
+    mv 10_unknown 10_Postleitzahl
+    mv 11_unknown 11_Ort
+    mv 12_unknown 12_Vorwahl
+    mv 13_unknown 13_Rufnummer
+    [ -f 14_unknown ] && mv 14_unknown 14_Email
+    if [ -f 15_unknown ]; then mv 15_unknown 15_Webadresse; fi # if-form keeps the exit status 0 when absent
+
+    # TODO: extract street names if 07_unknown contains street indexes
+    # instead of street names
+}
+
+handle_new_format() { # $1: phonebook directory holding streets.tl and phonebook.db
+    ../bin/decompress "$1/streets.tl"
+
+    cat file_* > streets # join the file_* pieces present after decompress into one file
+    rm file_*
+
+    ../bin/decompress "$1/phonebook.db"
+
+    numfiles=$(find . -name file_\* | wc -l)
+
+    for column in $(jot - 0 10 1); do
+      for file in $(jot - ${column} ${numfiles} 11); do # every 11th file belongs to the same column
+        acton=$(printf file_%05X ${file})
+        if [ ${column} = 0 ]; then
+          xxd -ps -c1 ${acton} >> column_0; # column 0 is dumped as one hex byte per line
+        else
+          tr '\n\0' '\t\n' < ${acton} >> column_${column}; # NUL-terminated strings become lines
+        fi
+     done;
+    done
+
+    mv column_0 01_Flags
+    mv column_1 02_Nachname
+    mv column_2 03_Vorname
+    mv column_3 04_05_Namenszusatz_Addresszusatz
+    mv column_4 09_Detail
+    mv column_5 07_08_Strassenindex_Hausnummer
+    mv column_6 12_Vorwahl
+    mv column_7 10_Postleitzahl
+    mv column_8 11_Ort
+    mv column_9 13_Rufnummer
+    mv column_10 14_15_Email_Webadresse
+
+    [ -e streets ] && tr '\n\0' '\t\n' < streets > 99_Strassenname && rm streets
+
+    if [ -e 99_Strassenname ]; then
+      cut -f 1 07_08_Strassenindex_Hausnummer | "${EL}" -0 99_Strassenname > 07_Strasse
     else
-      tr '\0' '\n' < ${acton} >> column_${column};
+      echo Could not convert streetindexes
     fi
- done;
-done
-
-mv column_0 01_Flags
-mv column_1 02_Nachname
-mv column_2 03_Vorname
-mv column_3 04_05_Namenszusatz_Addresszusatz
-mv column_4 09_Detail
-mv column_5 07_08_Strassenindex_Hausnummer
-mv column_6 12_Vorwahl
-mv column_7 10_Postleitzahl
-mv column_8 11_Ort
-mv column_9 13_Rufnummer
-mv column_10 14_15_Email_Webadresse
-
-[ -e streets ] && tr '\0' '\n' < streets > 99_Strassenname && rm streets
-
-if [ -e 99_Strassenname ]; then
-  cut -f 1 07_08_Strassenindex_Hausnummer | ${EL} -0 99_Strassenname > 07_Strasse
-else
-  echo Could not convert streetindexes
-fi
-
-find . -name file_\* -delete
+
+    find . -name file_\* -delete
+}
+
+# Call main last, once every function it uses has been defined; pass all script arguments through
+main "$@"
-- 
cgit v1.3