2 files changed, 170 insertions, 55 deletions
diff --git a/src/Makefile b/src/Makefile
index 8de6d8b..8d1a66d 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -1,7 +1,7 @@
-all: decompress hexout
+all: decompress extractblocks
 decompress: decompress.c mystdlib.c
        gcc -O2 -o ../bin/decompress decompress.c mystdlib.c -lz
-hexout: hexout.c
+extractblocks: extractblocks_new.c mystdlib.c
-        gcc -O2 -o ../bin/hexout hexout.c
+        gcc -o ../bin/extractblocks extractblocks_new.c mystdlib.c
diff --git a/src/makecolumns.sh b/src/makecolumns.sh
index cd26091..c4b5d24 100644
--- a/src/makecolumns.sh
+++ b/src/makecolumns.sh
@@ -4,64 +4,179 @@ export LANG=C
 export LC_CTYPE=C
 export LC_ALL=C
-[ -f /usr/local/bin/el ] && EL=/usr/local/bin/el
+main() {
-[ -f `dirname $0`/../bin/el ] && EL=`dirname $0`/../bin/el
+    [ -f /usr/local/bin/el ] && EL=/usr/local/bin/el
+    [ -f `dirname $0`/../bin/el ] && EL=`dirname $0`/../bin/el
-if [ -z "${EL}" ]; then
+    if [ -z "${EL}" ]; then
-  echo "el not found. Get it at 'cvs -d :pserver:anoncvs@cvs.erdgeist.org:/home/cvsroot co el'"
+      echo "el not found. Get it at 'cvs -d :pserver:anoncvs@cvs.erdgeist.org:/home/cvsroot co el'"
-  exit 1
+      exit 1
-fi
+    fi
-HO=`dirname $0`/../bin/hexout
-if [ $# -ne 1 ]; then
-  echo "Syntax: $0 [phonebookdirectory]"
-  exit 1
-fi
-make -f Makefile all
-mkdir ../work
+    if [ $# -ne 1 ]; then
-cd ../work
+      echo "Syntax: $0 [phonebookdirectory]"
+      exit 1
+    fi
-../bin/decompress $1/streets.tl
+    # Compile all the binaries
+    make all
-cat file_* > streets
+    mkdir -p ../work_`basename $1`
-rm file_*
+    cd ../work_`basename $1` || exit 1
-../bin/decompress $1/phonebook.db
+    if [ -f "$1/phonebook.db" ]; then
+        handle_new_format $1
+    elif [ -f "$1/DAT/TEILN.DAT" ]; then
+        handle_old_format "$1/DAT/TEILN.DAT"
+    elif [ -f "$1/dat/teiln.dat" ]; then
+        handle_old_format "$1/dat/teiln.dat"
+    else
+        echo "Not a recognized Telefonbuch folder"
+    fi
+}
+get_dword() {
+  # $1 file, $2 offset
+  set -- `od -tu4 -N4 -j$(( 4*${2:-0} )) ${1}`
+  printf "%d\n" $2
+}
+handle_old_format() {
+    # Clear old files. Be very careful, we could
+    # have ended up in an unexpected directory, after all.
+    find -depth 1 -regex ^[0-9]+$ -delete
+    rm ??_*
+    # First the simple stuff, extract teiln.dat
+    ../bin/extractblocks $1
+    # This should leave us with a bunch of .lha files
+    for archive in *.lha; do lha x ${archive}; done
+    find . -name \*.lha -delete
+    # See how long each filename is
+    filename_len=$(( `ls | head -n 1 | wc -c` - 1 ))
+    # From 2000F on, file 0+3*n is the table file, so make that the default
+    table_file=`printf %0${filename_len}d 0`
+    vname_file=`printf %0${filename_len}d 2`
+    # If the supposed vname file is larger than the table file,
+    # we have a pre-2000F layout, so switch accordingly
+    if [ `stat -f %z ${table_file}` -lt `stat -f %z ${vname_file}` ]; then
+        table_file=`printf %0${filename_len}d 2`
+        nname_file=`printf %0${filename_len}d 0`
+        vname_file=`printf %0${filename_len}d 1`
+    else
+        nname_file=`printf %0${filename_len}d 1`
+    fi
-numfiles=`find . -name file_\* | wc -l`
+    # Table file has a table header with identical count
+    # to nname file's header. Verify this
+    if [ `get_dword ${nname_file}` -ne `get_dword ${table_file}` ]; then
+        echo "Unknown layout."
+        exit
+    fi
-for column in `jot - 0 10 1`; do
+    # Now loop over all files and dump them
-  for file in `jot - ${column} ${numfiles} 11`; do
+    while [ -f ${nname_file} ]; do
-    acton=`printf file_%05X ${file}`
+        # Get number of entries in this round
-    if [ ${column} = 0 ]; then
+        count=`get_dword ${nname_file}`
-      ${HO} < ${acton} >> column_0;
+        # Get offset into first nname
+        nname_off=$(( `get_dword ${nname_file} 1` + 1 ))
+        # Now get the flags before the nnames
+        tail -c +${nname_off} ${nname_file} | tr '\n\0' '\t\n' | head -n ${count} | cut -c -1 >> 01_Flags
+        tail -c +${nname_off} ${nname_file} | tr '\n\0' '\t\n' | head -n ${count} | cut -c 2- >> 02_Nachname
+        # Extract the vnames
+        tr '\n\0' '\t\n' < ${vname_file} | head -n ${count} >> 03_Vorname
+        # The offset of the first table entry tells us how many
+        # fields are in the table file
+        table_entries=$(( `get_dword ${table_file} 1` / 4 - 1 ))
+        # Now iterate over all entries in the table file
+        for table_index in `jot ${table_entries}`; do
+            table_off=`get_dword ${table_file} ${table_index}`
+            tail -c +$(( table_off + 1 )) ${table_file} | tr '\n\0' '\t\n' | head -n ${count} >> `printf %02d_unknown $(( table_index + 3 ))`
+        done
+        # Advance the filenames. Note that we need bc because
+        # builtin arithmetic treats numbers with leading zeros as octal
+        nname_file=`printf "%s + 3\n" ${nname_file} | bc`
+        nname_file=`printf %0${filename_len}d ${nname_file}`
+        vname_file=`printf "%s + 3\n" ${vname_file} | bc`
+        vname_file=`printf %0${filename_len}d ${vname_file}`
+        table_file=`printf "%s + 3\n" ${table_file} | bc`
+        table_file=`printf %0${filename_len}d ${table_file}`
+    done
+    # wipe all temporary extracted files
+    find -depth 1 -regex ^[0-9]+$ -delete
+    # rename our columns extracted from the table file
+    mv 04_unknown 04_Namenszusatz
+    mv 05_unknown 05_Adresszusatz
+    mv 06_unknown 06_Ortszusatz
+    mv 08_unknown 08_Hausnummer
+    mv 09_unknown 09_Detail
+    mv 10_unknown 10_Postleitzahl
+    mv 11_unknown 11_Ort
+    mv 12_unknown 12_Vorwahl
+    mv 13_unknown 13_Rufnummer
+    [ -f 14_unknown ] && mv 14_unknown 14_Email
+    [ -f 15_unknown ] && mv 15_unknown 15_Webadresse
+    # TODO: extract street names if 07_unknown contains street indexes
+    # instead of street names
+}
+handle_new_format() {
+    ../bin/decompress $1/streets.tl
+    cat file_* > streets
+    rm file_*
+    ../bin/decompress $1/phonebook.db
+    numfiles=`find . -name file_\* | wc -l`
+    for column in `jot - 0 10 1`; do
+      for file in `jot - ${column} ${numfiles} 11`; do
+        acton=`printf file_%05X ${file}`
+        if [ ${column} = 0 ]; then
+          xxd -ps -c1 ${acton} >> column_0;
+        else
+          tr '\n\0' '\t\n' < ${acton} >> column_${column};
+        fi
+     done;
+    done
+    mv column_0 01_Flags
+    mv column_1 02_Nachname
+    mv column_2 03_Vorname
+    mv column_3 04_05_Namenszusatz_Addresszusatz
+    mv column_4 09_Detail
+    mv column_5 07_08_Strassenindex_Hausnummer
+    mv column_6 12_Vorwahl
+    mv column_7 10_Postleitzahl
+    mv column_8 11_Ort
+    mv column_9 13_Rufnummer
+    mv column_10 14_15_Email_Webadresse
+    [ -e streets ] && tr '\n\0' '\t\n' < streets > 99_Strassenname && rm streets
+    if [ -e 99_Strassenname ]; then
+      cut -f 1 07_08_Strassenindex_Hausnummer | ${EL} -0 99_Strassenname > 07_Strasse
    else
-      tr '\0' '\n' < ${acton} >> column_${column};
+      echo Could not convert streetindexes
    fi
- done;
-done
+    find . -name file_\* -delete
+}
-mv column_0 01_Flags
-mv column_1 02_Nachname
+# After function definitions, main() can use them
-mv column_2 03_Vorname
+main "$@"
-mv column_3 04_05_Namenszusatz_Addresszusatz
-mv column_4 09_Detail
-mv column_5 07_08_Strassenindex_Hausnummer
-mv column_6 12_Vorwahl
-mv column_7 10_Postleitzahl
-mv column_8 11_Ort
-mv column_9 13_Rufnummer
-mv column_10 14_15_Email_Webadresse
-[ -e streets ] && tr '\0' '\n' < streets > 99_Strassenname && rm streets
-if [ -e 99_Strassenname ]; then
-  cut -f 1 07_08_Strassenindex_Hausnummer | ${EL} -0 99_Strassenname > 07_Strasse
-else
-  echo Could not convert streetindexes
-fi
-find . -name file_\* -delete

diff --git a/src/Makefile b/src/Makefile index 8de6d8b..8d1a66d 100644 --- a/src/Makefile +++ b/src/Makefile
@@ -1,7 +1,7 @@
1	all: decompress hexout	1	all: decompress extractblocks
2		2
3	decompress: decompress.c mystdlib.c	3	decompress: decompress.c mystdlib.c
4	gcc -O2 -o ../bin/decompress decompress.c mystdlib.c -lz	4	gcc -O2 -o ../bin/decompress decompress.c mystdlib.c -lz
5		5
6	hexout: hexout.c	6	extractblocks: extractblocks_new.c mystdlib.c
7	gcc -O2 -o ../bin/hexout hexout.c	7	gcc -o ../bin/extractblocks extractblocks_new.c mystdlib.c


diff --git a/src/makecolumns.sh b/src/makecolumns.sh index cd26091..c4b5d24 100644 --- a/src/makecolumns.sh +++ b/src/makecolumns.sh
@@ -4,64 +4,179 @@ export LANG=C
4	export LC_CTYPE=C	4	export LC_CTYPE=C
5	export LC_ALL=C	5	export LC_ALL=C
6		6
7	[ -f /usr/local/bin/el ] && EL=/usr/local/bin/el	7	main() {
8	[ -f `dirname $0`/../bin/el ] && EL=`dirname $0`/../bin/el	8	[ -f /usr/local/bin/el ] && EL=/usr/local/bin/el
		9	[ -f `dirname $0`/../bin/el ] && EL=`dirname $0`/../bin/el
9		10
10	if [ -z "${EL}" ]; then	11	if [ -z "${EL}" ]; then
11	echo "el not found. Get it at 'cvs -d :pserver:anoncvs@cvs.erdgeist.org:/home/cvsroot co el'"	12	echo "el not found. Get it at 'cvs -d :pserver:anoncvs@cvs.erdgeist.org:/home/cvsroot co el'"
12	exit 1	13	exit 1
13	fi	14	fi
14
15	HO=`dirname $0`/../bin/hexout
16
17	if [ $# -ne 1 ]; then
18	echo "Syntax: $0 [phonebookdirectory]"
19	exit 1
20	fi
21
22	make -f Makefile all
23		15
24	mkdir ../work	16	if [ $# -ne 1 ]; then
25	cd ../work	17	echo "Syntax: $0 [phonebookdirectory]"
		18	exit 1
		19	fi
26		20
27	../bin/decompress $1/streets.tl	21	# Compile all the binaries
		22	make all
28		23
29	cat file_* > streets	24	mkdir -p ../work_`basename $1`
30	rm file_*	25	cd ../work_`basename $1` \|\| exit 1
31		26
32	../bin/decompress $1/phonebook.db	27	if [ -f "$1/phonebook.db" ]; then
		28	handle_new_format $1
		29	elif [ -f "$1/DAT/TEILN.DAT" ]; then
		30	handle_old_format "$1/DAT/TEILN.DAT"
		31	elif [ -f "$1/dat/teiln.dat" ]; then
		32	handle_old_format "$1/dat/teiln.dat"
		33	else
		34	echo "Not a recognized Telefonbuch folder"
		35	fi
		36	}
		37
		38	get_dword() {
		39	# $1 file, $2 offset
		40	set -- `od -tu4 -N4 -j$(( 4*${2:-0} )) ${1}`
		41	printf "%d\n" $2
		42	}
		43
		44	handle_old_format() {
		45	# Clear old files. Be very careful, we could
		46	# have ended up in an unexpected directory, after all.
		47	find -depth 1 -regex ^[0-9]+$ -delete
		48	rm ??_*
		49
		50	# First the simple stuff, extract teiln.dat
		51	../bin/extractblocks $1
		52
		53	# This should leave us with a bunch of .lha files
		54	for archive in *.lha; do lha x ${archive}; done
		55	find . -name \*.lha -delete
		56
		57	# See how long each filename is
		58	filename_len=$(( `ls \| head -n 1 \| wc -c` - 1 ))
		59
		60	# From 2000F on, file 0+3*n is the table file, so make that the default
		61	table_file=`printf %0${filename_len}d 0`
		62	vname_file=`printf %0${filename_len}d 2`
		63
		64	# If the supposed vname file is larger than the table file,
		65	# we have a pre-2000F layout, so switch accordingly
		66	if [ `stat -f %z ${table_file}` -lt `stat -f %z ${vname_file}` ]; then
		67	table_file=`printf %0${filename_len}d 2`
		68	nname_file=`printf %0${filename_len}d 0`
		69	vname_file=`printf %0${filename_len}d 1`
		70	else
		71	nname_file=`printf %0${filename_len}d 1`
		72	fi
33		73
34	numfiles=`find . -name file_\* \| wc -l`	74	# Table file has a table header with identical count
		75	# to nname file's header. Verify this
		76	if [ `get_dword ${nname_file}` -ne `get_dword ${table_file}` ]; then
		77	echo "Unknown layout."
		78	exit
		79	fi
35		80
36	for column in `jot - 0 10 1`; do	81	# Now loop over all files and dump them
37	for file in `jot - ${column} ${numfiles} 11`; do	82	while [ -f ${nname_file} ]; do
38	acton=`printf file_%05X ${file}`	83	# Get number of entries in this round
39	if [ ${column} = 0 ]; then	84	count=`get_dword ${nname_file}`
40	${HO} < ${acton} >> column_0;	85
		86	# Get offset into first nname
		87	nname_off=$(( `get_dword ${nname_file} 1` + 1 ))
		88
		89	# Now get the flags before the nnames
		90	tail -c +${nname_off} ${nname_file} \| tr '\n\0' '\t\n' \| head -n ${count} \| cut -c -1 >> 01_Flags
		91	tail -c +${nname_off} ${nname_file} \| tr '\n\0' '\t\n' \| head -n ${count} \| cut -c 2- >> 02_Nachname
		92
		93	# Extract the vnames
		94	tr '\n\0' '\t\n' < ${vname_file} \| head -n ${count} >> 03_Vorname
		95
		96	# The offset of the first table entry tells us how many
		97	# fields are in the table file
		98	table_entries=$(( `get_dword ${table_file} 1` / 4 - 1 ))
		99
		100	# Now iterate over all entries in the table file
		101	for table_index in `jot ${table_entries}`; do
		102	table_off=`get_dword ${table_file} ${table_index}`
		103	tail -c +$(( table_off + 1 )) ${table_file} \| tr '\n\0' '\t\n' \| head -n ${count} >> `printf %02d_unknown $(( table_index + 3 ))`
		104	done
		105
		106	# Advance the filenames. Note that we need bc because
		107	# builtin arithmetic treats numbers with leading zeros as octal
		108	nname_file=`printf "%s + 3\n" ${nname_file} \| bc`
		109	nname_file=`printf %0${filename_len}d ${nname_file}`
		110	vname_file=`printf "%s + 3\n" ${vname_file} \| bc`
		111	vname_file=`printf %0${filename_len}d ${vname_file}`
		112	table_file=`printf "%s + 3\n" ${table_file} \| bc`
		113	table_file=`printf %0${filename_len}d ${table_file}`
		114
		115	done
		116
		117	# wipe all temporary extracted files
		118	find -depth 1 -regex ^[0-9]+$ -delete
		119
		120	# rename our columns extracted from the table file
		121	mv 04_unknown 04_Namenszusatz
		122	mv 05_unknown 05_Adresszusatz
		123	mv 06_unknown 06_Ortszusatz
		124	mv 08_unknown 08_Hausnummer
		125	mv 09_unknown 09_Detail
		126	mv 10_unknown 10_Postleitzahl
		127	mv 11_unknown 11_Ort
		128	mv 12_unknown 12_Vorwahl
		129	mv 13_unknown 13_Rufnummer
		130	[ -f 14_unknown ] && mv 14_unknown 14_Email
		131	[ -f 15_unknown ] && mv 15_unknown 15_Webadresse
		132
		133	# TODO: extract street names if 07_unknown contains street indexes
		134	# instead of street names
		135	}
		136
		137	handle_new_format() {
		138	../bin/decompress $1/streets.tl
		139
		140	cat file_* > streets
		141	rm file_*
		142
		143	../bin/decompress $1/phonebook.db
		144
		145	numfiles=`find . -name file_\* \| wc -l`
		146
		147	for column in `jot - 0 10 1`; do
		148	for file in `jot - ${column} ${numfiles} 11`; do
		149	acton=`printf file_%05X ${file}`
		150	if [ ${column} = 0 ]; then
		151	xxd -ps -c1 ${acton} >> column_0;
		152	else
		153	tr '\n\0' '\t\n' < ${acton} >> column_${column};
		154	fi
		155	done;
		156	done
		157
		158	mv column_0 01_Flags
		159	mv column_1 02_Nachname
		160	mv column_2 03_Vorname
		161	mv column_3 04_05_Namenszusatz_Addresszusatz
		162	mv column_4 09_Detail
		163	mv column_5 07_08_Strassenindex_Hausnummer
		164	mv column_6 12_Vorwahl
		165	mv column_7 10_Postleitzahl
		166	mv column_8 11_Ort
		167	mv column_9 13_Rufnummer
		168	mv column_10 14_15_Email_Webadresse
		169
		170	[ -e streets ] && tr '\n\0' '\t\n' < streets > 99_Strassenname && rm streets
		171
		172	if [ -e 99_Strassenname ]; then
		173	cut -f 1 07_08_Strassenindex_Hausnummer \| ${EL} -0 99_Strassenname > 07_Strasse
41	else	174	else
42	tr '\0' '\n' < ${acton} >> column_${column};	175	echo Could not convert streetindexes
43	fi	176	fi
44	done;	177
45	done	178	find . -name file_\* -delete
46		179	}
47	mv column_0 01_Flags	180
48	mv column_1 02_Nachname	181	# After function definitions, main() can use them
49	mv column_2 03_Vorname	182	main "$@"
50	mv column_3 04_05_Namenszusatz_Addresszusatz
51	mv column_4 09_Detail
52	mv column_5 07_08_Strassenindex_Hausnummer
53	mv column_6 12_Vorwahl
54	mv column_7 10_Postleitzahl
55	mv column_8 11_Ort
56	mv column_9 13_Rufnummer
57	mv column_10 14_15_Email_Webadresse
58
59	[ -e streets ] && tr '\0' '\n' < streets > 99_Strassenname && rm streets
60
61	if [ -e 99_Strassenname ]; then
62	cut -f 1 07_08_Strassenindex_Hausnummer \| ${EL} -0 99_Strassenname > 07_Strasse
63	else
64	echo Could not convert streetindexes
65	fi
66
67	find . -name file_\* -delete