diff options
| author | Dirk Engling <erdgeist@erdgeist.org> | 2014-01-18 03:58:54 +0100 |
|---|---|---|
| committer | Dirk Engling <erdgeist@erdgeist.org> | 2014-01-18 03:58:54 +0100 |
| commit | 7f66b38b3dd8602b1532d0e6cda645f771474f94 (patch) | |
| tree | 26ea137620c35df68612e5fdc7f322522005dfec /src | |
| parent | f2df25ca202300fe04672703ea7fb30d8431b302 (diff) | |
Remove the hexout tool and use xxd instead. Also automate the export of old issues of Das Telefonbuch in the shell script.
Diffstat (limited to 'src')
| -rw-r--r-- | src/Makefile | 6 | ||||
| -rw-r--r-- | src/makecolumns.sh | 219 |
2 files changed, 170 insertions, 55 deletions
diff --git a/src/Makefile b/src/Makefile index 8de6d8b..8d1a66d 100644 --- a/src/Makefile +++ b/src/Makefile | |||
| @@ -1,7 +1,7 @@ | |||
| 1 | all: decompress hexout | 1 | all: decompress extractblocks |
| 2 | 2 | ||
| 3 | decompress: decompress.c mystdlib.c | 3 | decompress: decompress.c mystdlib.c |
| 4 | gcc -O2 -o ../bin/decompress decompress.c mystdlib.c -lz | 4 | gcc -O2 -o ../bin/decompress decompress.c mystdlib.c -lz |
| 5 | 5 | ||
| 6 | hexout: hexout.c | 6 | extractblocks: extractblocks_new.c mystdlib.c |
| 7 | gcc -O2 -o ../bin/hexout hexout.c | 7 | gcc -o ../bin/extractblocks extractblocks_new.c mystdlib.c |
diff --git a/src/makecolumns.sh b/src/makecolumns.sh index cd26091..c4b5d24 100644 --- a/src/makecolumns.sh +++ b/src/makecolumns.sh | |||
| @@ -4,64 +4,179 @@ export LANG=C | |||
| 4 | export LC_CTYPE=C | 4 | export LC_CTYPE=C |
| 5 | export LC_ALL=C | 5 | export LC_ALL=C |
| 6 | 6 | ||
| 7 | [ -f /usr/local/bin/el ] && EL=/usr/local/bin/el | 7 | main() { |
| 8 | [ -f `dirname $0`/../bin/el ] && EL=`dirname $0`/../bin/el | 8 | [ -f /usr/local/bin/el ] && EL=/usr/local/bin/el |
| 9 | [ -f `dirname $0`/../bin/el ] && EL=`dirname $0`/../bin/el | ||
| 9 | 10 | ||
| 10 | if [ -z "${EL}" ]; then | 11 | if [ -z "${EL}" ]; then |
| 11 | echo "el not found. Get it at 'cvs -d :pserver:anoncvs@cvs.erdgeist.org:/home/cvsroot co el'" | 12 | echo "el not found. Get it at 'cvs -d :pserver:anoncvs@cvs.erdgeist.org:/home/cvsroot co el'" |
| 12 | exit 1 | 13 | exit 1 |
| 13 | fi | 14 | fi |
| 14 | |||
| 15 | HO=`dirname $0`/../bin/hexout | ||
| 16 | |||
| 17 | if [ $# -ne 1 ]; then | ||
| 18 | echo "Syntax: $0 [phonebookdirectory]" | ||
| 19 | exit 1 | ||
| 20 | fi | ||
| 21 | |||
| 22 | make -f Makefile all | ||
| 23 | 15 | ||
| 24 | mkdir ../work | 16 | if [ $# -ne 1 ]; then |
| 25 | cd ../work | 17 | echo "Syntax: $0 [phonebookdirectory]" |
| 18 | exit 1 | ||
| 19 | fi | ||
| 26 | 20 | ||
| 27 | ../bin/decompress $1/streets.tl | 21 | # Compile all the binaries |
| 22 | make all | ||
| 28 | 23 | ||
| 29 | cat file_* > streets | 24 | mkdir -p ../work_`basename $1` |
| 30 | rm file_* | 25 | cd ../work_`basename $1` || exit 1 |
| 31 | 26 | ||
| 32 | ../bin/decompress $1/phonebook.db | 27 | if [ -f "$1/phonebook.db" ]; then |
| 28 | handle_new_format $1 | ||
| 29 | elif [ -f "$1/DAT/TEILN.DAT" ]; then | ||
| 30 | handle_old_format "$1/DAT/TEILN.DAT" | ||
| 31 | elif [ -f "$1/dat/teiln.dat" ]; then | ||
| 32 | handle_old_format "$1/dat/teiln.dat" | ||
| 33 | else | ||
| 34 | echo "Not a recognized Telefonbuch folder" | ||
| 35 | fi | ||
| 36 | } | ||
| 37 | |||
| 38 | get_dword() { | ||
| 39 | # $1 file, $2 offset | ||
| 40 | set -- `od -tu4 -N4 -j$(( 4*${2:-0} )) ${1}` | ||
| 41 | printf "%d\n" $2 | ||
| 42 | } | ||
| 43 | |||
| 44 | handle_old_format() { | ||
| 45 | # Clear old files. Be very careful, we could | ||
| 46 | # have ended up in an unexpected directory, after all. | ||
| 47 | find -depth 1 -regex ^[0-9]+$ -delete | ||
| 48 | rm ??_* | ||
| 49 | |||
| 50 | # First the simple stuff, extract teiln.dat | ||
| 51 | ../bin/extractblocks $1 | ||
| 52 | |||
| 53 | # This should leave us with a bunch of .lha files | ||
| 54 | for archive in *.lha; do lha x ${archive}; done | ||
| 55 | find . -name \*.lha -delete | ||
| 56 | |||
| 57 | # See how long each filename is | ||
| 58 | filename_len=$(( `ls | head -n 1 | wc -c` - 1 )) | ||
| 59 | |||
| 60 | # from 2000F on file 0+3*n is table, so make it default | ||
| 61 | table_file=`printf %0${filename_len}d 0` | ||
| 62 | vname_file=`printf %0${filename_len}d 2` | ||
| 63 | |||
| 64 | # if supposed vname file is larger than table file, | ||
| 65 | # we have a pre-2000F layout, so switch accordingly | ||
| 66 | if [ `stat -f %z ${table_file}` -lt `stat -f %z ${vname_file}` ]; then | ||
| 67 | table_file=`printf %0${filename_len}d 2` | ||
| 68 | nname_file=`printf %0${filename_len}d 0` | ||
| 69 | vname_file=`printf %0${filename_len}d 1` | ||
| 70 | else | ||
| 71 | nname_file=`printf %0${filename_len}d 1` | ||
| 72 | fi | ||
| 33 | 73 | ||
| 34 | numfiles=`find . -name file_\* | wc -l` | 74 | # Table file has a table header with identical count |
| 75 | # to nname file's header. Verify this | ||
| 76 | if [ `get_dword ${nname_file}` -ne `get_dword ${table_file}` ]; then | ||
| 77 | echo "Unknown layout." | ||
| 78 | exit | ||
| 79 | fi | ||
| 35 | 80 | ||
| 36 | for column in `jot - 0 10 1`; do | 81 | # Now loop over all files and dump them |
| 37 | for file in `jot - ${column} ${numfiles} 11`; do | 82 | while [ -f ${nname_file} ]; do |
| 38 | acton=`printf file_%05X ${file}` | 83 | # Get number of entries in this round |
| 39 | if [ ${column} = 0 ]; then | 84 | count=`get_dword ${nname_file}` |
| 40 | ${HO} < ${acton} >> column_0; | 85 | |
| 86 | # Get offset into first nname | ||
| 87 | nname_off=$(( `get_dword ${nname_file} 1` + 1 )) | ||
| 88 | |||
| 89 | # Now get the flags before the nnames | ||
| 90 | tail -c +${nname_off} ${nname_file} | tr '\n\0' '\t\n' | head -n ${count} | cut -c -1 >> 01_Flags | ||
| 91 | tail -c +${nname_off} ${nname_file} | tr '\n\0' '\t\n' | head -n ${count} | cut -c 2- >> 02_Nachname | ||
| 92 | |||
| 93 | # Extract the vnames | ||
| 94 | tr '\n\0' '\t\n' < ${vname_file} | head -n ${count} >> 03_Vorname | ||
| 95 | |||
| 96 | # Offset into first table entry tells us how many | ||
| 97 | # fields are in table file | ||
| 98 | table_entries=$(( `get_dword ${table_file} 1` / 4 - 1 )) | ||
| 99 | |||
| 100 | # Now iterate over all entries in the table file | ||
| 101 | for table_index in `jot ${table_entries}`; do | ||
| 102 | table_off=`get_dword ${table_file} ${table_index}` | ||
| 103 | tail -c +$(( table_off + 1 )) ${table_file} | tr '\n\0' '\t\n' | head -n ${count} >> `printf %02d_unknown $(( table_index + 3 ))` | ||
| 104 | done | ||
| 105 | |||
| 106 | # Advance the filenames. Note that we need bc because | ||
| 107 | # builtin arithmetic treats numbers with leading zeros as octal | ||
| 108 | nname_file=`printf "%s + 3\n" ${nname_file} | bc` | ||
| 109 | nname_file=`printf %0${filename_len}d ${nname_file}` | ||
| 110 | vname_file=`printf "%s + 3\n" ${vname_file} | bc` | ||
| 111 | vname_file=`printf %0${filename_len}d ${vname_file}` | ||
| 112 | table_file=`printf "%s + 3\n" ${table_file} | bc` | ||
| 113 | table_file=`printf %0${filename_len}d ${table_file}` | ||
| 114 | |||
| 115 | done | ||
| 116 | |||
| 117 | # wipe all temporary extracted files | ||
| 118 | find -depth 1 -regex ^[0-9]+$ -delete | ||
| 119 | |||
| 120 | # rename our columns extracted from the table file | ||
| 121 | mv 04_unknown 04_Namenszusatz | ||
| 122 | mv 05_unknown 05_Adresszusatz | ||
| 123 | mv 06_unknown 06_Ortszusatz | ||
| 124 | mv 08_unknown 08_Hausnummer | ||
| 125 | mv 09_unknown 09_Detail | ||
| 126 | mv 10_unknown 10_Postleitzahl | ||
| 127 | mv 11_unknown 11_Ort | ||
| 128 | mv 12_unknown 12_Vorwahl | ||
| 129 | mv 13_unknown 13_Rufnummer | ||
| 130 | [ -f 14_unknown ] && mv 14_unknown 14_Email | ||
| 131 | [ -f 15_unknown ] && mv 15_unknown 15_Webadresse | ||
| 132 | |||
| 133 | # TODO: extract street names if 07_unknown contains street indexes | ||
| 134 | # instead of street names | ||
| 135 | } | ||
| 136 | |||
| 137 | handle_new_format() { | ||
| 138 | ../bin/decompress $1/streets.tl | ||
| 139 | |||
| 140 | cat file_* > streets | ||
| 141 | rm file_* | ||
| 142 | |||
| 143 | ../bin/decompress $1/phonebook.db | ||
| 144 | |||
| 145 | numfiles=`find . -name file_\* | wc -l` | ||
| 146 | |||
| 147 | for column in `jot - 0 10 1`; do | ||
| 148 | for file in `jot - ${column} ${numfiles} 11`; do | ||
| 149 | acton=`printf file_%05X ${file}` | ||
| 150 | if [ ${column} = 0 ]; then | ||
| 151 | xxd -ps -c1 ${acton} >> column_0; | ||
| 152 | else | ||
| 153 | tr '\n\0' '\t\n' < ${acton} >> column_${column}; | ||
| 154 | fi | ||
| 155 | done; | ||
| 156 | done | ||
| 157 | |||
| 158 | mv column_0 01_Flags | ||
| 159 | mv column_1 02_Nachname | ||
| 160 | mv column_2 03_Vorname | ||
| 161 | mv column_3 04_05_Namenszusatz_Addresszusatz | ||
| 162 | mv column_4 09_Detail | ||
| 163 | mv column_5 07_08_Strassenindex_Hausnummer | ||
| 164 | mv column_6 12_Vorwahl | ||
| 165 | mv column_7 10_Postleitzahl | ||
| 166 | mv column_8 11_Ort | ||
| 167 | mv column_9 13_Rufnummer | ||
| 168 | mv column_10 14_15_Email_Webadresse | ||
| 169 | |||
| 170 | [ -e streets ] && tr '\n\0' '\t\n' < streets > 99_Strassenname && rm streets | ||
| 171 | |||
| 172 | if [ -e 99_Strassenname ]; then | ||
| 173 | cut -f 1 07_08_Strassenindex_Hausnummer | ${EL} -0 99_Strassenname > 07_Strasse | ||
| 41 | else | 174 | else |
| 42 | tr '\0' '\n' < ${acton} >> column_${column}; | 175 | echo Could not convert streetindexes |
| 43 | fi | 176 | fi |
| 44 | done; | 177 | |
| 45 | done | 178 | find . -name file_\* -delete |
| 46 | 179 | } | |
| 47 | mv column_0 01_Flags | 180 | |
| 48 | mv column_1 02_Nachname | 181 | # After function definitions, main() can use them |
| 49 | mv column_2 03_Vorname | 182 | main "$@" |
| 50 | mv column_3 04_05_Namenszusatz_Addresszusatz | ||
| 51 | mv column_4 09_Detail | ||
| 52 | mv column_5 07_08_Strassenindex_Hausnummer | ||
| 53 | mv column_6 12_Vorwahl | ||
| 54 | mv column_7 10_Postleitzahl | ||
| 55 | mv column_8 11_Ort | ||
| 56 | mv column_9 13_Rufnummer | ||
| 57 | mv column_10 14_15_Email_Webadresse | ||
| 58 | |||
| 59 | [ -e streets ] && tr '\0' '\n' < streets > 99_Strassenname && rm streets | ||
| 60 | |||
| 61 | if [ -e 99_Strassenname ]; then | ||
| 62 | cut -f 1 07_08_Strassenindex_Hausnummer | ${EL} -0 99_Strassenname > 07_Strasse | ||
| 63 | else | ||
| 64 | echo Could not convert streetindexes | ||
| 65 | fi | ||
| 66 | |||
| 67 | find . -name file_\* -delete | ||
