summaryrefslogtreecommitdiff
path: root/makecolumns.sh
blob: cef88e03f5a456381fca52176784bcd4b4a8e45c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
#!/bin/sh

# Pin a byte-oriented C locale so sed/tr/sort behave reproducibly,
# independent of the user's environment.
export LANG=C
export LC_CTYPE=C
export LC_ALL=C
# Make the helper binaries compiled into ./bin/ reachable by name.
export PATH=${PATH}:$(pwd)/bin/

# BSD find(1) needs -E for extended regular expressions; GNU find does not
# take the flag, so leave FIND_E empty everywhere else.
unset FIND_E
case $(uname -s) in
    *BSD|Darwin) FIND_E="-E ";;
esac

main() {
    # Entry point. $1: phonebook CD directory to convert into column files.
    # Locate the "el" line-extraction helper: prefer an installed copy,
    # fall back to a checkout next to this script. EL stays global — the
    # handle_format_version_3/4 functions use it.
    [ -f /usr/local/bin/el ] && EL=/usr/local/bin/el
    [ -f "$(dirname "$0")/../bin/el" ] && EL="$(dirname "$0")/../bin/el"

    if [ -z "${EL}" ]; then
      echo "el not found. Get it at 'git clone git://erdgeist.org/el'"
      exit 1
    fi

    if [ $# -ne 1 ]; then
      echo "Syntax: $0 [phonebookdirectory]"
      exit 1
    fi

    # Compile all the binaries
    make all

    # Work in work/<basename of the input dir, minus any "white_" prefix>.
    # Quoted throughout so directory names with spaces survive.
    workdir="work/$(basename "${1#white_}")"
    printf "Cleaning up old working directory ... "
    rm -rf "${workdir}"
    printf "done.\n"
    mkdir -p "${workdir}"
    cd "${workdir}" || exit 1

    # Dispatch on the files characteristic for each known CD generation,
    # newest format first.
    if [ -f "$1/phonebook.db" ]; then
        handle_format_version_4 "${1}"
    elif [ -f "${1}"/[Dd][Aa][Tt]/[Tt][Ee][Ii][Ll][Nn].[Dd][Aa][Tt] ]; then
        handle_format_version_3 "${1}"
    elif [ -n "$(find "${1}" -iname atb?dd00 -ls -quit)" ]; then
        handle_format_version_2 "${1}"
    elif [ -n "$(find "${1}" -name dpr00000.005 -ls -quit)" ]; then
        handle_format_version_1 "${1}"
    else
        echo "Not a recognized Telefonbuch folder"
    fi
    cd ../..
}

do_decompress_version_3() {
    # $1: compressed source file, $2: human-readable label for progress text.
    # Splits $1 into LHA archives via extract_version_3, unpacks each with
    # lha(1) into the current directory, printing rough percentage progress.
    printf "Extracting $2 chunks ... "
    mkdir LHA && cd LHA
    extract_version_3 "${1}"
    cd ..
    printf "done.\n"

    printf "Decompressing $2 chunks ... "
    # NB: `find LHA` also lists the LHA directory itself, so numfiles is one
    # larger than the archive count; progress therefore lands short of 100%
    # and the final marker below tops it up.
    numfiles=$(find LHA | wc -l)
    reported=0; processed=0
    for archive in LHA/*.lha; do
        lha x "${archive}" > /dev/null
        # Report in 5% steps once another 1/20th of the archives is done.
        [ 1 -eq $(( ( ( (processed+=1) * 20 ) / numfiles ) > reported )) ] && printf "%d%% " $(( (reported+=1) * 5 ))
    done
    rm -r LHA
    # Emit the final marker if rounding kept us from reaching 100%.
    # ("%%" is the printf escape for a literal percent sign; a bare "%"
    # in the format string is an invalid conversion.)
    [ $reported -lt 10 ] && printf "100%% "
    printf "done.\n"
}

do_processfile_version_3() {
    # $1: source file  $2: progress label  $3: output file name
    # $4: "convert_zeros" to rewrite the NUL/LF chunk stream into TSV lines
    # Decompresses $1 into a scratch directory, concatenates the chunks into
    # $3, then removes the scratch directory. Quoted so paths with spaces
    # survive (the unquoted originals word-split).
    working_on=$(basename "${1}")
    mkdir "${working_on}" && cd "${working_on}"
    do_decompress_version_3 "${1}" "${2}"
    cd ..

    printf "Combining $2 into single file ... "
    if [ "${4}" = "convert_zeros" ]; then
        # former '\n' become field separators, former NULs end the record
        cat "${working_on}"/* | tr '\n\0' '\t\n' > "$3"
    else
        cat "${working_on}"/* > "$3"
    fi
    printf "done.\n"

    rm -rf "${working_on}"
}

size() {
    # Print the size in bytes of the chunk file whose name is $1 zero-padded
    # to ${filename_len} digits. stat(1) flags differ: BSD/macOS use -f %z,
    # GNU coreutils uses -c %s.
    chunk=$(printf "%0${filename_len}d" "$1")
    case $(uname -s) in
        *BSD|Darwin) stat -f %z "${chunk}" ;;
        *)           stat -c %s "${chunk}" ;;
    esac
}

get_dword() {
    # $1: numeric chunk index; the file name is $1 zero-padded to
    # ${filename_len} digits. Prints the first 32-bit word of that file as
    # an unsigned decimal with a leading space (host byte order).
    chunk=$(printf "%0${filename_len}d" "${1}")
    hexdump -n 4 -v -e '" " 1/4 "%u"' "${chunk}"
}

handle_format_version_1() {
    # Convert the oldest known CD layout (1992 dpr databases).
    # $1: CD source directory. Produces NN_<columnname> files in the cwd.
    echo "Working on $1. Detected 1992 Telefonbuch version."
    # Extract all dpr database files
    printf "Extracting dpr databases ... "
    find "$1" -name dpr\*.001 | extract_version_1
    printf "done.\n"

    # rename our extracted columns
    # extract_version_1 dumps anonymous NN_unknown column files; map them
    # onto the column numbering shared by all handle_format_version_*
    # handlers (02 Nachname, 03 Vorname, 07 Strasse, ...).
    mv 01_unknown 01_Flags

    mv 02_unknown 02_Nachname
    mv 03_unknown 03_Vorname
    mv 05_unknown 06_Ortszusatz
    mv 06_unknown 10_Zustellamt_PLZOst
    mv 07_unknown 07_Strasse
    mv 08_unknown 08_Hausnummer
    mv 10_unknown 09_Verweise
    mv 11_unknown 12_Vorwahl
    mv 12_unknown 13_Rufnummer
    mv 13_unknown 11_Ort
    mv 14_unknown 10_Postleitzahl_West
    mv 15_unknown 12_Vorwahl_block

    printf "Splitting appartement to zusaetze ... "
    # Join street, house number and 09_unknown, then move a ";<digits>..."
    # tail of the street field into the house-number field and tag the third
    # field as "Wohnung <...>" (apartment). A street ending in bare "tr"
    # also gets its missing period here.
    paste 07_Strasse 08_Hausnummer 09_unknown | sed -E $'s:^(.*)\;([0-9]+.*)\t(.*)\t.*$:\\1\t\\2\tWohnung \\3:;s:^(.*)tr(\t.*\t.*)$:\\1tr.\\2:' > tm_unknown
    cut -f 1 tm_unknown > 07_Strasse
    cut -f 2 tm_unknown > 08_Hausnummer
    printf "done.\n"

    printf "Normalizing zusaetze ... "
    # Rewrite a leading ", " into "u. ", then merge name and address
    # zusaetze; awk '{$1=$1};1' squeezes and trims whitespace on each line.
    cut -f 3 tm_unknown | sed -E -e 's:^, +:u. :' > 04_Namenszusatz
    sed -E -e 's:^, +:u. :' 04_unknown > 05_Adresszusatz
    paste 04_Namenszusatz 05_Adresszusatz | awk '{$1=$1};1' > 04_Zusaetze
    printf "done.\n"

    # For consistency, create files with empty lines
    # (this format carries no web/email/coordinate data; emit one empty
    # line per record so all columns stay line-aligned)
    tr -dC '\n' < 01_Flags > 14_Webadresse
    cp 14_Webadresse 15_Email
    sed $'s:.*:\t:' 01_Flags > 16_Koordinaten

    tidy_columns

    rm ??_unknown
}

handle_format_version_2() {
    # Convert the pre-02/1996 layout: three pkware-compressed databases
    # (atb?dd00), each with a companion index file (atb?di00).
    # $1: CD source directory. Produces NN_<columnname> files in the cwd.
    echo "Working on $1. Detected pre 02/1996 Telefonbuch version."

    # Extract all CD's pkware compressed databases
    printf "Extracting 3 pkware databases ...\n"
    cd=1
    # NB: "cd" is just a progress counter here, not the builtin.
    for database in `find "$1" -iname atb?dd00`; do
      dir=`dirname ${database}`
      base=`basename ${database}`
      printf "  %d/3 in %4s. Decompressing ..." ${cd} "$( basename ${dir} )"
      extract_version_2 "${database}" > ${base}.dump

      printf ", extracting ..."
      # split_version_2 cuts the dump into NN_unknown column files using
      # the per-database index file.
      indexfile=$( find ${dir} -iname atb?di00 )
      split_version_2 "${base}.dump" "${indexfile}"

      printf ", cleaning up ... "
      rm "${base}.dump"
      printf "done.\n"
      cd=$((cd+1))
    done

    # Map the anonymous NN_unknown dumps onto the shared column numbering.
    mv 01_unknown 01_Flags
    mv 16_unknown 02_Nachname
    mv 07_unknown 03_Vorname
    mv 12_unknown 06_Ortszusatz
    mv 08_unknown 07_Strasse
    mv 10_unknown 08_Hausnummer
    mv 13_unknown 09_Verweise
    mv 02_unknown 10_Postleitzahl
    mv 15_unknown 11_Ort
    mv 09_unknown 11_Ort_Gemeinde
    mv 05_unknown 12_Vorwahl
    mv 06_unknown 13_Rufnummer

    printf "Normalizing zusaetze ... "
    # Rewrite a leading ", " into "u. ", merge name and address zusaetze;
    # awk '{$1=$1};1' squeezes and trims whitespace on each line.
    sed -E -e 's:^, +:u. :' 14_unknown > 04_Namenszusatz
    sed -E -e 's:^, +:u. :' 11_unknown > 05_Adresszusatz
    paste 04_Namenszusatz 05_Adresszusatz | awk '{$1=$1};1' > 04_Zusaetze
    printf "done.\n"

    # For consistency, create files with empty lines
    # (no web/email/coordinate data in this format; one empty line per
    # record keeps all columns line-aligned)
    tr -dC '\n' < 01_Flags > 14_Webadresse
    cp 14_Webadresse 15_Email
    sed $'s:.*:\t:' 01_Flags > 16_Koordinaten

    tidy_columns

    rm ??_unknown
}

handle_format_version_3() {
    # Convert the pre-2004 layout (DAT/teiln.dat plus optional side files).
    # $1: CD source directory. The case-insensitive globs below resolve the
    # actual on-disc spelling of each DAT file.
    # glob
    teiln=`printf "%s" "$1"/[Dd][Aa][Tt]/[Tt][Ee][Ii][Ll][Nn].[Dd][Aa][Tt]`
    braid=`printf "%s" "$1"/[Dd][Aa][Tt]/[Bb][Rr][Aa][Ii][Dd].[Dd][Aa][Tt]`
    streets=`printf "%s" "$1"/[Dd][Aa][Tt]/[Ss][Tt][Rr][Aa][Ss][Ss][Ee][Nn].[Dd][Aa][Tt]`
    karto=`printf "%s" "$1"/[Dd][Aa][Tt]/[Kk][Aa][Rr][Tt][Oo].[Dd][Aa][Tt]`

    # A branch index file (braid.dat) marks a Yellow Pages CD.
    if [ -f "${braid}" ]; then
      echo "Working on $1. Detected pre-2004 Yellow Pages version."
      is_yp=true
    else
      echo "Working on $1. Detected pre-2004 Telefonbuch version."
      unset is_yp
    fi
    # Extract teiln.dat
    do_decompress_version_3 "${teiln}" "teiln.dat"

    # See how long each filename is
    # (the decompressed chunks are numbered files of uniform name length;
    # size()/get_dword()/JOT use filename_len to reconstruct the names)
    export filename_len=$(( `ls | head -n 1 | wc -c` - 1 ))

    # Get total amount of files, for reporting progress
    number_of_files=`find ${FIND_E} . -maxdepth 1 -regex '^\./[0123456789]+' | wc -l`

    # Chunks repeat in groups of three: table, nname, vname (order depends
    # on CD vintage, detected below).
    # from 2000F on file 0+3*n is table, so make it default
    table_file=0; vname_file=2

    # if supposed vname file is larger than table file,
    # we're having a pre-2000F layout, so switch accordingly
    if [ `size ${table_file}` -lt `size ${vname_file}` ]; then
        table_file=2; nname_file=0; vname_file=1
    else
        nname_file=1
    fi

    # Table file has a table header with identical count
    # to nname file's header. Verify this
    if [ `get_dword ${nname_file}` -ne `get_dword ${table_file}` ]; then
        echo "Unknown layout."
        exit
    fi

    # Now loop over all files and dump them
    # (every third chunk starting at nname_file; split_version_3 turns the
    # chunk stream into NN_unknown column files)
    printf "Splitting decompressed nname chunks into their columns ... "
    JOT "%0${filename_len}d" ${nname_file} $(( number_of_files - 1 )) 3 | split_version_3 1 1
#        set -- `hexdump -n 8 -v -e '" " 1/4 "%u"' ${file}`
#        tail -c +$(( $2 + 1 )) ${file}
#    done | tr '\n\0' '\t\n' > 01_02_Flags_Nachname
    # 01_unknown packs flag byte(s) and surname together; Yellow Pages CDs
    # additionally embed a 6-character branch index in columns 2-7.
    if [ "${is_yp}" ]; then
        cut -c 1  < 01_unknown > 01_Flags
        cut -c 2-7 < 01_unknown > 09_Branchenindex
        cut -c 8- < 01_unknown > 02_Nachname
    else
        cut -c 1  < 01_unknown > 01_Flags
        cut -c 2- < 01_unknown > 02_Nachname
    fi
    printf "done.\n"

    printf "Splitting decompress vname chunks into their columns ... "
    # Concatenate vname chunks, turn the NUL/LF stream into lines, drop
    # 0xFF filler bytes, squeeze whitespace.
    JOT "%0${filename_len}d" ${vname_file} $(( number_of_files - 1 )) 3 | xargs -n 128 cat | tr '\n\0' '\t\n' | tr -d '\377' | awk '{$1=$1};1' > 03_Vorname
    printf "done.\n"

    printf "Splitting decompress table file chunks into their columns ... "
    JOT "%0${filename_len}d" ${table_file} $(( number_of_files - 1 )) 3 | split_version_3 4 0
    # The commented block below is the former shell implementation of
    # split_version_3, kept for reference.
#    for file in `jot -w %0${filename_len}d - ${table_file} $(( number_of_files - 1 )) 3`; do
#        # Offset into first table entry tells us how many
#        # fields are in table file
#        set -- `hexdump -n 64 -v -e '" " 1/4 "%u"' ${file}`
#        count=$1; table_entries=$(( $2 / 4 - 1 )); shift
#
#        # Now iterate over all entries in the table file
#        for idx in `jot ${table_entries}`; do
#            tail -c +$(( $1 + 1 )) ${file} | tr '\n\0' '\t\n' | head -n ${count} >> `printf %02d_unknown $(( idx + 3 ))`
#            shift
#        done
#    done
    printf "done.\n"

    # wipe all temporary extracted files
    printf "Cleaning up decompressed chunks ... "
    find ${FIND_E} . -maxdepth 1 -regex '^\./[0123456789]+' -delete
    printf "done.\n"

    # rename our columns extracted from the table file
    mv 06_unknown 06_Ortszusatz
    mv 08_unknown 08_Hausnummer
    mv 10_unknown 10_Postleitzahl
    mv 11_unknown 11_Ort
    mv 12_unknown 12_Vorwahl
    mv 13_unknown 13_Rufnummer
    # Email/web columns only exist on some CDs; substitute empty lines
    # (one per record) when absent. Backslashes become slashes before the
    # latin-1 to utf-8 conversion.
    if [ -f 14_unknown ]; then
      tr '\\' '/' < 14_unknown | iconv -f iso-8859-15 -t utf-8 > 15_Email
    else
      tr -dC '\n' < 01_Flags > 15_Email
    fi
    if [ -f 15_unknown ]; then
      tr '\\' '/' < 15_unknown | iconv -f iso-8859-15 -t utf-8 > 14_Webadresse
    else
      tr -dC '\n' < 01_Flags > 14_Webadresse
    fi

    printf "Normalizing flags ... "
    # Map this format's flag values onto the shared scheme: 1->00, 3->01,
    # 2->02.
    sed -i.bak -e s:^1$:00:g -e s:^3$:01:g -e s:^2$:02:g 01_Flags
    rm 01_Flags.bak
    printf "done.\n"

    printf "Normalizing zusaetze ... "
    # Rewrite a leading ", " into "u. ", merge name and address zusaetze.
    sed -E -e 's:^, +:u. :' 04_unknown > 04_Namenszusatz
    sed -E -e 's:^, +:u. :' 05_unknown > 05_Adresszusatz
    paste 04_Namenszusatz 05_Adresszusatz | awk '{$1=$1};1' > 04_Zusaetze
    printf "done.\n"

    printf "Normalizing verweise ... "
    # Strip leading/trailing '|' and turn inner '|' separators into ", ".
    sed -E -e 's:^\|::g;s:\|$::g;s:\|:, :g' 09_unknown | awk '{$1=$1};1' > 09_Verweise
    printf "done.\n"

    # At least 2002_Q3 and 2003_Q1 are known to sport | -separated vname and nname fields
    # those fields are redundant, as they are being made explicit in 09_-column for these entries
    if grep -q '|' 03_Vorname; then

      printf "Treating vname fields with pipe separator ... (adds 3 minutes) ... "
      # Identify entries with | in nname and move content of 09_Verweise to 04_Zusaetze
      paste 03_Vorname 04_Zusaetze 09_Verweise | sed -E $'s:^([^|]*)\|.*\t(.*)\t(.*):\\1\t\\2 \\3\t:;' > 03_04_09_Temp
      cut -f 1 03_04_09_Temp | awk '{$1=$1};1' > 03_Vorname
      cut -f 2 03_04_09_Temp | awk '{$1=$1};1' > 04_Zusaetze
      cut -f 3 03_04_09_Temp | awk '{$1=$1};1' > 09_Verweise
      rm 03_04_09_Temp

      # Delete redundant nachnamen values
      cut -d '|' -f 1 02_Nachname > 02_Nachname.new
      mv 02_Nachname.new 02_Nachname

      printf "done.\n"
    fi

    # If street names come in an extra file, extract
    # street names first
    if [ -f "${streets}" ]; then
        printf "Extracting street name indexes ... "
        do_processfile_version_3 "${streets}" "street name" 99_Strassenname convert_zeros
        printf "done.\n"
    fi

    # extract street names if 07_unknown contains street indexes
    # instead of street names
    if [ -f 99_Strassenname ]; then
        mv 07_unknown 07_Strassenindex

        # fix up known broken Strassennamen file
        # NOTE(review): this stat invocation is the BSD form only (-f %z),
        # unlike the portable size() helper — confirm on GNU systems.
        [ `stat -f %z ${streets}` -eq 1642716 ] && printf '9. Str.\n91. Str.\n91er-Str.\n' >> 99_Strassenname

        printf "Looking up street names from indexes ... "
        # el resolves each numeric index to the matching line of the
        # street-name list.
        cut -d ';' -f 1 07_Strassenindex | ${EL} -0x 99_Strassenname > 07_Strasse
        printf "done.\n"
    else
        mv 07_unknown 07_Strasse
    fi

    # Geo coordinates, when present, are matched per house number.
    if [ -f "${karto}" ]; then
      do_processfile_version_3 "${karto}" "geo coordinates" 90_Geokoordinaten_hnr_raw

      printf "Looking up geo coordinates for each phonebook entry ... "
      tr '\0;' '\n\t' < 90_Geokoordinaten_hnr_raw | cut -f "1,2,3,4,6,7" | tr '\n' '\0' > 90_Geokoordinaten_hnr
      rm 90_Geokoordinaten_hnr_raw
      paste 10_Postleitzahl 11_Ort 07_Strasse 08_Hausnummer | map_coords 90_Geokoordinaten_hnr | convert_coords > 16_Koordinaten
      printf "done.\n"
    else
      # No coordinates on this CD: one empty (tab-only) line per record.
      sed $'s:.*:\t:' 01_Flags > 16_Koordinaten
    fi

    # Yellow Pages only: resolve branch codes to names via braid.dat.
    if [ -f "${braid}" ]; then
      do_processfile_version_3 "${braid}" "branchen name index" 97_Branchenname convert_zeros

      printf "Looking up branch names from codes ... "
      map_branches_v3 97_Branchenname < 09_Branchenindex > 09_Branchen
      printf "done.\n"
    fi

    tidy_columns

    rm ??_unknown
}

fix_broken_v4_file() {
    # Repair a corrupt decompressed chunk by cutting a damaged stretch out.
    #   $1 name      suffix of the file_<name> chunk to patch
    #   $2 offset    1-based byte position where the damage starts (0x-hex ok)
    #   $3 nullbytes when > 0, insert a single NUL in place of the cut
    #   $4 gap       number of bytes to drop, counted from offset
    name=$1
    offset=$2
    nullbytes=$3
    gap=$4

    # Keep everything before the damage ...
    head -c $(( offset - 1 )) file_${name} > file_${name}_
    # ... optionally re-insert one NUL byte. '\0' is the POSIX printf
    # escape; '\x00' is a bashism that some /bin/sh printf implementations
    # print literally ...
    [ "${nullbytes}" -gt 0 ] && printf '\0' >> file_${name}_
    # ... then resume after skipping the gap.
    tail -c +$(( offset + gap )) file_${name} >> file_${name}_
    mv file_${name}_ file_${name}
}

handle_format_version_4() {
    if [ -f "$1/branchcodes.tl" ]; then
      is_yp=true
      echo "Working on $1. Detected post-2003 Yellow Pages version."
    else
      unset is_yp
      echo "Working on $1. Detected post-2003 Telefonbuch version."
    fi

    printf "Extracting street names ... "
    extract_version_4 "$1/streets.tl"

    cat file_* | tr '\n\0' '\t\n' > 99_Strassenname
    rm file_*
    printf "done.\n"

    printf "Extracting phonebook.db ... "
    extract_version_4 "$1/phonebook.db"

    case $1 in
    *2023_Q1*)
        # Fixup broken file in 2023_Q1
        fix_broken_v4_file 17F0A 0x01BB 1 93
        ;;
    *2023_Q3*)
        # Fixup broken file in 2023_Q3
        fix_broken_v4_file 15DB2 0x3D05 0 373
        fix_broken_v4_file 15DBD 0x8CA8 0 391
        fix_broken_v4_file 15DD3 0x72B7 0 120
        fix_broken_v4_file 15E2B 0x0549 0 2051
        fix_broken_v4_file 15EE6 0x0569 1 144
        fix_broken_v4_file 15F28 0x2EF8 0 103
        fix_broken_v4_file 15F3E 0x3C1C 0 164
        fix_broken_v4_file 16348 0x0942 0 221
        fix_broken_v4_file 16419 0x0CF8 0 140
        fix_broken_v4_file 16471 0x0681 0 106
        ;;
    esac

    rows=`find . -name file_\* | wc -l`
    printf "done.\n"

    printf "Splitting decompressed chunks into their columns (11 total) ... 1, "
    JOT "file_%05X" 0 $(( rows - 1 )) 11 | xargs -n 128 cat | xxd -ps -c1 > column_0

    for col in 1 2 3 4 5 6 7 8 9 10; do
      printf "%d, " $(( col + 1 ))
      JOT "file_%05X" ${col} $(( rows - 1 )) 11 | xargs -n 128 cat | tr '\n\0' '\t\n' > column_${col}
    done
    printf "done.\n"

    printf "Cleaning up decompressed chunks ... "
    find . -name file_\* -delete
    printf "done.\n"

    # the 'did not object to inverse search' flag is insane and needs to be reversed
    if grep -q ^40 column_0; then
      printf "Cleanung up inverted reverse search flags ... "
      awk '{ a=substr($0,1,1); printf "%x%x\n",index("5670123cdef89ab4",a)%16 ,substr($0,2,1) }' < column_0 > 01_Flags
      printf "done\n"
    else
      mv column_0 01_Flags
    fi

    mv column_1 02_Nachname
    mv column_2 03_Vorname
    mv column_3 04_05_Namenszusatz_Addresszusatz
    mv column_5 07_08_Strassenindex_Hausnummer
    mv column_6 12_Vorwahl
    mv column_7 10_Postleitzahl
    mv column_8 11_Ort
    mv column_9 13_Rufnummer
    mv column_10 14_15_Email_Webadresse

    printf "Looking up street names from indexes ... "
    cut -f 1 07_08_Strassenindex_Hausnummer | ${EL} -0 99_Strassenname > 07_Strasse
    printf "done.\n"

    printf "Splitting house numbers ... "
    sed -E $'s:$:\t:' < 07_08_Strassenindex_Hausnummer | cut -f 2 > 08_Hausnummer
    printf "done.\n"

    printf "Normalizing zusaetze ... "
    sed -E -e $'s:(^|\t),: u. :g' 04_05_Namenszusatz_Addresszusatz | awk '{$1=$1};1' > 04_Zusaetze
    printf "done.\n"

    printf "Normalizing verweise ... "
    sed -E -e $'s:^\|+::g;s:\|+$::g;s:\|:, :g' column_4 | awk '{$1=$1};1' > 09_Verweise
    printf "done.\n"

    printf "Splitting webaddress ... "
    cut -d $'\t' -f 1 14_15_Email_Webadresse | tr '\\' '/' | iconv -f iso-8859-15 -t utf-8 > 14_Webadresse
    printf "done.\n"

    printf "Splitting email ... "
    sed $'s:$:\t:' < 14_15_Email_Webadresse | cut -sd $'\t' -f 2 | tr '\\' '/' | iconv -f iso-8859-15 -t utf-8 > 15_Email
    printf "done.\n"

    if [ -f "$1/zip-streets-hn-geo.tl" ]; then
      printf "Extracting geo coordinates (precision: house number) ... "
      extract_version_4 "$1/zip-streets-hn-geo.tl"
      cat file_* > 90_Geokoordinaten_hnr
      printf "done.\n"
      printf "Looking up geo coordinates for each phonebook entry ... "
      paste 10_Postleitzahl 07_Strasse 08_Hausnummer | map_coords 90_Geokoordinaten_hnr | convert_coords > 16_Koordinaten
      printf "done.\n"
    elif [ -f "$1/zip-streets-geo.tl" ]; then
      printf "Extracting geo coordinates (precision: street) ... "
      extract_version_4 "$1/zip-streets-geo.tl"
      cat file_* > 91_Geokoordinaten_str
      printf "done.\n"
      printf "Looking up geo coordinates for each phonebook entry ... "
      paste 10_Postleitzahl 07_Strasse | map_coords 91_Geokoordinaten_str | convert_coords > 16_Koordinaten
      printf "done.\n"
    else
      sed $'s:.*:\t:' 01_Flags > 16_Koordinaten
    fi
    rm file_*

    if [ "${is_yp}" ]; then
      printf "Extracting branch names ... "
      extract_version_4 "$1/branchcodes.tl"
      cat file_* | tr '\n\0' '\t\n' > 97_Branchenname
      rm file_*
      printf "done.\n"

      printf "Looking up branch names from codes ... "
      map_branches_v4 97_Branchenname < 09_Verweise > 09_Branchen
      printf "done.\n"
    fi

    tidy_columns
    rm column_*
}

tidy_columns () {

  # Re-encode one column file from latin-1 to utf-8, replacing it in
  # place. Mirrors the original write-to-.new-then-rename sequence.
  recode_utf8() {
    iconv -f iso-8859-15 -t utf-8 "$1" > "$1.new"
    mv "$1.new" "$1"
  }

  printf "Removing backslashes from Nachnamen ... "
  sed -E -e 's:\\::g' 02_Nachname | awk '{$1=$1};1' | iconv -f iso-8859-15 -t utf-8 > 02_Nachname.new
  mv 02_Nachname.new 02_Nachname
  printf "done.\n"

  printf "Unicoding Vornamen ... "
  recode_utf8 03_Vorname
  printf "done.\n"

  printf "Unicoding Zusaetze ... "
  recode_utf8 04_Zusaetze
  printf "done.\n"

  printf "Tidying up streetnames ... "
  # Collapse runs of dots to one, then complete abbreviated "Str"/"str"
  # endings (with or without dash) with their missing period.
  sed -E 's/\.+/./g;s/(S|s)tr( |:)?$/\1tr./;s/(.*)-(.*) -/\1-\2-Str./;s/ -$/ Str./;s/-$/str./;s/^(.*-.*) Str\.?$/\1-Str./' 07_Strasse | iconv -f iso-8859-15 -t utf-8 > 07_Strasse.new
  mv 07_Strasse.new 07_Strasse
  printf "done.\n"

  printf "Normalizing house numbers ... "
  # Separate a trailing letter from the digits, then upper-case it.
  sed -E -e 's:^([[:digit:]]+) *([A-Za-z])$:\1 \2:' -e 's: a$: A:;s: b$: B:;s: c$: C:;s: d$: D:;s: e$: E:;s: f$: F:;s: g$: G:;s: h$: H:;s: i$: I:;s: j$: J:;s: k$: K:;s: l$: L:;s: m$: M:;s: n$: N:;s: o$: O:;' 08_Hausnummer | iconv -f iso-8859-15 -t utf-8 > 08_Hausnummer.new
  mv 08_Hausnummer.new 08_Hausnummer
  printf "done.\n"

  printf "Unicoding Verweise ... "
  recode_utf8 09_Verweise
  printf "done.\n"

  printf "Unicoding Postleitzahl ...  "
  recode_utf8 10_Postleitzahl
  printf "done.\n"

  printf "Removing trailing * from Ort ... "
  sed -E -e 's:\*$::' 11_Ort | iconv -f iso-8859-15 -t utf-8 > 11_Ort.new
  mv 11_Ort.new 11_Ort
  printf "done.\n"

  printf "Unicoding Vorwahl ...  "
  recode_utf8 12_Vorwahl
  printf "done.\n"

  printf "Unicoding Rufnummer ...  "
  recode_utf8 13_Rufnummer
  printf "done.\n"

}

# JOT <format> <begin> <end> <step>
JOT () {
 case `uname -s` in
    *BSD|Darwin)
      jot -w "$1" - "$2" "$3" "$4"
      ;;
    *)
      for x in `seq "$2" "$4" "$3"`; do printf "$1\n" "$x"; done
      ;;
  esac
}

# Entry point: every function above is now defined, so hand all
# command-line arguments to main().
main "$@"