Use integers to count through files in the old Telefonbuch format. Only convert them to a fixed-width representation when needed.

author: Dirk Engling <erdgeist@erdgeist.org> 2014-02-10 15:35:47 +0100
committer: Dirk Engling <erdgeist@erdgeist.org> 2014-02-10 15:35:47 +0100
commit: ae1f17a7b0abbc425f33106c666f1bc71e1b4711 (patch)
tree: be0d685293274bbaa1add147617861d050075856 /src
parent: 1e0ae2a4e3c0bab562d7f8c8ee9539a0613357b6 (diff)
1 files changed, 27 insertions, 29 deletions
diff --git a/src/makecolumns.sh b/src/makecolumns.sh
index ab61c29..476ce90 100755
--- a/src/makecolumns.sh
+++ b/src/makecolumns.sh
@@ -38,12 +38,6 @@ main() {
    cd ..
 }
-get_dword() {
-    # $1 file, $2 offset
-    set -- `od -tu4 -N4 -j$(( 4*${2:-0} )) ${1}`
-    printf "%d\n" $2
-}
 do_decompress_old() {
    printf "Extracting $2 chunks ... "
    extractblocks "${1}"
@@ -78,29 +72,37 @@ do_processfile_old() {
    rm -rf ${working_on}
 }
+size() {
+    stat -f %z `printf %0${filename_len}d $1`
+}
+get_dword() {
+    # $1 file, $2 offset
+    file=`printf %0${filename_len}d ${1}`
+    set -- `od -tu4 -N4 -j$(( 4*${2:-0} )) ${file}`
+    printf "%d\n" $2
+}
 handle_old_format() {
    echo "Working on $1. Detected pre-2004 Telefonbuch version."
    # Extract teiln.dat
    do_decompress_old $1/[Dd][Aa][Tt]/[Tt][Ee][Ii][Ll][Nn].[Dd][Aa][Tt] "teiln.dat"
    # See how long each filename is
-    filename_len=$(( `ls | head -n 1 | wc -c` - 1 ))
+    export filename_len=$(( `ls | head -n 1 | wc -c` - 1 ))
    # Get total amount of files, for reporting progress
    number_of_files=`find -E . -depth 1 -regex '^\./[0123456789]+' | wc -l`
    # from 2000F on file 0+3*n is table, so make it default
-    table_file=`printf %0${filename_len}d 0`
+    table_file=0; vname_file=2
-    vname_file=`printf %0${filename_len}d 2`
    # if supposed vname file is larger than table file,
    # we're having a pre-2000F layout, so switch accordingly
-    if [ `stat -f %z ${table_file}` -lt `stat -f %z ${vname_file}` ]; then
+    if [ `size ${table_file}` -lt `size ${vname_file}` ]; then
-        table_file=`printf %0${filename_len}d 2`
+        table_file=2; nname_file=0; vname_file=1
-        nname_file=`printf %0${filename_len}d 0`
-        vname_file=`printf %0${filename_len}d 1`
    else
-        nname_file=`printf %0${filename_len}d 1`
+        nname_file=1
    fi
    # Table file has a table header with identical count
@@ -113,7 +115,7 @@ handle_old_format() {
    # Now loop over all files and dump them
    printf "Splitting decompressed chunks into their columns ... "
    reported=0
-    while [ -f ${nname_file} ]; do
+    while [ -f `printf %0${filename_len}d ${nname_file}` ]; do
        # Get number of entries in this round
        count=`get_dword ${nname_file}`
@@ -121,11 +123,11 @@ handle_old_format() {
        nname_off=$(( `get_dword ${nname_file} 1` + 1 ))
        # Now get the flags before the nnames
-        tail -c +${nname_off} ${nname_file} | tr '\n\0' '\t\n' | head -n ${count} | cut -c -1 >> 01_Flags
+        tail -c +${nname_off} `printf %0${filename_len}d ${nname_file}` | tr '\n\0' '\t\n' | head -n ${count} | cut -c -1 >> 01_Flags
-        tail -c +${nname_off} ${nname_file} | tr '\n\0' '\t\n' | head -n ${count} | cut -c 2- >> 02_Nachname
+        tail -c +${nname_off} `printf %0${filename_len}d ${nname_file}` | tr '\n\0' '\t\n' | head -n ${count} | cut -c 2- >> 02_Nachname
        # Extract the vnames
-        tr '\n\0' '\t\n' < ${vname_file} | head -n ${count} >> 03_Vorname
+        tr '\n\0' '\t\n' < `printf %0${filename_len}d ${vname_file}` | head -n ${count} >> 03_Vorname
        # Offset into first table entry tells us how many
        # fields are in table file
@@ -134,18 +136,14 @@ handle_old_format() {
        # Now iterate over all entries in the table file
        for table_index in `jot ${table_entries}`; do
            table_off=`get_dword ${table_file} ${table_index}`
-            tail -c +$(( table_off + 1 )) ${table_file} | tr '\n\0' '\t\n' | head -n ${count} >> `printf %02d_unknown $(( table_index + 3 ))`
+            tail -c +$(( table_off + 1 )) `printf %0${filename_len}d ${table_file}` | tr '\n\0' '\t\n' | head -n ${count} >> `printf %02d_unknown $(( table_index + 3 ))`
        done
-        # Advance the filenames. Note, that we need bc because
+        # Advance the filenames.
-        # builtin arithmetic treats numbers with leading zeros as octals
+        nname_file=$(( nname_file+3 ))
-        nname_file=`printf "%s + 3\n" ${nname_file} | bc`
+        vname_file=$(( vname_file+3 ))
-        nname_file=`printf %0${filename_len}d ${nname_file}`
+        table_file=$(( table_file+3 ))
-        vname_file=`printf "%s + 3\n" ${vname_file} | bc`
+        [ 1 -eq $(( ( ( table_file * 20 ) / number_of_files ) > reported )) ] && printf "%d%% " $(( (reported+=1) * 5 ))
-        vname_file=`printf %0${filename_len}d ${vname_file}`
-        table_file=`printf "%s + 3\n" ${table_file} | bc`
-        [ 1 -eq $(( ( table_file * 20 ) / number_of_files > reported )) ] && printf "%d%% " $(( (reported+=1) * 5 ))
-        table_file=`printf %0${filename_len}d ${table_file}`
    done
    printf "done.\n"
@@ -170,7 +168,7 @@ handle_old_format() {
    # If street names come in an extra file, extract
    # street names first
    streets=$1/[Dd][Aa][Tt]/[Ss][Tt][Rr][Aa][Ss][Ss][Ee][Nn].[Dd][Aa][Tt]
-    [ -f ${streets} ] && do_processfile_old "${streets}" "street name" 99_Strassenname convert_zeros
+    [ -f ${streets} ] && do_processfile_old ${streets} "street name" 99_Strassenname convert_zeros
    # extract street names if 07_unknown contains street indexes
    # instead of street names
author	Dirk Engling <erdgeist@erdgeist.org>	2014-02-10 15:35:47 +0100
committer	Dirk Engling <erdgeist@erdgeist.org>	2014-02-10 15:35:47 +0100
commit	ae1f17a7b0abbc425f33106c666f1bc71e1b4711 (patch)
tree	be0d685293274bbaa1add147617861d050075856 /src
parent	1e0ae2a4e3c0bab562d7f8c8ee9539a0613357b6 (diff)