diff options
| author | Dirk Engling <erdgeist@erdgeist.org> | 2014-02-10 15:35:47 +0100 |
|---|---|---|
| committer | Dirk Engling <erdgeist@erdgeist.org> | 2014-02-10 15:35:47 +0100 |
| commit | ae1f17a7b0abbc425f33106c666f1bc71e1b4711 (patch) | |
| tree | be0d685293274bbaa1add147617861d050075856 /src | |
| parent | 1e0ae2a4e3c0bab562d7f8c8ee9539a0613357b6 (diff) | |
Use integers to count through files in the old Telefonbuch format. Only convert them to fixed-width representation when needed.
Diffstat (limited to 'src')
| -rwxr-xr-x | src/makecolumns.sh | 56 |
1 file changed, 27 insertions, 29 deletions
diff --git a/src/makecolumns.sh b/src/makecolumns.sh index ab61c29..476ce90 100755 --- a/src/makecolumns.sh +++ b/src/makecolumns.sh | |||
| @@ -38,12 +38,6 @@ main() { | |||
| 38 | cd .. | 38 | cd .. |
| 39 | } | 39 | } |
| 40 | 40 | ||
| 41 | get_dword() { | ||
| 42 | # $1 file, $2 offset | ||
| 43 | set -- `od -tu4 -N4 -j$(( 4*${2:-0} )) ${1}` | ||
| 44 | printf "%d\n" $2 | ||
| 45 | } | ||
| 46 | |||
| 47 | do_decompress_old() { | 41 | do_decompress_old() { |
| 48 | printf "Extracting $2 chunks ... " | 42 | printf "Extracting $2 chunks ... " |
| 49 | extractblocks "${1}" | 43 | extractblocks "${1}" |
| @@ -78,29 +72,37 @@ do_processfile_old() { | |||
| 78 | rm -rf ${working_on} | 72 | rm -rf ${working_on} |
| 79 | } | 73 | } |
| 80 | 74 | ||
| 75 | size() { | ||
| 76 | stat -f %z `printf %0${filename_len}d $1` | ||
| 77 | } | ||
| 78 | |||
| 79 | get_dword() { | ||
| 80 | # $1 file, $2 offset | ||
| 81 | file=`printf %0${filename_len}d ${1}` | ||
| 82 | set -- `od -tu4 -N4 -j$(( 4*${2:-0} )) ${file}` | ||
| 83 | printf "%d\n" $2 | ||
| 84 | } | ||
| 85 | |||
| 81 | handle_old_format() { | 86 | handle_old_format() { |
| 82 | echo "Working on $1. Detected pre-2004 Telefonbuch version." | 87 | echo "Working on $1. Detected pre-2004 Telefonbuch version." |
| 83 | # Extract teiln.dat | 88 | # Extract teiln.dat |
| 84 | do_decompress_old $1/[Dd][Aa][Tt]/[Tt][Ee][Ii][Ll][Nn].[Dd][Aa][Tt] "teiln.dat" | 89 | do_decompress_old $1/[Dd][Aa][Tt]/[Tt][Ee][Ii][Ll][Nn].[Dd][Aa][Tt] "teiln.dat" |
| 85 | 90 | ||
| 86 | # See how long each filename is | 91 | # See how long each filename is |
| 87 | filename_len=$(( `ls | head -n 1 | wc -c` - 1 )) | 92 | export filename_len=$(( `ls | head -n 1 | wc -c` - 1 )) |
| 88 | 93 | ||
| 89 | # Get total amount of files, for reporting progress | 94 | # Get total amount of files, for reporting progress |
| 90 | number_of_files=`find -E . -depth 1 -regex '^\./[0123456789]+' | wc -l` | 95 | number_of_files=`find -E . -depth 1 -regex '^\./[0123456789]+' | wc -l` |
| 91 | 96 | ||
| 92 | # from 2000F on file 0+3*n is table, so make it default | 97 | # from 2000F on file 0+3*n is table, so make it default |
| 93 | table_file=`printf %0${filename_len}d 0` | 98 | table_file=0; vname_file=2 |
| 94 | vname_file=`printf %0${filename_len}d 2` | ||
| 95 | 99 | ||
| 96 | # if supposed vname file is larger than table file, | 100 | # if supposed vname file is larger than table file, |
| 97 | # we're having a pre-2000F layout, so switch accordingly | 101 | # we're having a pre-2000F layout, so switch accordingly |
| 98 | if [ `stat -f %z ${table_file}` -lt `stat -f %z ${vname_file}` ]; then | 102 | if [ `size ${table_file}` -lt `size ${vname_file}` ]; then |
| 99 | table_file=`printf %0${filename_len}d 2` | 103 | table_file=2; nname_file=0; vname_file=1 |
| 100 | nname_file=`printf %0${filename_len}d 0` | ||
| 101 | vname_file=`printf %0${filename_len}d 1` | ||
| 102 | else | 104 | else |
| 103 | nname_file=`printf %0${filename_len}d 1` | 105 | nname_file=1 |
| 104 | fi | 106 | fi |
| 105 | 107 | ||
| 106 | # Table file has a table header with identical count | 108 | # Table file has a table header with identical count |
| @@ -113,7 +115,7 @@ handle_old_format() { | |||
| 113 | # Now loop over all files and dump them | 115 | # Now loop over all files and dump them |
| 114 | printf "Splitting decompressed chunks into their columns ... " | 116 | printf "Splitting decompressed chunks into their columns ... " |
| 115 | reported=0 | 117 | reported=0 |
| 116 | while [ -f ${nname_file} ]; do | 118 | while [ -f `printf %0${filename_len}d ${nname_file}` ]; do |
| 117 | # Get number of entries in this round | 119 | # Get number of entries in this round |
| 118 | count=`get_dword ${nname_file}` | 120 | count=`get_dword ${nname_file}` |
| 119 | 121 | ||
| @@ -121,11 +123,11 @@ handle_old_format() { | |||
| 121 | nname_off=$(( `get_dword ${nname_file} 1` + 1 )) | 123 | nname_off=$(( `get_dword ${nname_file} 1` + 1 )) |
| 122 | 124 | ||
| 123 | # Now get the flags before the nnames | 125 | # Now get the flags before the nnames |
| 124 | tail -c +${nname_off} ${nname_file} | tr '\n\0' '\t\n' | head -n ${count} | cut -c -1 >> 01_Flags | 126 | tail -c +${nname_off} `printf %0${filename_len}d ${nname_file}` | tr '\n\0' '\t\n' | head -n ${count} | cut -c -1 >> 01_Flags |
| 125 | tail -c +${nname_off} ${nname_file} | tr '\n\0' '\t\n' | head -n ${count} | cut -c 2- >> 02_Nachname | 127 | tail -c +${nname_off} `printf %0${filename_len}d ${nname_file}` | tr '\n\0' '\t\n' | head -n ${count} | cut -c 2- >> 02_Nachname |
| 126 | 128 | ||
| 127 | # Extract the vnames | 129 | # Extract the vnames |
| 128 | tr '\n\0' '\t\n' < ${vname_file} | head -n ${count} >> 03_Vorname | 130 | tr '\n\0' '\t\n' < `printf %0${filename_len}d ${vname_file}` | head -n ${count} >> 03_Vorname |
| 129 | 131 | ||
| 130 | # Offset into first table entry tells us how many | 132 | # Offset into first table entry tells us how many |
| 131 | # fields are in table file | 133 | # fields are in table file |
| @@ -134,18 +136,14 @@ handle_old_format() { | |||
| 134 | # Now iterate over all entries in the table file | 136 | # Now iterate over all entries in the table file |
| 135 | for table_index in `jot ${table_entries}`; do | 137 | for table_index in `jot ${table_entries}`; do |
| 136 | table_off=`get_dword ${table_file} ${table_index}` | 138 | table_off=`get_dword ${table_file} ${table_index}` |
| 137 | tail -c +$(( table_off + 1 )) ${table_file} | tr '\n\0' '\t\n' | head -n ${count} >> `printf %02d_unknown $(( table_index + 3 ))` | 139 | tail -c +$(( table_off + 1 )) `printf %0${filename_len}d ${table_file}` | tr '\n\0' '\t\n' | head -n ${count} >> `printf %02d_unknown $(( table_index + 3 ))` |
| 138 | done | 140 | done |
| 139 | 141 | ||
| 140 | # Advance the filenames. Note, that we need bc because | 142 | # Advance the filenames. |
| 141 | # builtin arithmetic treats numbers with leading zeros as octals | 143 | nname_file=$(( nname_file+3 )) |
| 142 | nname_file=`printf "%s + 3\n" ${nname_file} | bc` | 144 | vname_file=$(( vname_file+3 )) |
| 143 | nname_file=`printf %0${filename_len}d ${nname_file}` | 145 | table_file=$(( table_file+3 )) |
| 144 | vname_file=`printf "%s + 3\n" ${vname_file} | bc` | 146 | [ 1 -eq $(( ( ( table_file * 20 ) / number_of_files ) > reported )) ] && printf "%d%% " $(( (reported+=1) * 5 )) |
| 145 | vname_file=`printf %0${filename_len}d ${vname_file}` | ||
| 146 | table_file=`printf "%s + 3\n" ${table_file} | bc` | ||
| 147 | [ 1 -eq $(( ( table_file * 20 ) / number_of_files > reported )) ] && printf "%d%% " $(( (reported+=1) * 5 )) | ||
| 148 | table_file=`printf %0${filename_len}d ${table_file}` | ||
| 149 | done | 147 | done |
| 150 | printf "done.\n" | 148 | printf "done.\n" |
| 151 | 149 | ||
| @@ -170,7 +168,7 @@ handle_old_format() { | |||
| 170 | # If street names come in an extra file, extract | 168 | # If street names come in an extra file, extract |
| 171 | # street names first | 169 | # street names first |
| 172 | streets=$1/[Dd][Aa][Tt]/[Ss][Tt][Rr][Aa][Ss][Ss][Ee][Nn].[Dd][Aa][Tt] | 170 | streets=$1/[Dd][Aa][Tt]/[Ss][Tt][Rr][Aa][Ss][Ss][Ee][Nn].[Dd][Aa][Tt] |
| 173 | [ -f ${streets} ] && do_processfile_old "${streets}" "street name" 99_Strassenname convert_zeros | 171 | [ -f ${streets} ] && do_processfile_old ${streets} "street name" 99_Strassenname convert_zeros |
| 174 | 172 | ||
| 175 | # extract street names if 07_unknown contains street indexes | 173 | # extract street names if 07_unknown contains street indexes |
| 176 | # instead of street names | 174 | # instead of street names |
