summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDirk Engling <erdgeist@erdgeist.org>2019-02-02 22:56:02 +0100
committerDirk Engling <erdgeist@erdgeist.org>2019-02-02 22:56:02 +0100
commita73a9f7ebe6c82a9210e63700481b0b2dfcb0b4b (patch)
tree8827b08d032b9b334b8bb0cc7bae5ae3998a7a95
parent1e838c25d6fb9e9793b8dd4adbf5ce3078e1d0e5 (diff)
First attempt to sort for unifications
-rw-r--r--src/postprocess/sort_plz.c132
1 files changed, 132 insertions, 0 deletions
diff --git a/src/postprocess/sort_plz.c b/src/postprocess/sort_plz.c
new file mode 100644
index 0000000..f44cec7
--- /dev/null
+++ b/src/postprocess/sort_plz.c
@@ -0,0 +1,132 @@
/*
  target is in current directory:
    single/<PLZ> multi/<PLZ>
    <PLZ> is either [0-9_]{5} or _____ or broken
  opens files in source directory:
    01_Flags 02_Nachname 03_Vorname 04_Zusaetze 07_Strasse 08_Hausnummer 09_Verweise 10_Postleitzahl
    11_Ort 12_Vorwahl 13_Rufnummer 16_Koordinaten
  appends to files of the same names in the target directory, plus
    00_Jahr
*/
11
12#include <sys/stat.h>
13#include <stdlib.h>
14#include <stdio.h>
15#include <string.h>
16#include <errno.h>
17#include <err.h>
18
/*
 * Indices of the per-field data files.
 *
 * The order is deliberately not numeric: main() handles F_00 (year),
 * F_01 (flags) and F_10 (postal code) individually and then loops over
 * "everything from F_10 on" or "everything from F_02 on", so those three
 * must stay at the front, in this order.  F_COUNT is the number of files.
 */
enum { F_00, F_01, F_10, F_02, F_03, F_04, F_07, F_08, F_09, F_11, F_12, F_13, F_16, F_COUNT };

/*
 * File name of each field, indexed by the enum above.  Designated
 * initializers keep every name attached to its index even if the enum
 * is reordered.
 */
static const char *const g_filenames[F_COUNT] = {
  [F_00] = "00_Jahr",
  [F_01] = "01_Flags",
  [F_10] = "10_Postleitzahl",
  [F_02] = "02_Nachname",
  [F_03] = "03_Vorname",
  [F_04] = "04_Zusaetze",
  [F_07] = "07_Strasse",
  [F_08] = "08_Hausnummer",
  [F_09] = "09_Verweise",
  [F_11] = "11_Ort",
  [F_12] = "12_Vorwahl",
  [F_13] = "13_Rufnummer",
  [F_16] = "16_Koordinaten",
};
23
24FILE * fopen_prefix(char *prefix, int file_id, int readonly) {
25 char filename[1024];
26 snprintf( filename, sizeof(filename), "%s/%s", prefix, g_filenames[file_id]);
27 return fopen(filename, readonly ? "r" : "a");
28}
29
30int main(int argc, char **args) {
31 FILE * in_handles[F_COUNT] = { NULL };
32 FILE * out_handles[F_COUNT] = { NULL };
33 char flags[4];
34 int i, in_multi = 0;
35 char *input = malloc(1024);
36 size_t input_size = 1024;
37
38 /* First open all input files */
39 for (i=F_01; i<F_COUNT; ++i) {
40 in_handles[i] = fopen_prefix(args[1], i, 1);
41 if (!in_handles[i])
42 errx( 1, "Couldn't open file %s\n", g_filenames[i]);
43 }
44
45 mkdir( "multi", 0755);
46 mkdir( "single", 0755);
47
48 /* Get Flags to check if we're processing a continuation */
49 while (fgets(flags, 4, in_handles[F_01])) {
50 char out_dir[32];
51 ssize_t linelen;
52 char flag = strtoul(flags, 0, 16);
53 char *type = flag & 1 ? "multi/" : "single/";
54
55 /* If we're in multiline mode, we just copy lines as long as we see continuations */
56 if (in_multi) {
57 if (flag & 0x2) {
58 fputs(args[1], out_handles[F_00]); // write Jahr
59 fputc(10, out_handles[F_00]);
60 fwrite(flags, 3, 1, out_handles[F_01]); // copy Flags verbatim
61 for (i=F_10; i<F_COUNT; ++i) { // process the rest of entries
62 ssize_t linelen = getline(&input, &input_size, in_handles[i]);
63 fwrite(input, linelen, 1, out_handles[i]);
64 }
65 continue;
66 }
67 /* If the entry is not a continuation, close all output files and switch off multi mode */
68 for (i=0; i<F_COUNT; ++i) {
69 fclose(out_handles[i]);
70 out_handles[i] = NULL;
71 }
72 in_multi = 0;
73 }
74
75 if (flag & 0x1)
76 in_multi = 1;
77
78 /* Read Postleitzahl to get destination */
79 linelen = getline(&input, &input_size, in_handles[F_10]);
80 if (linelen && input[linelen - 1] == 10) { // chomp
81 input[linelen - 1] = 0;
82 --linelen;
83 }
84
85 if (linelen == 0) // empty PLZ
86 strcpy(out_dir, in_multi ? "multi/_____" : "single/_____");
87 else if (linelen == 5) { // potentially normal
88 int broken = 0;
89 char * dest = out_dir + sprintf(out_dir, in_multi ? "multi/" : "single/");
90 for (i=0; i<5; ++i) {
91 if ( (input[i] < '0' || input[i] > '9') && input[i] != '.') {
92 broken = 1;
93 break;
94 }
95 dest[i] = input[i];
96 if (dest[i] == '.') dest[i] = '_';
97 }
98 dest[5] = 0;
99 if (broken)
100 strcpy(out_dir, in_multi ? "multi/broken" : "single/broken");
101 } else
102 strcpy(out_dir, in_multi ? "multi/broken" : "single/broken");
103
104 if (mkdir(out_dir, 0755) == -1 && errno != EEXIST)
105 errx( 1, "Couldn't create directory %s %d\n", out_dir, errno);
106
107 for (i=F_00; i<F_COUNT; ++i) {
108 out_handles[i] = fopen_prefix(out_dir, i, 0);
109 if (!out_handles[i])
110 errx( 1, "Couldn't open file %s\n", g_filenames[i]);
111 }
112
113 fputs(args[1], out_handles[F_00]); // write Jahr
114 fputc(10, out_handles[F_00]);
115 fwrite(flags, 3, 1, out_handles[F_01]); // copy Flags verbatim
116 fputs(input, out_handles[F_10]); // copy Postleitzahl verbatim
117 fputc(10, out_handles[F_10]);
118
119 for (i=F_02; i<F_COUNT; ++i) { // process the rest of entries
120 ssize_t linelen = getline(&input, &input_size, in_handles[i]);
121 fwrite(input, linelen, 1, out_handles[i]);
122 }
123
124 if (!in_multi)
125 for (i=0; i<F_COUNT; ++i) {
126 fclose(out_handles[i]);
127 out_handles[i] = NULL;
128 }
129 }
130
131 return 0;
132}