diff options
author | Dirk Engling <erdgeist@erdgeist.org> | 2019-03-07 15:25:31 +0100 |
---|---|---|
committer | Dirk Engling <erdgeist@erdgeist.org> | 2019-03-07 15:25:31 +0100 |
commit | 2e86c5fe02332189948672df1fec49b68584bfc3 (patch) | |
tree | 4c08ab8abf1bda5a76557c97ebcacdc982453c8b /src | |
parent | a448659fb888440de4fd447f07e9629dd5150624 (diff) |
merge_entries uses siphash to identify common chunks
Diffstat (limited to 'src')
-rw-r--r-- | src/postprocess/halfsiphash.c | 152 |
1 files changed, 152 insertions, 0 deletions
diff --git a/src/postprocess/halfsiphash.c b/src/postprocess/halfsiphash.c new file mode 100644 index 0000000..d74d3be --- /dev/null +++ b/src/postprocess/halfsiphash.c | |||
@@ -0,0 +1,152 @@ | |||
1 | |||
2 | /* | ||
3 | SipHash reference C implementation | ||
4 | |||
5 | Copyright (c) 2016 Jean-Philippe Aumasson <jeanphilippe.aumasson@gmail.com> | ||
6 | |||
7 | To the extent possible under law, the author(s) have dedicated all copyright | ||
8 | and related and neighboring rights to this software to the public domain | ||
9 | worldwide. This software is distributed without any warranty. | ||
10 | |||
11 | You should have received a copy of the CC0 Public Domain Dedication along | ||
12 | with | ||
13 | this software. If not, see | ||
14 | <http://creativecommons.org/publicdomain/zero/1.0/>. | ||
15 | */ | ||
16 | #include <assert.h> | ||
17 | #include <stdint.h> | ||
18 | #include <stdio.h> | ||
19 | #include <string.h> | ||
20 | |||
21 | /* default: SipHash-2-4 */ | ||
22 | #define cROUNDS 2 | ||
23 | #define dROUNDS 4 | ||
24 | |||
25 | #define ROTL(x, b) (uint32_t)(((x) << (b)) | ((x) >> (32 - (b)))) | ||
26 | |||
27 | #define U32TO8_LE(p, v) \ | ||
28 | (p)[0] = (uint8_t)((v)); \ | ||
29 | (p)[1] = (uint8_t)((v) >> 8); \ | ||
30 | (p)[2] = (uint8_t)((v) >> 16); \ | ||
31 | (p)[3] = (uint8_t)((v) >> 24); | ||
32 | |||
33 | #define U32TO8_LE(p, v) \ | ||
34 | (p)[0] = (uint8_t)((v)); \ | ||
35 | (p)[1] = (uint8_t)((v) >> 8); \ | ||
36 | (p)[2] = (uint8_t)((v) >> 16); \ | ||
37 | (p)[3] = (uint8_t)((v) >> 24); | ||
38 | |||
39 | #define U8TO32_LE(p) \ | ||
40 | (((uint32_t)((p)[0])) | ((uint32_t)((p)[1]) << 8) | \ | ||
41 | ((uint32_t)((p)[2]) << 16) | ((uint32_t)((p)[3]) << 24)) | ||
42 | |||
43 | #define SIPROUND \ | ||
44 | do { \ | ||
45 | v0 += v1; \ | ||
46 | v1 = ROTL(v1, 5); \ | ||
47 | v1 ^= v0; \ | ||
48 | v0 = ROTL(v0, 16); \ | ||
49 | v2 += v3; \ | ||
50 | v3 = ROTL(v3, 8); \ | ||
51 | v3 ^= v2; \ | ||
52 | v0 += v3; \ | ||
53 | v3 = ROTL(v3, 7); \ | ||
54 | v3 ^= v0; \ | ||
55 | v2 += v1; \ | ||
56 | v1 = ROTL(v1, 13); \ | ||
57 | v1 ^= v2; \ | ||
58 | v2 = ROTL(v2, 16); \ | ||
59 | } while (0) | ||
60 | |||
61 | #ifdef DEBUG | ||
62 | #define TRACE \ | ||
63 | do { \ | ||
64 | printf("(%3d) v0 %08x\n", (int)inlen, v0); \ | ||
65 | printf("(%3d) v1 %08x\n", (int)inlen, v1); \ | ||
66 | printf("(%3d) v2 %08x\n", (int)inlen, v2); \ | ||
67 | printf("(%3d) v3 %08x\n", (int)inlen, v3); \ | ||
68 | } while (0) | ||
69 | #else | ||
70 | #define TRACE | ||
71 | #endif | ||
72 | |||
73 | int halfsiphash(const uint8_t *in, const size_t inlen, const uint8_t *k, | ||
74 | uint8_t *out, const size_t outlen) { | ||
75 | |||
76 | assert((outlen == 4) || (outlen == 8)); | ||
77 | uint32_t v0 = 0; | ||
78 | uint32_t v1 = 0; | ||
79 | uint32_t v2 = 0x6c796765; | ||
80 | uint32_t v3 = 0x74656462; | ||
81 | uint32_t k0 = U8TO32_LE(k); | ||
82 | uint32_t k1 = U8TO32_LE(k + 4); | ||
83 | uint32_t m; | ||
84 | int i; | ||
85 | const uint8_t *end = in + inlen - (inlen % sizeof(uint32_t)); | ||
86 | const int left = inlen & 3; | ||
87 | uint32_t b = ((uint32_t)inlen) << 24; | ||
88 | v3 ^= k1; | ||
89 | v2 ^= k0; | ||
90 | v1 ^= k1; | ||
91 | v0 ^= k0; | ||
92 | |||
93 | if (outlen == 8) | ||
94 | v1 ^= 0xee; | ||
95 | |||
96 | for (; in != end; in += 4) { | ||
97 | m = U8TO32_LE(in); | ||
98 | v3 ^= m; | ||
99 | |||
100 | TRACE; | ||
101 | for (i = 0; i < cROUNDS; ++i) | ||
102 | SIPROUND; | ||
103 | |||
104 | v0 ^= m; | ||
105 | } | ||
106 | |||
107 | switch (left) { | ||
108 | case 3: | ||
109 | b |= ((uint32_t)in[2]) << 16; | ||
110 | case 2: | ||
111 | b |= ((uint32_t)in[1]) << 8; | ||
112 | case 1: | ||
113 | b |= ((uint32_t)in[0]); | ||
114 | break; | ||
115 | case 0: | ||
116 | break; | ||
117 | } | ||
118 | |||
119 | v3 ^= b; | ||
120 | |||
121 | TRACE; | ||
122 | for (i = 0; i < cROUNDS; ++i) | ||
123 | SIPROUND; | ||
124 | |||
125 | v0 ^= b; | ||
126 | |||
127 | if (outlen == 8) | ||
128 | v2 ^= 0xee; | ||
129 | else | ||
130 | v2 ^= 0xff; | ||
131 | |||
132 | TRACE; | ||
133 | for (i = 0; i < dROUNDS; ++i) | ||
134 | SIPROUND; | ||
135 | |||
136 | b = v1 ^ v3; | ||
137 | U32TO8_LE(out, b); | ||
138 | |||
139 | if (outlen == 4) | ||
140 | return 0; | ||
141 | |||
142 | v1 ^= 0xdd; | ||
143 | |||
144 | TRACE; | ||
145 | for (i = 0; i < dROUNDS; ++i) | ||
146 | SIPROUND; | ||
147 | |||
148 | b = v1 ^ v3; | ||
149 | U32TO8_LE(out + 4, b); | ||
150 | |||
151 | return 0; | ||
152 | } | ||