1 /* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
4
5 /**
6 * Non-cryptographic, lookup hash functions.
7 *
8 * Copyright: Eugene Wissner 2018-2020.
9 * License: $(LINK2 https://www.mozilla.org/en-US/MPL/2.0/,
10 * Mozilla Public License, v. 2.0).
11 * Authors: $(LINK2 mailto:info@caraus.de, Eugene Wissner)
12 * Source: $(LINK2 https://github.com/caraus-ecms/tanya/blob/master/source/tanya/hash/lookup.d,
13 * tanya/hash/lookup.d)
14 */
15 module tanya.hash.lookup;
16
17 import tanya.meta.trait;
18 import tanya.range.primitive;
19
20 private struct Hasher
21 {
22 static if (size_t.sizeof == 4)
23 {
24 enum uint offsetBasis = 2166136261;
25 enum uint prime = 16777619;
26 }
27 else static if (size_t.sizeof == 8)
28 {
29 enum ulong offsetBasis = 14695981039346656037UL;
30 enum ulong prime = 1099511628211UL;
31 }
32 else static if (size_t.sizeof == 16)
33 {
34 enum size_t offsetBasis = (size_t(0x6c62272e07bb0142UL) << 64) + 0x62b821756295c58dUL;
35 enum size_t prime = (size_t(1) << 88) + (1 << 8) + 0x3b;
36 }
37 else
38 {
39 static assert(false, "FNV requires at least 32-bit hash length");
40 }
41
42 size_t hash = offsetBasis;
43
44 void opCall(T)(auto ref T key)
45 {
46 static if (is(typeof(key.toHash()) == size_t))
47 {
48 opCall(key.toHash()); // Combine user-defined hashes
49 }
50 else static if (isScalarType!T || isPointer!T)
51 {
52 // Treat as an array of words
53 static if (T.sizeof % size_t.sizeof == 0
54 && T.alignof >= size_t.alignof)
55 alias CastT = size_t;
56 // (64-bit or 128-bit) Treat as an array of ints
57 else static if (T.sizeof % uint.sizeof == 0
58 && T.alignof >= uint.alignof)
59 alias CastT = uint;
60 // Treat as an array of bytes
61 else
62 alias CastT = ubyte;
63 add((() @trusted => (cast(const CastT*) &key)[0 .. T.sizeof / CastT.sizeof])());
64 }
65 else static if (isArray!T && isScalarType!(ElementType!T))
66 {
67 // Treat as an array of words
68 static if (ElementType!T.sizeof % size_t.sizeof == 0
69 && ElementType!T.alignof >= size_t.alignof)
70 alias CastT = size_t;
71 // (64-bit or 128-bit) Treat as an array of ints
72 else static if (ElementType!T.sizeof % uint.sizeof == 0
73 && ElementType!T.alignof >= uint.alignof)
74 alias CastT = uint;
75 // Treat as an array of bytes
76 else
77 alias CastT = ubyte;
78 add(cast(const CastT[]) key);
79 }
80 else static if (is(T == typeof(null)))
81 {
82 add(key);
83 }
84 else static if (isInputRange!T && !isInfinite!T)
85 {
86 foreach (e; key)
87 {
88 opCall(e);
89 }
90 }
91 else
92 {
93 static assert(false, "Hash function is not available");
94 }
95 }
96
97 void add(scope const ubyte[] key) @nogc nothrow pure @safe
98 {
99 // FNV-1a
100 foreach (c; key)
101 {
102 this.hash = (this.hash ^ c) * prime;
103 }
104 }
105
106 void add(scope const size_t[] key) @nogc nothrow pure @safe
107 {
108 static if (size_t.sizeof == 4)
109 {
110 // Partial MurmurHash3_x86_32 (no finalization)
111 enum uint c1 = 0xcc9e2d51;
112 enum uint c2 = 0x1b873593;
113 alias h1 = hash;
114 foreach (x; key)
115 {
116 auto k1 = x * c1;
117 k1 = (k1 << 15) | (k1 >> (32 - 15));
118 k1 *= c2;
119
120 h1 ^= k1;
121 h1 = (h1 << 13) | (h1 >> (32 - 13));
122 h1 = h1 * 5 + 0xe6546b64;
123 }
124 }
125 else static if (size_t.sizeof == 8)
126 {
127 // Partial 64-bit MurmurHash64A (no finalization)
128 alias h = hash;
129 enum ulong m = 0xc6a4a7935bd1e995UL;
130 foreach (x; key)
131 {
132 auto k = x * m;
133 k ^= k >>> 47;
134 k *= m;
135
136 h ^= k;
137 h *= m;
138 }
139 }
140 else static if (size_t.sizeof == 16)
141 {
142 // Partial MurmurHash3_x64_128 (no finalization)
143 // treating each size_t as a pair of ulong.
144 ulong h1 = cast(ulong) hash;
145 ulong h2 = cast(ulong) (hash >> 64);
146
147 enum ulong c1 = 0x87c37b91114253d5UL;
148 enum ulong c2 = 0x4cf5ad432745937fUL;
149
150 foreach (x; key)
151 {
152 auto k1 = cast(ulong) x;
153 auto k2 = cast(ulong) (x >> 64);
154
155 k1 *= c1; k1 = (k1 << 32) | (k1 >> (64 - 31)); k1 *= c2; h1 ^= k1;
156 h1 = (h1 << 27) | (h1 >> (64 - 27)); h1 += h2; h1 = h1*5+0x52dce729;
157 k2 *= c2; k2 = (k2 << 33) | (k2 >> (64 - 33)); k2 *= c1; h2 ^= k2;
158 h2 = (h2 << 31) | (h2 >> (64 - 31)); h2 += h1; h2 = h2*5+0x38495ab5;
159 }
160
161 hash = cast(size_t) h1 + ((cast(size_t) h2) << 64);
162 }
163 else
164 {
165 static assert(0, "Hash length must be either 32, 64, or 128 bits.");
166 }
167 }
168
169 static if (size_t.sizeof != uint.sizeof)
170 void add(scope const uint[] key) @nogc nothrow pure @trusted
171 {
172 static if (size_t.sizeof == 8)
173 {
174 // Partial 32-bit MurmurHash64B (no finalization)
175 enum uint m = 0x5bd1e995;
176 enum r = 24;
177
178 uint h1 = cast(uint) hash;
179 uint h2 = cast(uint) (hash >> 32);
180 const(uint)* data = key.ptr;
181 auto len = key.length;
182
183 for (; len >= 2; data += 2, len -= 2)
184 {
185 uint k1 = data[0];
186 k1 *= m; k1 ^= k1 >> r; k1 *= m;
187 h1 *= m; h1 ^= k1;
188
189 uint k2 = data[1];
190 k2 *= m; k2 ^= k2 >> r; k2 *= m;
191 h2 *= m; h2 ^= k2;
192 }
193 if (len)
194 {
195 uint k1 = data[0];
196 k1 *= m; k1 ^= k1 >> r; k1 *= m;
197 h1 *= m; h1 ^= k1;
198 }
199 hash = cast(ulong) h1 + ((cast(ulong) h2) << 32);
200 }
201 else static if (size_t.sizeof == 16)
202 {
203 // Partial MurmurHash3_x86_128 (no finalization)
204 enum uint c1 = 0x239b961b;
205 enum uint c2 = 0xab0e9789;
206 enum uint c3 = 0x38b34ae5;
207 enum uint c4 = 0xa1e38b93;
208
209 uint h1 = cast(uint) hash;
210 uint h2 = cast(uint) (hash >> 32);
211 uint h3 = cast(uint) (hash >> 64);
212 uint h4 = cast(uint) (hash >> 96);
213 const(uint)* data = key.ptr;
214 auto len = key.length;
215
216 for (; len >= 4; data += 4, len -= 4)
217 {
218 uint k1 = data[0];
219 uint k2 = data[1];
220 uint k3 = data[2];
221 uint k4 = data[3];
222
223 h1 = (h1 << 19) | (h1 >> (32 - 19)); h1 += h2; h1 = h1*5+0x561ccd1b;
224 k2 *= c2; k2 = (k2 << 16) | (k2 >> (32 - 16)); k2 *= c3; h2 ^= k2;
225 h2 = (h2 << 17) | (h2 >> (32 - 17)); h2 += h3; h2 = h2*5+0x0bcaa747;
226 k3 *= c3; k3 = (k3 << 17) | (k3 >> (32 - 17)); k3 *= c4; h3 ^= k3;
227 h3 = (h3 << 15) | (h3 >> (32 - 15)); h3 += h4; h3 = h3*5+0x96cd1c35;
228 k4 *= c4; k4 = (k4 << 18) | (k4 >> (32 - 18)); k4 *= c1; h4 ^= k4;
229 h4 = (h4 << 13) | (h4 >> (32 - 13)); h4 += h1; h4 = h4*5+0x32ac3b17;
230 }
231 uint k1, k2, k3;
232 switch (len) // 0, 1, 2, 3
233 {
234 case 3:
235 k3 = data[2];
236 k3 *= c3; k3 = (k3 << 17) | (k3 >> (32 - 17)); k3 *= c4; h3 ^= k3;
237 goto case;
238 case 2:
239 k2 = data[1];
240 k2 *= c2; k2 = (k2 << 16) | (k2 >> (32 - 16)); k2 *= c3; h2 ^= k2;
241 goto case;
242 case 1:
243 k1 = data[0];
244 k1 *= c1; k1 = (k1 << 15) | (k1 >> (32 - 15)); k1 *= c2; h1 ^= k1;
245 break;
246 }
247 hash = cast(size_t) h1 +
248 ((cast(size_t) h2) << 32) +
249 ((cast(size_t) h3) << 64) +
250 ((cast(size_t) h4) << 96);
251 }
252 else
253 {
254 static assert(0, "Hash length must be either 32, 64, or 128 bits.");
255 }
256 }
257 }
258
259 /**
260 * Takes an argument of an arbitrary type $(D_PARAM T) and calculates the hash
261 * value.
262 *
263 * Hash calculation is supported for all scalar types. Aggregate types, like
264 * $(D_KEYWORD struct)s, should implement `toHash`-function:
265 * ---
266 * size_t toHash() const
267 * {
268 * return hash;
269 * }
270 * ---
271 *
272 * For pointers and for scalar types implicitly convertible to `size_t` this
273 * is an identity operation (i.e. the value is cast to `size_t` and returned
274 * unaltered). Integer types wider than `size_t` are XOR folded down to
275 * `size_t`. Other scalar types use an architecture-dependent hash function
276 * based on their width and alignment.
277 * If the type provides a `toHash`-function, only `toHash()` is called and its
278 * result is returned.
279 *
280 * This function also accepts input ranges that contain hashable elements.
281 * Individual values are combined then and the resulting hash is returned.
282 *
283 * Params:
284 * T = Hashable type.
285 * key = Hashable value.
286 *
287 * Returns: Calculated hash value.
288 *
289 * See_Also: $(LINK http://www.isthe.com/chongo/tech/comp/fnv/).
290 */
291 size_t hash(T)(auto ref T key)
292 {
293 static if (is(typeof(key.toHash()) == size_t))
294 {
295 return key.toHash();
296 }
297 else static if ((isIntegral!T || isSomeChar!T || isBoolean!T)
298 && T.sizeof <= size_t.sizeof)
299 {
300 return cast(size_t) key;
301 }
302 else static if (isIntegral!T && T.sizeof > size_t.sizeof)
303 {
304 return cast(size_t) (key ^ (key >>> (size_t.sizeof * 8)));
305 }
306 else static if (isPointer!T || is(T : typeof(null)))
307 {
308 return (() @trusted => cast(size_t) key)();
309 }
310 else
311 {
312 Hasher hasher;
313 hasher(key);
314 return hasher.hash;
315 }
316 }
317
318 /**
319 * Determines whether $(D_PARAM hasher) is hash function for $(D_PARAM T), i.e.
320 * it is callable with a value of type $(D_PARAM T) and returns a
321 * $(D_PSYMBOL size_t) value.
322 *
323 * Params:
324 * hasher = Hash function candidate.
325 * T = Type to test the hash function with.
326 *
327 * Returns: $(D_KEYWORD true) if $(D_PARAM hasher) is a hash function for
328 * $(D_PARAM T), $(D_KEYWORD false) otherwise.
329 */
330 template isHashFunction(alias hasher, T)
331 {
332 private alias wrapper = (T x) => hasher(x);
333 enum bool isHashFunction = is(typeof(wrapper(T.init)) == size_t);
334 }
335
336 ///
337 @nogc nothrow pure @safe unittest
338 {
339 static assert(isHashFunction!(hash, int));
340 }