1 /* This Source Code Form is subject to the terms of the Mozilla Public
2  * License, v. 2.0. If a copy of the MPL was not distributed with this
3  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
4 
5 /**
6  * Non-cryptographic, lookup hash functions.
7  *
8  * Copyright: Eugene Wissner 2018-2020.
9  * License: $(LINK2 https://www.mozilla.org/en-US/MPL/2.0/,
10  *                  Mozilla Public License, v. 2.0).
11  * Authors: $(LINK2 mailto:info@caraus.de, Eugene Wissner)
12  * Source: $(LINK2 https://github.com/caraus-ecms/tanya/blob/master/source/tanya/hash/lookup.d,
13  *                 tanya/hash/lookup.d)
14  */
15 module tanya.hash.lookup;
16 
17 import tanya.meta.trait;
18 import tanya.range.primitive;
19 
20 private struct Hasher
21 {
22     static if (size_t.sizeof == 4)
23     {
24         enum uint offsetBasis = 2166136261;
25         enum uint prime = 16777619;
26     }
27     else static if (size_t.sizeof == 8)
28     {
29         enum ulong offsetBasis = 14695981039346656037UL;
30         enum ulong prime = 1099511628211UL;
31     }
32     else static if (size_t.sizeof == 16)
33     {
34         enum size_t offsetBasis = (size_t(0x6c62272e07bb0142UL) << 64) + 0x62b821756295c58dUL;
35         enum size_t prime = (size_t(1) << 88) + (1 << 8) + 0x3b;
36     }
37     else
38     {
39         static assert(false, "FNV requires at least 32-bit hash length");
40     }
41 
42     size_t hash = offsetBasis;
43 
44     void opCall(T)(auto ref T key)
45     {
46         static if (is(typeof(key.toHash()) == size_t))
47         {
48             opCall(key.toHash()); // Combine user-defined hashes
49         }
50         else static if (isScalarType!T || isPointer!T)
51         {
52             // Treat as an array of words
53             static if (T.sizeof % size_t.sizeof == 0
54                     && T.alignof >= size_t.alignof)
55                 alias CastT = size_t;
56             // (64-bit or 128-bit) Treat as an array of ints
57             else static if (T.sizeof % uint.sizeof == 0
58                     && T.alignof >= uint.alignof)
59                 alias CastT = uint;
60             // Treat as an array of bytes
61             else
62                 alias CastT = ubyte;
63             add((() @trusted => (cast(const CastT*) &key)[0 .. T.sizeof / CastT.sizeof])());
64         }
65         else static if (isArray!T && isScalarType!(ElementType!T))
66         {
67             // Treat as an array of words
68             static if (ElementType!T.sizeof % size_t.sizeof == 0
69                     && ElementType!T.alignof >= size_t.alignof)
70                 alias CastT = size_t;
71             // (64-bit or 128-bit) Treat as an array of ints
72             else static if (ElementType!T.sizeof % uint.sizeof == 0
73                     && ElementType!T.alignof >= uint.alignof)
74                 alias CastT = uint;
75             // Treat as an array of bytes
76             else
77                 alias CastT = ubyte;
78             add(cast(const CastT[]) key);
79         }
80         else static if (is(T == typeof(null)))
81         {
82             add(key);
83         }
84         else static if (isInputRange!T && !isInfinite!T)
85         {
86             foreach (e; key)
87             {
88                 opCall(e);
89             }
90         }
91         else
92         {
93             static assert(false, "Hash function is not available");
94         }
95     }
96 
97     void add(scope const ubyte[] key) @nogc nothrow pure @safe
98     {
99         // FNV-1a
100         foreach (c; key)
101         {
102             this.hash = (this.hash ^ c) * prime;
103         }
104     }
105 
106     void add(scope const size_t[] key) @nogc nothrow pure @safe
107     {
108         static if (size_t.sizeof == 4)
109         {
110             // Partial MurmurHash3_x86_32 (no finalization)
111             enum uint c1 = 0xcc9e2d51;
112             enum uint c2 = 0x1b873593;
113             alias h1 = hash;
114             foreach (x; key)
115             {
116                 auto k1 = x * c1;
117                 k1 = (k1 << 15) | (k1 >> (32 - 15));
118                 k1 *= c2;
119 
120                 h1 ^= k1;
121                 h1 = (h1 << 13) | (h1 >> (32 - 13));
122                 h1 = h1 * 5 + 0xe6546b64;
123             }
124         }
125         else static if (size_t.sizeof == 8)
126         {
127             // Partial 64-bit MurmurHash64A (no finalization)
128             alias h = hash;
129             enum ulong m = 0xc6a4a7935bd1e995UL;
130             foreach (x; key)
131             {
132                 auto k = x * m;
133                 k ^= k >>> 47;
134                 k *= m;
135 
136                 h ^= k;
137                 h *= m;
138             }
139         }
140         else static if (size_t.sizeof == 16)
141         {
142             // Partial MurmurHash3_x64_128 (no finalization)
143             // treating each size_t as a pair of ulong.
144             ulong h1 = cast(ulong) hash;
145             ulong h2 = cast(ulong) (hash >> 64);
146 
147             enum ulong c1 = 0x87c37b91114253d5UL;
148             enum ulong c2 = 0x4cf5ad432745937fUL;
149 
150             foreach (x; key)
151             {
152                 auto k1 = cast(ulong) x;
153                 auto k2 = cast(ulong) (x >> 64);
154 
155                 k1 *= c1; k1 = (k1 << 32) | (k1 >> (64 - 31)); k1 *= c2; h1 ^= k1;
156                 h1 = (h1 << 27) | (h1 >> (64 - 27)); h1 += h2; h1 = h1*5+0x52dce729;
157                 k2 *= c2; k2 = (k2 << 33) | (k2 >> (64 - 33)); k2 *= c1; h2 ^= k2;
158                 h2 = (h2 << 31) | (h2 >> (64 - 31)); h2 += h1; h2 = h2*5+0x38495ab5;
159             }
160 
161             hash = cast(size_t) h1 + ((cast(size_t) h2) << 64);
162         }
163         else
164         {
165             static assert(0, "Hash length must be either 32, 64, or 128 bits.");
166         }
167     }
168 
169     static if (size_t.sizeof != uint.sizeof)
170     void add(scope const uint[] key) @nogc nothrow pure @trusted
171     {
172         static if (size_t.sizeof == 8)
173         {
174             // Partial 32-bit MurmurHash64B (no finalization)
175             enum uint m = 0x5bd1e995;
176             enum r = 24;
177 
178             uint h1 = cast(uint) hash;
179             uint h2 = cast(uint) (hash >> 32);
180             const(uint)* data = key.ptr;
181             auto len = key.length;
182 
183             for (; len >= 2; data += 2, len -= 2)
184             {
185                 uint k1 = data[0];
186                 k1 *= m; k1 ^= k1 >> r; k1 *= m;
187                 h1 *= m; h1 ^= k1;
188 
189                 uint k2 = data[1];
190                 k2 *= m; k2 ^= k2 >> r; k2 *= m;
191                 h2 *= m; h2 ^= k2;
192             }
193             if (len)
194             {
195                 uint k1 = data[0];
196                 k1 *= m; k1 ^= k1 >> r; k1 *= m;
197                 h1 *= m; h1 ^= k1;
198             }
199             hash = cast(ulong) h1 + ((cast(ulong) h2) << 32);
200         }
201         else static if (size_t.sizeof == 16)
202         {
203             // Partial MurmurHash3_x86_128 (no finalization)
204             enum uint c1 = 0x239b961b;
205             enum uint c2 = 0xab0e9789;
206             enum uint c3 = 0x38b34ae5;
207             enum uint c4 = 0xa1e38b93;
208 
209             uint h1 = cast(uint) hash;
210             uint h2 = cast(uint) (hash >> 32);
211             uint h3 = cast(uint) (hash >> 64);
212             uint h4 = cast(uint) (hash >> 96);
213             const(uint)* data = key.ptr;
214             auto len = key.length;
215 
216             for (; len >= 4; data += 4, len -= 4)
217             {
218                 uint k1 = data[0];
219                 uint k2 = data[1];
220                 uint k3 = data[2];
221                 uint k4 = data[3];
222 
223                 h1 = (h1 << 19) | (h1 >> (32 - 19)); h1 += h2; h1 = h1*5+0x561ccd1b;
224                 k2 *= c2; k2 = (k2 << 16) | (k2 >> (32 - 16)); k2 *= c3; h2 ^= k2;
225                 h2 = (h2 << 17) | (h2 >> (32 - 17)); h2 += h3; h2 = h2*5+0x0bcaa747;
226                 k3 *= c3; k3 = (k3 << 17) | (k3 >> (32 - 17)); k3 *= c4; h3 ^= k3;
227                 h3 = (h3 << 15) | (h3 >> (32 - 15)); h3 += h4; h3 = h3*5+0x96cd1c35;
228                 k4 *= c4; k4 = (k4 << 18) | (k4 >> (32 - 18)); k4 *= c1; h4 ^= k4;
229                 h4 = (h4 << 13) | (h4 >> (32 - 13)); h4 += h1; h4 = h4*5+0x32ac3b17;
230             }
231             uint k1, k2, k3;
232             switch (len) // 0, 1, 2, 3
233             {
234                 case 3:
235                     k3 = data[2];
236                     k3 *= c3; k3 = (k3 << 17) | (k3 >> (32 - 17)); k3 *= c4; h3 ^= k3;
237                     goto case;
238                 case 2:
239                     k2 = data[1];
240                     k2 *= c2; k2 = (k2 << 16) | (k2 >> (32 - 16)); k2 *= c3; h2 ^= k2;
241                     goto case;
242                 case 1:
243                     k1 = data[0];
244                     k1 *= c1; k1 = (k1 << 15) | (k1 >> (32 - 15)); k1 *= c2; h1 ^= k1;
245                     break;
246             }
247             hash = cast(size_t) h1 +
248                    ((cast(size_t) h2) << 32) +
249                    ((cast(size_t) h3) << 64) +
250                    ((cast(size_t) h4) << 96);
251         }
252         else
253         {
254             static assert(0, "Hash length must be either 32, 64, or 128 bits.");
255         }
256     }
257 }
258 
259 /**
260  * Takes an argument of an arbitrary type $(D_PARAM T) and calculates the hash
261  * value.
262  *
263  * Hash calculation is supported for all scalar types. Aggregate types, like
264  * $(D_KEYWORD struct)s, should implement `toHash`-function:
265  * ---
266  * size_t toHash() const
267  * {
268  *  return hash;
269  * }
270  * ---
271  *
272  * For pointers and for scalar types implicitly convertible to `size_t` this
273  * is an identity operation (i.e. the value is cast to `size_t` and returned
274  * unaltered). Integer types wider than `size_t` are XOR folded down to
275  * `size_t`. Other scalar types use an architecture-dependent hash function
276  * based on their width and alignment.
277  * If the type provides a `toHash`-function, only `toHash()` is called and its
278  * result is returned.
279  *
280  * This function also accepts input ranges that contain hashable elements.
281  * Individual values are combined then and the resulting hash is returned.
282  *
283  * Params:
284  *  T   = Hashable type.
285  *  key = Hashable value.
286  *
287  * Returns: Calculated hash value.
288  *
289  * See_Also: $(LINK http://www.isthe.com/chongo/tech/comp/fnv/).
290  */
291 size_t hash(T)(auto ref T key)
292 {
293     static if (is(typeof(key.toHash()) == size_t))
294     {
295         return key.toHash();
296     }
297     else static if ((isIntegral!T || isSomeChar!T || isBoolean!T)
298             && T.sizeof <= size_t.sizeof)
299     {
300         return cast(size_t) key;
301     }
302     else static if (isIntegral!T && T.sizeof > size_t.sizeof)
303     {
304         return cast(size_t) (key ^ (key >>> (size_t.sizeof * 8)));
305     }
306     else static if (isPointer!T || is(T : typeof(null)))
307     {
308         return (() @trusted => cast(size_t) key)();
309     }
310     else
311     {
312         Hasher hasher;
313         hasher(key);
314         return hasher.hash;
315     }
316 }
317 
318 /**
319  * Determines whether $(D_PARAM hasher) is hash function for $(D_PARAM T), i.e.
320  * it is callable with a value of type $(D_PARAM T) and returns a
321  * $(D_PSYMBOL size_t) value.
322  *
323  * Params:
324  *  hasher = Hash function candidate.
325  *  T      = Type to test the hash function with.
326  *
327  * Returns: $(D_KEYWORD true) if $(D_PARAM hasher) is a hash function for
328  *          $(D_PARAM T), $(D_KEYWORD false) otherwise.
329  */
330 template isHashFunction(alias hasher, T)
331 {
332     private alias wrapper = (T x) => hasher(x);
333     enum bool isHashFunction = is(typeof(wrapper(T.init)) == size_t);
334 }
335 
336 ///
337 @nogc nothrow pure @safe unittest
338 {
339     static assert(isHashFunction!(hash, int));
340 }