1 /* This Source Code Form is subject to the terms of the Mozilla Public
2  * License, v. 2.0. If a copy of the MPL was not distributed with this
3  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
4 
5 /**
6  * Non-cryptographic, lookup hash functions.
7  *
8  * Copyright: Eugene Wissner 2018-2020.
9  * License: $(LINK2 https://www.mozilla.org/en-US/MPL/2.0/,
10  *                  Mozilla Public License, v. 2.0).
11  * Authors: $(LINK2 mailto:info@caraus.de, Eugene Wissner)
12  * Source: $(LINK2 https://github.com/caraus-ecms/tanya/blob/master/source/tanya/hash/lookup.d,
13  *                 tanya/hash/lookup.d)
14  */
15 module tanya.hash.lookup;
16 
17 import std.traits : isScalarType;
18 import tanya.meta.trait;
19 import tanya.range.primitive;
20 
21 private struct Hasher
22 {
23     static if (size_t.sizeof == 4)
24     {
25         enum uint offsetBasis = 2166136261;
26         enum uint prime = 16777619;
27     }
28     else static if (size_t.sizeof == 8)
29     {
30         enum ulong offsetBasis = 14695981039346656037UL;
31         enum ulong prime = 1099511628211UL;
32     }
33     else static if (size_t.sizeof == 16)
34     {
35         enum size_t offsetBasis = (size_t(0x6c62272e07bb0142UL) << 64) + 0x62b821756295c58dUL;
36         enum size_t prime = (size_t(1) << 88) + (1 << 8) + 0x3b;
37     }
38     else
39     {
40         static assert(false, "FNV requires at least 32-bit hash length");
41     }
42 
43     size_t hash = offsetBasis;
44 
45     void opCall(T)(auto ref T key)
46     {
47         static if (is(typeof(key.toHash()) == size_t))
48         {
49             opCall(key.toHash()); // Combine user-defined hashes
50         }
51         else static if (isScalarType!T || isPointer!T)
52         {
53             // Treat as an array of words
54             static if (T.sizeof % size_t.sizeof == 0
55                     && T.alignof >= size_t.alignof)
56                 alias CastT = size_t;
57             // (64-bit or 128-bit) Treat as an array of ints
58             else static if (T.sizeof % uint.sizeof == 0
59                     && T.alignof >= uint.alignof)
60                 alias CastT = uint;
61             // Treat as an array of bytes
62             else
63                 alias CastT = ubyte;
64             add((() @trusted => (cast(const CastT*) &key)[0 .. T.sizeof / CastT.sizeof])());
65         }
66         else static if (isArray!T && isScalarType!(ElementType!T))
67         {
68             // Treat as an array of words
69             static if (ElementType!T.sizeof % size_t.sizeof == 0
70                     && ElementType!T.alignof >= size_t.alignof)
71                 alias CastT = size_t;
72             // (64-bit or 128-bit) Treat as an array of ints
73             else static if (ElementType!T.sizeof % uint.sizeof == 0
74                     && ElementType!T.alignof >= uint.alignof)
75                 alias CastT = uint;
76             // Treat as an array of bytes
77             else
78                 alias CastT = ubyte;
79             add(cast(const CastT[]) key);
80         }
81         else static if (is(T == typeof(null)))
82         {
83             add(key);
84         }
85         else static if (isInputRange!T && !isInfinite!T)
86         {
87             foreach (e; key)
88             {
89                 opCall(e);
90             }
91         }
92         else
93         {
94             static assert(false, "Hash function is not available");
95         }
96     }
97 
98     void add(scope const ubyte[] key) @nogc nothrow pure @safe
99     {
100         // FNV-1a
101         foreach (c; key)
102         {
103             this.hash = (this.hash ^ c) * prime;
104         }
105     }
106 
107     void add(scope const size_t[] key) @nogc nothrow pure @safe
108     {
109         static if (size_t.sizeof == 4)
110         {
111             // Partial MurmurHash3_x86_32 (no finalization)
112             enum uint c1 = 0xcc9e2d51;
113             enum uint c2 = 0x1b873593;
114             alias h1 = hash;
115             foreach (x; key)
116             {
117                 auto k1 = x * c1;
118                 k1 = (k1 << 15) | (k1 >> (32 - 15));
119                 k1 *= c2;
120 
121                 h1 ^= k1;
122                 h1 = (h1 << 13) | (h1 >> (32 - 13));
123                 h1 = h1 * 5 + 0xe6546b64;
124             }
125         }
126         else static if (size_t.sizeof == 8)
127         {
128             // Partial 64-bit MurmurHash64A (no finalization)
129             alias h = hash;
130             enum ulong m = 0xc6a4a7935bd1e995UL;
131             foreach (x; key)
132             {
133                 auto k = x * m;
134                 k ^= k >>> 47;
135                 k *= m;
136 
137                 h ^= k;
138                 h *= m;
139             }
140         }
141         else static if (size_t.sizeof == 16)
142         {
143             // Partial MurmurHash3_x64_128 (no finalization)
144             // treating each size_t as a pair of ulong.
145             ulong h1 = cast(ulong) hash;
146             ulong h2 = cast(ulong) (hash >> 64);
147 
148             enum ulong c1 = 0x87c37b91114253d5UL;
149             enum ulong c2 = 0x4cf5ad432745937fUL;
150 
151             foreach (x; key)
152             {
153                 auto k1 = cast(ulong) x;
154                 auto k2 = cast(ulong) (x >> 64);
155 
156                 k1 *= c1; k1 = (k1 << 32) | (k1 >> (64 - 31)); k1 *= c2; h1 ^= k1;
157                 h1 = (h1 << 27) | (h1 >> (64 - 27)); h1 += h2; h1 = h1*5+0x52dce729;
158                 k2 *= c2; k2 = (k2 << 33) | (k2 >> (64 - 33)); k2 *= c1; h2 ^= k2;
159                 h2 = (h2 << 31) | (h2 >> (64 - 31)); h2 += h1; h2 = h2*5+0x38495ab5;
160             }
161 
162             hash = cast(size_t) h1 + ((cast(size_t) h2) << 64);
163         }
164         else
165         {
166             static assert(0, "Hash length must be either 32, 64, or 128 bits.");
167         }
168     }
169 
170     static if (size_t.sizeof != uint.sizeof)
171     void add(scope const uint[] key) @nogc nothrow pure @trusted
172     {
173         static if (size_t.sizeof == 8)
174         {
175             // Partial 32-bit MurmurHash64B (no finalization)
176             enum uint m = 0x5bd1e995;
177             enum r = 24;
178 
179             uint h1 = cast(uint) hash;
180             uint h2 = cast(uint) (hash >> 32);
181             const(uint)* data = key.ptr;
182             auto len = key.length;
183 
184             for (; len >= 2; data += 2, len -= 2)
185             {
186                 uint k1 = data[0];
187                 k1 *= m; k1 ^= k1 >> r; k1 *= m;
188                 h1 *= m; h1 ^= k1;
189 
190                 uint k2 = data[1];
191                 k2 *= m; k2 ^= k2 >> r; k2 *= m;
192                 h2 *= m; h2 ^= k2;
193             }
194             if (len)
195             {
196                 uint k1 = data[0];
197                 k1 *= m; k1 ^= k1 >> r; k1 *= m;
198                 h1 *= m; h1 ^= k1;
199             }
200             hash = cast(ulong) h1 + ((cast(ulong) h2) << 32);
201         }
202         else static if (size_t.sizeof == 16)
203         {
204             // Partial MurmurHash3_x86_128 (no finalization)
205             enum uint c1 = 0x239b961b;
206             enum uint c2 = 0xab0e9789;
207             enum uint c3 = 0x38b34ae5;
208             enum uint c4 = 0xa1e38b93;
209 
210             uint h1 = cast(uint) hash;
211             uint h2 = cast(uint) (hash >> 32);
212             uint h3 = cast(uint) (hash >> 64);
213             uint h4 = cast(uint) (hash >> 96);
214             const(uint)* data = key.ptr;
215             auto len = key.length;
216 
217             for (; len >= 4; data += 4, len -= 4)
218             {
219                 uint k1 = data[0];
220                 uint k2 = data[1];
221                 uint k3 = data[2];
222                 uint k4 = data[3];
223 
224                 h1 = (h1 << 19) | (h1 >> (32 - 19)); h1 += h2; h1 = h1*5+0x561ccd1b;
225                 k2 *= c2; k2 = (k2 << 16) | (k2 >> (32 - 16)); k2 *= c3; h2 ^= k2;
226                 h2 = (h2 << 17) | (h2 >> (32 - 17)); h2 += h3; h2 = h2*5+0x0bcaa747;
227                 k3 *= c3; k3 = (k3 << 17) | (k3 >> (32 - 17)); k3 *= c4; h3 ^= k3;
228                 h3 = (h3 << 15) | (h3 >> (32 - 15)); h3 += h4; h3 = h3*5+0x96cd1c35;
229                 k4 *= c4; k4 = (k4 << 18) | (k4 >> (32 - 18)); k4 *= c1; h4 ^= k4;
230                 h4 = (h4 << 13) | (h4 >> (32 - 13)); h4 += h1; h4 = h4*5+0x32ac3b17;
231             }
232             uint k1, k2, k3;
233             switch (len) // 0, 1, 2, 3
234             {
235                 case 3:
236                     k3 = data[2];
237                     k3 *= c3; k3 = (k3 << 17) | (k3 >> (32 - 17)); k3 *= c4; h3 ^= k3;
238                     goto case;
239                 case 2:
240                     k2 = data[1];
241                     k2 *= c2; k2 = (k2 << 16) | (k2 >> (32 - 16)); k2 *= c3; h2 ^= k2;
242                     goto case;
243                 case 1:
244                     k1 = data[0];
245                     k1 *= c1; k1 = (k1 << 15) | (k1 >> (32 - 15)); k1 *= c2; h1 ^= k1;
246                     break;
247             }
248             hash = cast(size_t) h1 +
249                    ((cast(size_t) h2) << 32) +
250                    ((cast(size_t) h3) << 64) +
251                    ((cast(size_t) h4) << 96);
252         }
253         else
254         {
255             static assert(0, "Hash length must be either 32, 64, or 128 bits.");
256         }
257     }
258 }
259 
260 /**
261  * Takes an argument of an arbitrary type $(D_PARAM T) and calculates the hash
262  * value.
263  *
264  * Hash calculation is supported for all scalar types. Aggregate types, like
265  * $(D_KEYWORD struct)s, should implement `toHash`-function:
266  * ---
267  * size_t toHash() const
268  * {
269  *  return hash;
270  * }
271  * ---
272  *
273  * For pointers and for scalar types implicitly convertible to `size_t` this
274  * is an identity operation (i.e. the value is cast to `size_t` and returned
275  * unaltered). Integer types wider than `size_t` are XOR folded down to
276  * `size_t`. Other scalar types use an architecture-dependent hash function
277  * based on their width and alignment.
278  * If the type provides a `toHash`-function, only `toHash()` is called and its
279  * result is returned.
280  *
281  * This function also accepts input ranges that contain hashable elements.
282  * Individual values are combined then and the resulting hash is returned.
283  *
284  * Params:
285  *  T   = Hashable type.
286  *  key = Hashable value.
287  *
288  * Returns: Calculated hash value.
289  *
290  * See_Also: $(LINK http://www.isthe.com/chongo/tech/comp/fnv/).
291  */
292 size_t hash(T)(auto ref T key)
293 {
294     static if (is(typeof(key.toHash()) == size_t))
295     {
296         return key.toHash();
297     }
298     else static if ((isIntegral!T || isSomeChar!T || isBoolean!T)
299             && T.sizeof <= size_t.sizeof)
300     {
301         return cast(size_t) key;
302     }
303     else static if (isIntegral!T && T.sizeof > size_t.sizeof)
304     {
305         return cast(size_t) (key ^ (key >>> (size_t.sizeof * 8)));
306     }
307     else static if (isPointer!T || is(T : typeof(null)))
308     {
309         return (() @trusted => cast(size_t) key)();
310     }
311     else
312     {
313         Hasher hasher;
314         hasher(key);
315         return hasher.hash;
316     }
317 }
318 
319 /**
320  * Determines whether $(D_PARAM hasher) is hash function for $(D_PARAM T), i.e.
321  * it is callable with a value of type $(D_PARAM T) and returns a
322  * $(D_PSYMBOL size_t) value.
323  *
324  * Params:
325  *  hasher = Hash function candidate.
326  *  T      = Type to test the hash function with.
327  *
328  * Returns: $(D_KEYWORD true) if $(D_PARAM hasher) is a hash function for
329  *          $(D_PARAM T), $(D_KEYWORD false) otherwise.
330  */
331 template isHashFunction(alias hasher, T)
332 {
333     private alias wrapper = (T x) => hasher(x);
334     enum bool isHashFunction = is(typeof(wrapper(T.init)) == size_t);
335 }
336 
337 ///
338 @nogc nothrow pure @safe unittest
339 {
340     static assert(isHashFunction!(hash, int));
341 }