1 /* This Source Code Form is subject to the terms of the Mozilla Public 2 * License, v. 2.0. If a copy of the MPL was not distributed with this 3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 4 5 /** 6 * Non-cryptographic, lookup hash functions. 7 * 8 * Copyright: Eugene Wissner 2018-2020. 9 * License: $(LINK2 https://www.mozilla.org/en-US/MPL/2.0/, 10 * Mozilla Public License, v. 2.0). 11 * Authors: $(LINK2 mailto:info@caraus.de, Eugene Wissner) 12 * Source: $(LINK2 https://github.com/caraus-ecms/tanya/blob/master/source/tanya/hash/lookup.d, 13 * tanya/hash/lookup.d) 14 */ 15 module tanya.hash.lookup; 16 17 import tanya.meta.trait; 18 import tanya.range.primitive; 19 20 private struct Hasher 21 { 22 static if (size_t.sizeof == 4) 23 { 24 enum uint offsetBasis = 2166136261; 25 enum uint prime = 16777619; 26 } 27 else static if (size_t.sizeof == 8) 28 { 29 enum ulong offsetBasis = 14695981039346656037UL; 30 enum ulong prime = 1099511628211UL; 31 } 32 else static if (size_t.sizeof == 16) 33 { 34 enum size_t offsetBasis = (size_t(0x6c62272e07bb0142UL) << 64) + 0x62b821756295c58dUL; 35 enum size_t prime = (size_t(1) << 88) + (1 << 8) + 0x3b; 36 } 37 else 38 { 39 static assert(false, "FNV requires at least 32-bit hash length"); 40 } 41 42 size_t hash = offsetBasis; 43 44 void opCall(T)(auto ref T key) 45 { 46 static if (is(typeof(key.toHash()) == size_t)) 47 { 48 opCall(key.toHash()); // Combine user-defined hashes 49 } 50 else static if (isScalarType!T || isPointer!T) 51 { 52 // Treat as an array of words 53 static if (T.sizeof % size_t.sizeof == 0 54 && T.alignof >= size_t.alignof) 55 alias CastT = size_t; 56 // (64-bit or 128-bit) Treat as an array of ints 57 else static if (T.sizeof % uint.sizeof == 0 58 && T.alignof >= uint.alignof) 59 alias CastT = uint; 60 // Treat as an array of bytes 61 else 62 alias CastT = ubyte; 63 add((() @trusted => (cast(const CastT*) &key)[0 .. T.sizeof / CastT.sizeof])()); 64 } 65 else static if (isArray!T && isScalarType!(ElementType!T)) 66 { 67 // Treat as an array of words 68 static if (ElementType!T.sizeof % size_t.sizeof == 0 69 && ElementType!T.alignof >= size_t.alignof) 70 alias CastT = size_t; 71 // (64-bit or 128-bit) Treat as an array of ints 72 else static if (ElementType!T.sizeof % uint.sizeof == 0 73 && ElementType!T.alignof >= uint.alignof) 74 alias CastT = uint; 75 // Treat as an array of bytes 76 else 77 alias CastT = ubyte; 78 add(cast(const CastT[]) key); 79 } 80 else static if (is(T == typeof(null))) 81 { 82 add(key); 83 } 84 else static if (isInputRange!T && !isInfinite!T) 85 { 86 foreach (e; key) 87 { 88 opCall(e); 89 } 90 } 91 else 92 { 93 static assert(false, "Hash function is not available"); 94 } 95 } 96 97 void add(scope const ubyte[] key) @nogc nothrow pure @safe 98 { 99 // FNV-1a 100 foreach (c; key) 101 { 102 this.hash = (this.hash ^ c) * prime; 103 } 104 } 105 106 void add(scope const size_t[] key) @nogc nothrow pure @safe 107 { 108 static if (size_t.sizeof == 4) 109 { 110 // Partial MurmurHash3_x86_32 (no finalization) 111 enum uint c1 = 0xcc9e2d51; 112 enum uint c2 = 0x1b873593; 113 alias h1 = hash; 114 foreach (x; key) 115 { 116 auto k1 = x * c1; 117 k1 = (k1 << 15) | (k1 >> (32 - 15)); 118 k1 *= c2; 119 120 h1 ^= k1; 121 h1 = (h1 << 13) | (h1 >> (32 - 13)); 122 h1 = h1 * 5 + 0xe6546b64; 123 } 124 } 125 else static if (size_t.sizeof == 8) 126 { 127 // Partial 64-bit MurmurHash64A (no finalization) 128 alias h = hash; 129 enum ulong m = 0xc6a4a7935bd1e995UL; 130 foreach (x; key) 131 { 132 auto k = x * m; 133 k ^= k >>> 47; 134 k *= m; 135 136 h ^= k; 137 h *= m; 138 } 139 } 140 else static if (size_t.sizeof == 16) 141 { 142 // Partial MurmurHash3_x64_128 (no finalization) 143 // treating each size_t as a pair of ulong. 144 ulong h1 = cast(ulong) hash; 145 ulong h2 = cast(ulong) (hash >> 64); 146 147 enum ulong c1 = 0x87c37b91114253d5UL; 148 enum ulong c2 = 0x4cf5ad432745937fUL; 149 150 foreach (x; key) 151 { 152 auto k1 = cast(ulong) x; 153 auto k2 = cast(ulong) (x >> 64); 154 155 k1 *= c1; k1 = (k1 << 32) | (k1 >> (64 - 31)); k1 *= c2; h1 ^= k1; 156 h1 = (h1 << 27) | (h1 >> (64 - 27)); h1 += h2; h1 = h1*5+0x52dce729; 157 k2 *= c2; k2 = (k2 << 33) | (k2 >> (64 - 33)); k2 *= c1; h2 ^= k2; 158 h2 = (h2 << 31) | (h2 >> (64 - 31)); h2 += h1; h2 = h2*5+0x38495ab5; 159 } 160 161 hash = cast(size_t) h1 + ((cast(size_t) h2) << 64); 162 } 163 else 164 { 165 static assert(0, "Hash length must be either 32, 64, or 128 bits."); 166 } 167 } 168 169 static if (size_t.sizeof != uint.sizeof) 170 void add(scope const uint[] key) @nogc nothrow pure @trusted 171 { 172 static if (size_t.sizeof == 8) 173 { 174 // Partial 32-bit MurmurHash64B (no finalization) 175 enum uint m = 0x5bd1e995; 176 enum r = 24; 177 178 uint h1 = cast(uint) hash; 179 uint h2 = cast(uint) (hash >> 32); 180 const(uint)* data = key.ptr; 181 auto len = key.length; 182 183 for (; len >= 2; data += 2, len -= 2) 184 { 185 uint k1 = data[0]; 186 k1 *= m; k1 ^= k1 >> r; k1 *= m; 187 h1 *= m; h1 ^= k1; 188 189 uint k2 = data[1]; 190 k2 *= m; k2 ^= k2 >> r; k2 *= m; 191 h2 *= m; h2 ^= k2; 192 } 193 if (len) 194 { 195 uint k1 = data[0]; 196 k1 *= m; k1 ^= k1 >> r; k1 *= m; 197 h1 *= m; h1 ^= k1; 198 } 199 hash = cast(ulong) h1 + ((cast(ulong) h2) << 32); 200 } 201 else static if (size_t.sizeof == 16) 202 { 203 // Partial MurmurHash3_x86_128 (no finalization) 204 enum uint c1 = 0x239b961b; 205 enum uint c2 = 0xab0e9789; 206 enum uint c3 = 0x38b34ae5; 207 enum uint c4 = 0xa1e38b93; 208 209 uint h1 = cast(uint) hash; 210 uint h2 = cast(uint) (hash >> 32); 211 uint h3 = cast(uint) (hash >> 64); 212 uint h4 = cast(uint) (hash >> 96); 213 const(uint)* data = key.ptr; 214 auto len = key.length; 215 216 for (; len >= 4; data += 4, len -= 4) 217 { 218 uint k1 = data[0]; 219 uint k2 = data[1]; 220 uint k3 = data[2]; 221 uint k4 = data[3]; 222 223 h1 = (h1 << 19) | (h1 >> (32 - 19)); h1 += h2; h1 = h1*5+0x561ccd1b; 224 k2 *= c2; k2 = (k2 << 16) | (k2 >> (32 - 16)); k2 *= c3; h2 ^= k2; 225 h2 = (h2 << 17) | (h2 >> (32 - 17)); h2 += h3; h2 = h2*5+0x0bcaa747; 226 k3 *= c3; k3 = (k3 << 17) | (k3 >> (32 - 17)); k3 *= c4; h3 ^= k3; 227 h3 = (h3 << 15) | (h3 >> (32 - 15)); h3 += h4; h3 = h3*5+0x96cd1c35; 228 k4 *= c4; k4 = (k4 << 18) | (k4 >> (32 - 18)); k4 *= c1; h4 ^= k4; 229 h4 = (h4 << 13) | (h4 >> (32 - 13)); h4 += h1; h4 = h4*5+0x32ac3b17; 230 } 231 uint k1, k2, k3; 232 switch (len) // 0, 1, 2, 3 233 { 234 case 3: 235 k3 = data[2]; 236 k3 *= c3; k3 = (k3 << 17) | (k3 >> (32 - 17)); k3 *= c4; h3 ^= k3; 237 goto case; 238 case 2: 239 k2 = data[1]; 240 k2 *= c2; k2 = (k2 << 16) | (k2 >> (32 - 16)); k2 *= c3; h2 ^= k2; 241 goto case; 242 case 1: 243 k1 = data[0]; 244 k1 *= c1; k1 = (k1 << 15) | (k1 >> (32 - 15)); k1 *= c2; h1 ^= k1; 245 break; 246 } 247 hash = cast(size_t) h1 + 248 ((cast(size_t) h2) << 32) + 249 ((cast(size_t) h3) << 64) + 250 ((cast(size_t) h4) << 96); 251 } 252 else 253 { 254 static assert(0, "Hash length must be either 32, 64, or 128 bits."); 255 } 256 } 257 } 258 259 /** 260 * Takes an argument of an arbitrary type $(D_PARAM T) and calculates the hash 261 * value. 262 * 263 * Hash calculation is supported for all scalar types. Aggregate types, like 264 * $(D_KEYWORD struct)s, should implement `toHash`-function: 265 * --- 266 * size_t toHash() const 267 * { 268 * return hash; 269 * } 270 * --- 271 * 272 * For pointers and for scalar types implicitly convertible to `size_t` this 273 * is an identity operation (i.e. the value is cast to `size_t` and returned 274 * unaltered). Integer types wider than `size_t` are XOR folded down to 275 * `size_t`. Other scalar types use an architecture-dependent hash function 276 * based on their width and alignment. 277 * If the type provides a `toHash`-function, only `toHash()` is called and its 278 * result is returned. 279 * 280 * This function also accepts input ranges that contain hashable elements. 281 * Individual values are combined then and the resulting hash is returned. 282 * 283 * Params: 284 * T = Hashable type. 285 * key = Hashable value. 286 * 287 * Returns: Calculated hash value. 288 * 289 * See_Also: $(LINK http://www.isthe.com/chongo/tech/comp/fnv/). 290 */ 291 size_t hash(T)(auto ref T key) 292 { 293 static if (is(typeof(key.toHash()) == size_t)) 294 { 295 return key.toHash(); 296 } 297 else static if ((isIntegral!T || isSomeChar!T || isBoolean!T) 298 && T.sizeof <= size_t.sizeof) 299 { 300 return cast(size_t) key; 301 } 302 else static if (isIntegral!T && T.sizeof > size_t.sizeof) 303 { 304 return cast(size_t) (key ^ (key >>> (size_t.sizeof * 8))); 305 } 306 else static if (isPointer!T || is(T : typeof(null))) 307 { 308 return (() @trusted => cast(size_t) key)(); 309 } 310 else 311 { 312 Hasher hasher; 313 hasher(key); 314 return hasher.hash; 315 } 316 } 317 318 /** 319 * Determines whether $(D_PARAM hasher) is hash function for $(D_PARAM T), i.e. 320 * it is callable with a value of type $(D_PARAM T) and returns a 321 * $(D_PSYMBOL size_t) value. 322 * 323 * Params: 324 * hasher = Hash function candidate. 325 * T = Type to test the hash function with. 326 * 327 * Returns: $(D_KEYWORD true) if $(D_PARAM hasher) is a hash function for 328 * $(D_PARAM T), $(D_KEYWORD false) otherwise. 329 */ 330 template isHashFunction(alias hasher, T) 331 { 332 private alias wrapper = (T x) => hasher(x); 333 enum bool isHashFunction = is(typeof(wrapper(T.init)) == size_t); 334 } 335 336 /// 337 @nogc nothrow pure @safe unittest 338 { 339 static assert(isHashFunction!(hash, int)); 340 }