1 /* This Source Code Form is subject to the terms of the Mozilla Public
2  * License, v. 2.0. If a copy of the MPL was not distributed with this
3  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
4 
5 /**
6  * This module implements a $(D_PSYMBOL Set) container that stores unique
7  * values without any particular order.
8  *
9  * Copyright: Eugene Wissner 2017-2020.
10  * License: $(LINK2 https://www.mozilla.org/en-US/MPL/2.0/,
11  *                  Mozilla Public License, v. 2.0).
12  * Authors: $(LINK2 mailto:info@caraus.de, Eugene Wissner)
13  * Source: $(LINK2 https://github.com/caraus-ecms/tanya/blob/master/source/tanya/container/set.d,
14  *                 tanya/container/set.d)
15  */
16 module tanya.container.set;
17 
18 import tanya.container.array;
19 import tanya.container.entry;
20 import tanya.hash.lookup;
21 import tanya.memory.allocator;
22 import tanya.memory.lifetime;
23 import tanya.meta.trait;
24 import tanya.meta.transform;
25 import tanya.range.primitive;
26 
27 /**
28  * Bidirectional range that iterates over the $(D_PSYMBOL Set)'s values.
29  *
30  * Params:
31  *  T = Type of the internal hash storage.
32  */
33 struct Range(T)
34 {
35     private alias E = CopyConstness!(T, T.Key);
36     static if (isMutable!T)
37     {
38         private alias DataRange = T.array.Range;
39     }
40     else
41     {
42         private alias DataRange = T.array.ConstRange;
43     }
44     private DataRange dataRange;
45 
46     @disable this();
47 
48     private this(DataRange dataRange)
49     {
50         while (!dataRange.empty && dataRange.front.status != BucketStatus.used)
51         {
52             dataRange.popFront();
53         }
54         while (!dataRange.empty && dataRange.back.status != BucketStatus.used)
55         {
56             dataRange.popBack();
57         }
58         this.dataRange = dataRange;
59     }
60 
61     @property Range save()
62     {
63         return this;
64     }
65 
66     @property bool empty() const
67     {
68         return this.dataRange.empty();
69     }
70 
71     void popFront()
72     in
73     {
74         assert(!empty);
75         assert(this.dataRange.front.status == BucketStatus.used);
76     }
77     out
78     {
79         assert(empty || this.dataRange.back.status == BucketStatus.used);
80     }
81     do
82     {
83         do
84         {
85             this.dataRange.popFront();
86         }
87         while (!empty && dataRange.front.status != BucketStatus.used);
88     }
89 
90     void popBack()
91     in
92     {
93         assert(!empty);
94         assert(this.dataRange.back.status == BucketStatus.used);
95     }
96     out
97     {
98         assert(empty || this.dataRange.back.status == BucketStatus.used);
99     }
100     do
101     {
102         do
103         {
104             this.dataRange.popBack();
105         }
106         while (!empty && dataRange.back.status != BucketStatus.used);
107     }
108 
109     @property ref inout(E) front() inout
110     in
111     {
112         assert(!empty);
113         assert(this.dataRange.front.status == BucketStatus.used);
114     }
115     do
116     {
117         return this.dataRange.front.key;
118     }
119 
120     @property ref inout(E) back() inout
121     in
122     {
123         assert(!empty);
124         assert(this.dataRange.back.status == BucketStatus.used);
125     }
126     do
127     {
128         return this.dataRange.back.key;
129     }
130 
131     Range opIndex()
132     {
133         return typeof(return)(this.dataRange[]);
134     }
135 
136     Range!(const T) opIndex() const
137     {
138         return typeof(return)(this.dataRange[]);
139     }
140 }
141 
142 /**
143  * Set is a data structure that stores unique values without any particular
144  * order.
145  *
146  * This $(D_PSYMBOL Set) is implemented using closed hashing. Hash collisions
147  * are resolved with linear probing.
148  *
149  * $(D_PARAM T) should be hashable with $(D_PARAM hasher). $(D_PARAM hasher) is
150  * a callable that accepts an argument of type $(D_PARAM T) and returns a hash
151  * value for it ($(D_KEYWORD size_t)).
152  *
153  * Params:
154  *  T      = Element type.
155  *  hasher = Hash function for $(D_PARAM T).
156  */
157 struct Set(T, alias hasher = hash)
158 if (isHashFunction!(hasher, T))
159 {
160     private alias HashArray = .HashArray!(hasher, T);
161     private alias Buckets = HashArray.Buckets;
162 
163     private HashArray data;
164 
165     /// The range types for $(D_PSYMBOL Set).
166     alias Range = .Range!HashArray;
167 
168     /// ditto
169     alias ConstRange = .Range!(const HashArray);
170 
171     invariant
172     {
173         assert(this.data.lengthIndex < primes.length);
174         assert(this.data.array.length == 0
175                 || this.data.array.length == primes[this.data.lengthIndex]);
176     }
177 
178     /**
179      * Constructor.
180      *
181      * Params:
182      *  n         = Minimum number of buckets.
183      *  allocator = Allocator.
184      *
185      * Precondition: $(D_INLINECODE allocator !is null).
186      */
187     this(size_t n, shared Allocator allocator = defaultAllocator)
188     in
189     {
190         assert(allocator !is null);
191     }
192     do
193     {
194         this(allocator);
195         this.data.rehash(n);
196     }
197 
198     ///
199     @nogc nothrow pure @safe unittest
200     {
201         auto set = Set!int(5);
202         assert(set.capacity == 7);
203     }
204 
205     /// ditto
206     this(shared Allocator allocator)
207     in
208     {
209         assert(allocator !is null);
210     }
211     do
212     {
213         this.data = HashArray(allocator);
214     }
215 
216     /**
217      * Initializes this $(D_PARAM Set) from another one.
218      *
219      * If $(D_PARAM init) is passed by reference, it will be copied.
220      * If $(D_PARAM init) is passed by value, it will be moved.
221      *
222      * Params:
223      *  S         = Source set type.
224      *  init      = Source set.
225      *  allocator = Allocator.
226      *
227      * Precondition: $(D_INLINECODE allocator !is null).
228      */
229     this(S)(ref S init, shared Allocator allocator = defaultAllocator)
230     if (is(Unqual!S == Set))
231     in
232     {
233         assert(allocator !is null);
234     }
235     do
236     {
237         this.data = HashArray(init.data, allocator);
238     }
239 
240     /// ditto
241     this(S)(S init, shared Allocator allocator = defaultAllocator)
242     if (is(S == Set))
243     in
244     {
245         assert(allocator !is null);
246     }
247     do
248     {
249         this.data.move(init.data, allocator);
250     }
251 
252     /**
253      * Initializes the set from a forward range.
254      *
255      * Params:
256      *  R         = Range type.
257      *  range     = Forward range.
258      *  allocator = Allocator.
259      *
260      * Precondition: $(D_INLINECODE allocator !is null).
261      */
262     this(R)(scope R range, shared Allocator allocator = defaultAllocator)
263     if (isForwardRange!R
264      && isImplicitlyConvertible!(ElementType!R, T)
265      && !isInfinite!R)
266     in
267     {
268         assert(allocator !is null);
269     }
270     do
271     {
272         this(allocator);
273         insert(range);
274     }
275 
276     ///
277     @nogc nothrow pure @safe unittest
278     {
279         int[2] range = [1, 2];
280         Set!int set = Set!int(range[]);
281 
282         assert(1 in set);
283         assert(2 in set);
284     }
285 
286     /**
287      * Initializes the set from a static array.
288      *
289      * Params:
290      *  n         = Array size.
291      *  array     = Static array.
292      *  allocator = Allocator.
293      *
294      * Precondition: $(D_INLINECODE allocator !is null).
295      */
296     this(size_t n)(T[n] array, shared Allocator allocator = defaultAllocator)
297     in
298     {
299         assert(allocator !is null);
300     }
301     do
302     {
303         this(allocator);
304         insert(array[]);
305     }
306 
307     ///
308     @nogc nothrow pure @safe unittest
309     {
310         Set!int set = Set!int([1, 2]);
311 
312         assert(1 in set);
313         assert(2 in set);
314     }
315 
316     /**
317      * Assigns another set.
318      *
319      * If $(D_PARAM that) is passed by reference, it will be copied.
320      * If $(D_PARAM that) is passed by value, it will be moved.
321      *
322      * Params:
323      *  S    = Content type.
324      *  that = The value should be assigned.
325      *
326      * Returns: $(D_KEYWORD this).
327      */
328     ref typeof(this) opAssign(S)(ref S that)
329     if (is(Unqual!S == Set))
330     {
331         this.data = that.data;
332         return this;
333     }
334 
335     /// ditto
336     ref typeof(this) opAssign(S)(S that) @trusted
337     if (is(S == Set))
338     {
339         this.data.swap(that.data);
340         return this;
341     }
342 
343     /**
344      * Returns: Used allocator.
345      *
346      * Postcondition: $(D_INLINECODE allocator !is null)
347      */
348     @property shared(Allocator) allocator() const
349     out (allocator)
350     {
351         assert(allocator !is null);
352     }
353     do
354     {
355         return this.data.array.allocator;
356     }
357 
358     /**
359      * Maximum amount of elements this $(D_PSYMBOL Set) can hold without
360      * resizing and rehashing. Note that it doesn't mean that the
361      * $(D_PSYMBOL Set) will hold $(I exactly) $(D_PSYMBOL capacity) elements.
362      * $(D_PSYMBOL capacity) tells the size of the container under a best-case
363      * distribution of elements.
364      *
365      * Returns: $(D_PSYMBOL Set) capacity.
366      */
367     @property size_t capacity() const
368     {
369         return this.data.capacity;
370     }
371 
372     ///
373     @nogc nothrow pure @safe unittest
374     {
375         Set!int set;
376         assert(set.capacity == 0);
377 
378         set.insert(8);
379         assert(set.capacity == 3);
380     }
381 
382     /**
383      * Iterates over the $(D_PSYMBOL Set) and counts the elements.
384      *
385      * Returns: Count of elements within the $(D_PSYMBOL Set).
386      */
387     @property size_t length() const
388     {
389         return this.data.length;
390     }
391 
392     ///
393     @nogc nothrow pure @safe unittest
394     {
395         Set!int set;
396         assert(set.length == 0);
397 
398         set.insert(8);
399         assert(set.length == 1);
400     }
401 
402     /**
403      * Tells whether the container contains any elements.
404      *
405      * Returns: Whether the container is empty.
406      */
407     @property bool empty() const
408     {
409         return length == 0;
410     }
411 
412     ///
413     @nogc nothrow pure @safe unittest
414     {
415         Set!int set;
416         assert(set.empty);
417         set.insert(5);
418         assert(!set.empty);
419     }
420 
421     /**
422      * Removes all elements.
423      */
424     void clear()
425     {
426         this.data.clear();
427     }
428 
429     ///
430     @nogc nothrow pure @safe unittest
431     {
432         Set!int set;
433         set.insert(5);
434         assert(!set.empty);
435         set.clear();
436         assert(set.empty);
437     }
438 
439     /**
440      * Returns current bucket count in the container.
441      *
442      * Bucket count equals to the number of the elements can be saved in the
443      * container in the best case scenario for key distribution, i.d. every key
444      * has a unique hash value. In a worse case the bucket count can be less
445      * than the number of elements stored in the container.
446      *
447      * Returns: Current bucket count.
448      *
449      * See_Also: $(D_PSYMBOL rehash).
450      */
451     @property size_t bucketCount() const
452     {
453         return this.data.bucketCount;
454     }
455 
456     /// The maximum number of buckets the container can have.
457     enum size_t maxBucketCount = primes[$ - 1];
458 
459     /**
460      * Inserts a new element.
461      *
462      * Params:
463      *  value = Element value.
464      *
465      * Returns: Amount of new elements inserted.
466      */
467     size_t insert()(ref T value)
468     {
469         auto e = ((ref v) @trusted => &this.data.insert(v))(value);
470         if (e.status != BucketStatus.used)
471         {
472             e.moveKey(value);
473             return 1;
474         }
475         return 0;
476     }
477 
478     size_t insert()(T value)
479     {
480         auto e = ((ref v) @trusted => &this.data.insert(v))(value);
481         if (e.status != BucketStatus.used)
482         {
483             e.key = value;
484             return 1;
485         }
486         return 0;
487     }
488 
489     ///
490     @nogc nothrow pure @safe unittest
491     {
492         Set!int set;
493         assert(8 !in set);
494 
495         assert(set.insert(8) == 1);
496         assert(set.length == 1);
497         assert(8 in set);
498 
499         assert(set.insert(8) == 0);
500         assert(set.length == 1);
501         assert(8 in set);
502 
503         assert(set.remove(8));
504         assert(set.insert(8) == 1);
505     }
506 
507     /**
508      * Inserts the value from a forward range into the set.
509      *
510      * Params:
511      *  R     = Range type.
512      *  range = Forward range.
513      *
514      * Returns: The number of new elements inserted.
515      */
516     size_t insert(R)(scope R range)
517     if (isForwardRange!R
518      && isImplicitlyConvertible!(ElementType!R, T)
519      && !isInfinite!R)
520     {
521         size_t count;
522         foreach (e; range)
523         {
524             count += insert(e);
525         }
526         return count;
527     }
528 
529     ///
530     @nogc nothrow pure @safe unittest
531     {
532         Set!int set;
533 
534         int[3] range = [2, 1, 2];
535 
536         assert(set.insert(range[]) == 2);
537         assert(1 in set);
538         assert(2 in set);
539     }
540 
541     /**
542      * Removes an element.
543      *
544      * Params:
545      *  value = Element value.
546      *
547      * Returns: Number of elements removed, which is in the container with
548      *          unique values `1` if an element existed, and `0` otherwise.
549      */
550     size_t remove(T value)
551     {
552         return this.data.remove(value);
553     }
554 
555     ///
556     @nogc nothrow pure @safe unittest
557     {
558         Set!int set;
559         set.insert(8);
560 
561         assert(8 in set);
562         assert(set.remove(8) == 1);
563         assert(set.remove(8) == 0);
564         assert(8 !in set);
565     }
566 
567     /**
568      * $(D_KEYWORD in) operator.
569      *
570      * Params:
571      *  U     = Type comparable with the element type, used for the lookup.
572      *  value = Element to be searched for.
573      *
574      * Returns: $(D_KEYWORD true) if the given element exists in the container,
575      *          $(D_KEYWORD false) otherwise.
576      */
577     bool opBinaryRight(string op : "in", U)(auto ref const U value) const
578     if (ifTestable!(U, a => T.init == a))
579     {
580         return value in this.data;
581     }
582 
583     ///
584     @nogc nothrow pure @safe unittest
585     {
586         Set!int set;
587 
588         assert(5 !in set);
589         set.insert(5);
590         assert(5 in set);
591         assert(8 !in set);
592     }
593 
594     /**
595      * Sets the number of buckets in the container to at least $(D_PARAM n)
596      * and rearranges all the elements according to their hash values.
597      *
598      * If $(D_PARAM n) is greater than the current $(D_PSYMBOL bucketCount)
599      * and lower than or equal to $(D_PSYMBOL maxBucketCount), a rehash is
600      * forced.
601      *
602      * If $(D_PARAM n) is greater than $(D_PSYMBOL maxBucketCount),
603      * $(D_PSYMBOL maxBucketCount) is used instead as a new number of buckets.
604      *
605      * If $(D_PARAM n) is less than or equal to the current
606      * $(D_PSYMBOL bucketCount), the function may have no effect.
607      *
608      * Rehashing is automatically performed whenever the container needs space
609      * to insert new elements.
610      *
611      * Params:
612      *  n = Minimum number of buckets.
613      */
614     void rehash(size_t n)
615     {
616         this.data.rehash(n);
617     }
618 
619     /**
620      * Returns a bidirectional range over the container.
621      *
622      * Returns: A bidirectional range that iterates over the container.
623      */
624     Range opIndex()
625     {
626         return typeof(return)(this.data.array[]);
627     }
628 
629     /// ditto
630     ConstRange opIndex() const
631     {
632         return typeof(return)(this.data.array[]);
633     }
634 
635     ///
636     @nogc nothrow pure @safe unittest
637     {
638         Set!int set;
639         assert(set[].empty);
640 
641         set.insert(8);
642         assert(!set[].empty);
643         assert(set[].front == 8);
644         assert(set[].back == 8);
645     }
646 }