1 /**
2  * Hash Set
3  * Copyright: © 2015 Economic Modeling Specialists, Intl.
4  * Authors: Brian Schott
5  * License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
6  */
7 
8 module containers.hashset;
9 
10 private import containers.internal.hash : generateHash;
11 private import containers.internal.node : shouldAddGCRange;
12 private import std.experimental.allocator.mallocator : Mallocator;
13 
14 /**
15  * Hash Set.
16  * Params:
17  *     T = the element type
18  *     Allocator = the allocator to use. Defaults to `Mallocator`.
19  *     hashFunction = the hash function to use on the elements
20  *     supportGC = true if the container should support holding references to
21  *         GC-allocated memory.
22  */
23 struct HashSet(T, Allocator = Mallocator, alias hashFunction = generateHash!T,
24 	bool supportGC = shouldAddGCRange!T)
25 {
26 	this(this) @disable;
27 
28 	private import std.experimental.allocator.common : stateSize;
29 
30 	static if (stateSize!Allocator != 0)
31 	{
32 		this() @disable;
33 
34 		/**
35 		 * Use the given `allocator` for allocations.
36 		 */
37 		this(Allocator allocator)
38 		in
39 		{
40 			assert(allocator !is null, "Allocator must not be null");
41 		}
42 		body
43 		{
44 			this.allocator = allocator;
45 		}
46 
47 		/**
48 		 * Constructs a HashSet with an initial bucket count of bucketCount.
49 		 * bucketCount must be a power of two.
50 		 */
51 		this(size_t bucketCount, Allocator allocator)
52 		in
53 		{
54 			assert(allocator !is null, "Allocator must not be null");
55 			assert ((bucketCount & (bucketCount - 1)) == 0, "bucketCount must be a power of two");
56 		}
57 		body
58 		{
59 			initialize(bucketCount);
60 		}
61 	}
62 	else
63 	{
64 		/**
65 		 * Constructs a HashSet with an initial bucket count of bucketCount.
66 		 * bucketCount must be a power of two.
67 		 */
68 		this(size_t bucketCount)
69 		in
70 		{
71 			assert ((bucketCount & (bucketCount - 1)) == 0, "bucketCount must be a power of two");
72 		}
73 		body
74 		{
75 			initialize(bucketCount);
76 		}
77 
78 	}
79 
80 	~this()
81 	{
82 		import std.experimental.allocator : dispose;
83 		import core.memory : GC;
84 		static if (useGC)
85 			GC.removeRange(buckets.ptr);
86 		allocator.dispose(buckets);
87 	}
88 
89 	/**
90 	 * Removes all items from the set
91 	 */
92 	void clear()
93 	{
94 		foreach (ref bucket; buckets)
95 		{
96 			destroy(bucket);
97 			bucket = Bucket.init;
98 		}
99 		_length = 0;
100 	}
101 
102 	/**
103 	 * Removes the given item from the set.
104 	 * Returns: false if the value was not present
105 	 */
106 	bool remove(T value)
107 	{
108 		hash_t hash = hashFunction(value);
109 		size_t index = hashToIndex(hash);
110 		static if (storeHash)
111 			immutable bool removed = buckets[index].remove(ItemNode(hash, value));
112 		else
113 			immutable bool removed = buckets[index].remove(ItemNode(value));
114 		if (removed)
115 			--_length;
116 		return removed;
117 	}
118 
119 	/**
120 	 * Returns: true if value is contained in the set.
121 	 */
122 	bool contains(T value) inout nothrow
123 	{
124 		return (value in this) !is null;
125 	}
126 
127 	/**
128 	 * Supports $(B a in b) syntax
129 	 */
130 	inout(T)* opBinaryRight(string op)(T value) inout nothrow if (op == "in")
131 	{
132 		if (buckets.length == 0 || _length == 0)
133 			return null;
134 		hash_t hash = hashFunction(value);
135 		size_t index = hashToIndex(hash);
136 		return buckets[index].get(value, hash);
137 	}
138 
139 	/**
140 	 * Inserts the given item into the set.
141 	 * Params: value = the value to insert
142 	 * Returns: true if the value was actually inserted, or false if it was
143 	 *     already present.
144 	 */
145 	bool insert(T value)
146 	{
147 		if (buckets.length == 0)
148 			initialize(4);
149 		hash_t hash = hashFunction(value);
150 		size_t index = hashToIndex(hash);
151 		static if (storeHash)
152 			auto r = buckets[index].insert(ItemNode(hash, value));
153 		else
154 			auto r = buckets[index].insert(ItemNode(value));
155 		if (r)
156 			++_length;
157 		if (shouldRehash)
158 			rehash();
159 		return r;
160 	}
161 
162 	/// ditto
163 	alias put = insert;
164 
165 	/**
166 	 * Returns: true if the set has no items
167 	 */
168 	bool empty() const nothrow pure @nogc @safe @property
169 	{
170 		return _length == 0;
171 	}
172 
173 	/**
174 	 * Returns: the number of items in the set
175 	 */
176 	size_t length() const nothrow pure @nogc @safe @property
177 	{
178 		return _length;
179 	}
180 
181 	/**
182 	 * Forward range interface
183 	 */
184 	auto range(this This)() nothrow @nogc @trusted @property
185 	{
186 		return Range!(This)(&this);
187 	}
188 
189 	/// ditto
190 	alias opSlice = range;
191 
192 private:
193 
194 	import containers.internal.node : shouldAddGCRange, fatNodeCapacity;
195 	import containers.internal.storage_type : ContainerStorageType;
196 	import containers.internal.element_type : ContainerElementType;
197 	import containers.internal.mixins : AllocatorState;
198 	import containers.unrolledlist : UnrolledList;
199 	import std.traits : isBasicType, isPointer;
200 
201 	enum ITEMS_PER_NODE = fatNodeCapacity!(ItemNode.sizeof, 1, size_t, 128);
202 
203 	enum bool storeHash = !isBasicType!T;
204 	enum bool useGC = supportGC && shouldAddGCRange!T;
205 
206 	void initialize(size_t bucketCount)
207 	{
208 		import std.experimental.allocator : makeArray;
209 		import core.memory : GC;
210 
211 		makeBuckets(bucketCount);
212 		static if (useGC)
213 			GC.addRange(buckets.ptr, buckets.length * Bucket.sizeof);
214 	}
215 
216 	static struct Range(ThisT)
217 	{
218 		this(ThisT* t)
219 		{
220 			foreach (i, ref bucket; t.buckets)
221 			{
222 				bucketIndex = i;
223 				if (bucket.root !is null)
224 				{
225 					currentNode = cast(Bucket.BucketNode*) bucket.root;
226 					break;
227 				}
228 			}
229 			this.t = t;
230 		}
231 
232 		bool empty() const nothrow @safe @nogc @property
233 		{
234 			return currentNode is null;
235 		}
236 
237 		ET front() nothrow @safe @nogc @property
238 		{
239 			return cast(ET) currentNode.items[nodeIndex].value;
240 		}
241 
242 		void popFront() nothrow @trusted @nogc
243 		{
244 			if (nodeIndex + 1 < currentNode.l)
245 			{
246 				++nodeIndex;
247 				return;
248 			}
249 			else
250 			{
251 				nodeIndex = 0;
252 				if (currentNode.next is null)
253 				{
254 					++bucketIndex;
255 					while (bucketIndex < t.buckets.length && t.buckets[bucketIndex].root is null)
256 						++bucketIndex;
257 					if (bucketIndex < t.buckets.length)
258 						currentNode = cast(Bucket.BucketNode*) t.buckets[bucketIndex].root;
259 					else
260 						currentNode = null;
261 				}
262 				else
263 				{
264 					currentNode = currentNode.next;
265 					assert(currentNode.l > 0);
266 				}
267 			}
268 		}
269 
270 	private:
271 		alias ET = ContainerElementType!(ThisT, T);
272 		ThisT* t;
273 		Bucket.BucketNode* currentNode;
274 		size_t bucketIndex;
275 		size_t nodeIndex;
276 	}
277 
278 	void makeBuckets(size_t bucketCount)
279 	{
280 		import std.experimental.allocator : makeArray;
281 
282 		static if (stateSize!Allocator == 0)
283 			buckets = allocator.makeArray!Bucket(bucketCount);
284 		else
285 		{
286 			import std.conv:emplace;
287 
288 			buckets = cast(Bucket[]) allocator.allocate(Bucket.sizeof * bucketCount);
289 			foreach (ref bucket; buckets)
290 				emplace!Bucket(&bucket, allocator);
291 		}
292 	}
293 
294 	bool shouldRehash() const pure nothrow @safe @nogc
295 	{
296 		immutable float numberOfNodes = cast(float) _length / cast(float) ITEMS_PER_NODE;
297 		return (numberOfNodes / cast(float) buckets.length) > 0.75f;
298 	}
299 
300 	void rehash() @trusted
301 	{
302 		import std.experimental.allocator : makeArray, dispose;
303 		import core.memory : GC;
304 
305 		immutable size_t newLength = buckets.length << 1;
306 		Bucket[] oldBuckets = buckets;
307 		makeBuckets(newLength);
308 		assert (buckets);
309 		assert (buckets.length == newLength);
310 		static if (useGC)
311 			GC.addRange(buckets.ptr, buckets.length * Bucket.sizeof);
312 		foreach (ref const bucket; oldBuckets)
313 		{
314 			for (Bucket.BucketNode* node = cast(Bucket.BucketNode*) bucket.root; node !is null; node = node.next)
315 			{
316 				for (size_t i = 0; i < node.l; ++i)
317 				{
318 					static if (storeHash)
319 					{
320 						immutable size_t hash = node.items[i].hash;
321 						immutable size_t index = hashToIndex(hash);
322 						buckets[index].insert(ItemNode(hash, node.items[i].value));
323 					}
324 					else
325 					{
326 						immutable size_t hash = hashFunction(node.items[i].value);
327 						immutable size_t index = hashToIndex(hash);
328 						buckets[index].insert(ItemNode(node.items[i].value));
329 					}
330 				}
331 			}
332 		}
333 		static if (useGC)
334 			GC.removeRange(oldBuckets.ptr);
335 		allocator.dispose(oldBuckets);
336 	}
337 
338 	size_t hashToIndex(hash_t hash) const pure nothrow @safe
339 	in
340 	{
341 		assert (buckets.length > 0);
342 	}
343 	out (result)
344 	{
345 		import std..string : format;
346 		assert (result < buckets.length, "%d, %d".format(result, buckets.length));
347 	}
348 	body
349 	{
350 		return hash & (buckets.length - 1);
351 	}
352 
353 	static struct Bucket
354 	{
355 		this(this) @disable;
356 
357 		static if (stateSize!Allocator != 0)
358 		{
359 			this(Allocator allocator)
360 			{
361 				this.allocator = allocator;
362 			}
363 			this() @disable;
364 		}
365 
366 		~this()
367 		{
368 			import std.experimental.allocator : dispose;
369 
370 			BucketNode* current = root;
371 			BucketNode* previous;
372 			while (true)
373 			{
374 				if (previous !is null)
375 					allocator.dispose(previous);
376 				previous = current;
377 				if (current is null)
378 					break;
379 				current = current.next;
380 			}
381 		}
382 
383 		static struct BucketNode
384 		{
385 			ContainerStorageType!(T)* get(ItemNode n)
386 			{
387 				foreach (ref item; items[0 .. l])
388 				{
389 					static if (storeHash)
390 					{
391 						static if (isPointer!T)
392 						{
393 							if (item.hash == n.hash && *item.value == *n.value)
394 								return &item.value;
395 						}
396 						else
397 						{
398 							if (item.hash == n.hash && item.value == n.value)
399 								return &item.value;
400 						}
401 					}
402 					else
403 					{
404 						static if (isPointer!T)
405 						{
406 							if (*item.value == *n.value)
407 								return &item.value;
408 						}
409 						else
410 						{
411 							if (item.value == n.value)
412 								return &item.value;
413 						}
414 					}
415 				}
416 				return null;
417 			}
418 
419 			void insert(ItemNode n)
420 			{
421 				items[l] = n;
422 				++l;
423 			}
424 
425 			bool remove(ItemNode n)
426 			{
427 				import std.algorithm : SwapStrategy, remove;
428 
429 				foreach (size_t i, ref node; items[0 .. l])
430 				{
431 					static if (storeHash)
432 					{
433 						static if (isPointer!T)
434 							immutable bool matches = node.hash == n.hash && *node.value == *n.value;
435 						else
436 							immutable bool matches = node.hash == n.hash && node.value == n.value;
437 					}
438 					else
439 					{
440 						static if (isPointer!T)
441 							immutable bool matches = *node.value == *n.value;
442 						else
443 							immutable bool matches = node.value == n.value;
444 					}
445 					if (matches)
446 					{
447 						items[0 .. l].remove!(SwapStrategy.unstable)(i);
448 						l--;
449 						return true;
450 					}
451 				}
452 				return false;
453 			}
454 
455 			BucketNode* next;
456 			size_t l;
457 			ItemNode[ITEMS_PER_NODE] items;
458 		}
459 
460 		bool insert(ItemNode n)
461 		{
462 			import std.experimental.allocator : make;
463 
464 			for (BucketNode* current = root; current !is null; current = current.next)
465 			{
466 				if (current.get(n) !is null)
467 					return false;
468 				if (current.l < current.items.length)
469 				{
470 					current.insert(n);
471 					return true;
472 				}
473 			}
474 			BucketNode* newNode = allocator.make!BucketNode();
475 			newNode.insert(n);
476 			newNode.next = root;
477 			root = newNode;
478 			return true;
479 		}
480 
481 		bool remove(ItemNode n)
482 		{
483 			import std.experimental.allocator : dispose;
484 
485 			BucketNode* current = root;
486 			BucketNode* previous;
487 			while (current !is null)
488 			{
489 				immutable removed = current.remove(n);
490 				if (removed)
491 				{
492 					if (current.l == 0)
493 					{
494 						if (previous !is null)
495 							previous.next = current.next;
496 						else
497 							root = current.next;
498 						allocator.dispose(current);
499 					}
500 					return true;
501 				}
502 				previous = current;
503 				current = current.next;
504 			}
505 			return false;
506 		}
507 
508 		inout(T)* get(T value, size_t hash) inout
509 		{
510 			for (BucketNode* current = cast(BucketNode*) root; current !is null; current = current.next)
511 			{
512 				static if (storeHash)
513 					auto v = current.get(ItemNode(hash, value));
514 				else
515 					auto v = current.get(ItemNode(value));
516 				if (v !is null)
517 					return cast(typeof(return)) v;
518 			}
519 			return null;
520 		}
521 
522 		BucketNode* root;
523 		mixin AllocatorState!Allocator;
524 	}
525 
526 	static struct ItemNode
527 	{
528 		bool opEquals(ref const T v) const
529 		{
530 			static if (isPointer!T)
531 				return *v == *value;
532 			else
533 				return v == value;
534 		}
535 
536 		bool opEquals(ref const ItemNode other) const
537 		{
538 			static if (storeHash)
539 				if (other.hash != hash)
540 					return false;
541 			static if (isPointer!T)
542 				return *other.value == *value;
543 			else
544 				return other.value == value;
545 		}
546 
547 		static if (storeHash)
548 			hash_t hash;
549 		ContainerStorageType!T value;
550 	}
551 
552 	mixin AllocatorState!Allocator;
553 	Bucket[] buckets;
554 	size_t _length;
555 }
556 
557 ///
558 unittest
559 {
560 	import std.array : array;
561 	import std.algorithm : canFind;
562 	import std.uuid : randomUUID;
563 
564 	auto s = HashSet!string(16);
565 	assert(!s.contains("nonsense"));
566 	assert(s.put("test"));
567 	assert(s.contains("test"));
568 	assert(!s.put("test"));
569 	assert(s.contains("test"));
570 	assert(s.length == 1);
571 	assert(!s.contains("nothere"));
572 	s.put("a");
573 	s.put("b");
574 	s.put("c");
575 	s.put("d");
576 	string[] strings = s.range.array;
577 	assert(strings.canFind("a"));
578 	assert(strings.canFind("b"));
579 	assert(strings.canFind("c"));
580 	assert(strings.canFind("d"));
581 	assert(strings.canFind("test"));
582 	assert(*("a" in s) == "a");
583 	assert(*("b" in s) == "b");
584 	assert(*("c" in s) == "c");
585 	assert(*("d" in s) == "d");
586 	assert(*("test" in s) == "test");
587 	assert(strings.length == 5);
588 	assert(s.remove("test"));
589 	assert(s.length == 4);
590 	s.clear();
591 	assert(s.length == 0);
592 	assert(s.empty);
593 	s.put("abcde");
594 	assert(s.length == 1);
595 	foreach (i; 0 .. 10_000)
596 	{
597 		s.put(randomUUID().toString);
598 	}
599 	assert(s.length == 10_001);
600 
601 	// Make sure that there's no range violation slicing an empty set
602 	HashSet!int e;
603 	foreach (i; e[])
604 		assert(i > 0);
605 
606 	enum MAGICAL_NUMBER = 600_000;
607 
608 	HashSet!int f;
609 	foreach (i; 0 .. MAGICAL_NUMBER)
610 		assert(f.insert(i));
611 	import std.range:walkLength;
612 	assert(f.length == f[].walkLength);
613 	foreach (i; 0 .. MAGICAL_NUMBER)
614 		assert(i in f);
615 	foreach (i; 0 .. MAGICAL_NUMBER)
616 		assert(f.remove(i));
617 	foreach (i; 0 .. MAGICAL_NUMBER)
618 		assert(!f.remove(i));
619 
620 	HashSet!int g;
621 	foreach (i; 0 .. MAGICAL_NUMBER)
622 		assert(g.insert(i));
623 
624 	static struct AStruct
625 	{
626 		int a;
627 		int b;
628 	}
629 
630 	HashSet!(AStruct*, Mallocator, a => a.a) fred;
631 	fred.insert(new AStruct(10, 10));
632 	auto h = new AStruct(10, 10);
633 	assert(h in fred);
634 }