1 /**
2  * Hash Set
3  * Copyright: © 2014 Economic Modeling Specialists, Intl.
4  * Authors: Brian Schott
5  * License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
6  */
7 
8 module containers.hashset;
9 
10 import containers.internal.hash : generateHash;
11 import containers.internal.node : shouldAddGCRange;
12 
13 /**
14  * Hash Set.
15  * Params:
16  *     T = the element type
17  *     hashFunction = the hash function to use on the elements
18  */
19 struct HashSet(T, alias hashFunction = generateHash!T, bool supportGC = shouldAddGCRange!T)
20 {
21 	this(this) @disable;
22 
23 	/**
24 	 * Constructs a HashSet with an initial bucket count of bucketCount.
25 	 * bucketCount must be a power of two.
26 	 */
27 	this(size_t bucketCount)
28 	in
29 	{
30 		assert ((bucketCount & (bucketCount - 1)) == 0, "bucketCount must be a power of two");
31 	}
32 	body
33 	{
34 		initialize(bucketCount);
35 	}
36 
37 	~this()
38 	{
39 		import std.experimental.allocator.mallocator : Mallocator;
40 		import std.experimental.allocator : dispose;
41 		import core.memory : GC;
42 		static if (supportGC && shouldAddGCRange!T)
43 			GC.removeRange(buckets.ptr);
44 		Mallocator.instance.dispose(buckets);
45 	}
46 
47 	/**
48 	 * Removes all items from the set
49 	 */
50 	void clear()
51 	{
52 		foreach (ref bucket; buckets)
53 		{
54 			destroy(bucket);
55 			bucket = Bucket.init;
56 		}
57 		_length = 0;
58 	}
59 
60 	/**
61 	 * Removes the given item from the set.
62 	 * Returns: false if the value was not present
63 	 */
64 	bool remove(T value)
65 	{
66 		hash_t hash = hashFunction(value);
67 		size_t index = hashToIndex(hash);
68 		static if (storeHash)
69 			immutable bool removed = buckets[index].remove(ItemNode(hash, value));
70 		else
71 			immutable bool removed = buckets[index].remove(ItemNode(value));
72 		if (removed)
73 			--_length;
74 		return removed;
75 	}
76 
77 	/**
78 	 * Returns: true if value is contained in the set.
79 	 */
80 	bool contains(T value) inout nothrow
81 	{
82 		return (value in this) !is null;
83 	}
84 
85 	/**
86 	 * Supports $(B a in b) syntax
87 	 */
88 	inout(T)* opBinaryRight(string op)(T value) inout nothrow if (op == "in")
89 	{
90 		if (buckets.length == 0 || _length == 0)
91 			return null;
92 		hash_t hash = hashFunction(value);
93 		size_t index = hashToIndex(hash);
94 		return buckets[index].get(value, hash);
95 	}
96 
97 	/**
98 	 * Inserts the given item into the set.
99 	 * Params: value = the value to insert
100 	 * Returns: true if the value was actually inserted, or false if it was
101 	 *     already present.
102 	 */
103 	bool insert(T value)
104 	{
105 		if (buckets.length == 0)
106 			initialize(4);
107 		hash_t hash = hashFunction(value);
108 		size_t index = hashToIndex(hash);
109 		static if (storeHash)
110 			auto r = buckets[index].insert(ItemNode(hash, value));
111 		else
112 			auto r = buckets[index].insert(ItemNode(value));
113 		if (r)
114 			++_length;
115 		if (shouldRehash)
116 			rehash();
117 		return r;
118 	}
119 
120 	/// ditto
121 	alias put = insert;
122 
123 	/**
124 	 * Returns: true if the set has no items
125 	 */
126 	bool empty() const nothrow pure @nogc @safe @property
127 	{
128 		return _length == 0;
129 	}
130 
131 	/**
132 	 * Returns: the number of items in the set
133 	 */
134 	size_t length() const nothrow pure @nogc @safe @property
135 	{
136 		return _length;
137 	}
138 
139 	/**
140 	 * Forward range interface
141 	 */
142 	auto range(this This)() nothrow @nogc @trusted @property
143 	{
144 		return Range!(This)(&this);
145 	}
146 
147 	/// ditto
148 	alias opSlice = range;
149 
150 private:
151 
152 	import containers.internal.node : shouldAddGCRange, fatNodeCapacity;
153 	import containers.internal.storage_type : ContainerStorageType;
154 	import containers.internal.element_type : ContainerElementType;
155 	import containers.unrolledlist : UnrolledList;
156 	import std.traits : isBasicType, isPointer;
157 
158 	enum ITEMS_PER_NODE = fatNodeCapacity!(ItemNode.sizeof, 1, size_t, 128);
159 
160 	enum bool storeHash = !isBasicType!T;
161 
162 	void initialize(size_t bucketCount)
163 	{
164 		import std.experimental.allocator : makeArray;
165 		import std.experimental.allocator.mallocator : Mallocator;
166 		import core.memory : GC;
167 
168 		buckets = Mallocator.instance.makeArray!Bucket(bucketCount);
169 		static if (supportGC && shouldAddGCRange!T)
170 			GC.addRange(buckets.ptr, buckets.length * Bucket.sizeof);
171 	}
172 
173 	static struct Range(ThisT)
174 	{
175 		this(ThisT* t)
176 		{
177 			foreach (i, ref bucket; t.buckets)
178 			{
179 				bucketIndex = i;
180 				if (bucket.root !is null)
181 				{
182 					currentNode = cast(Bucket.BucketNode*) bucket.root;
183 					break;
184 				}
185 			}
186 			this.t = t;
187 		}
188 
189 		bool empty() const nothrow @safe @nogc @property
190 		{
191 			return currentNode is null;
192 		}
193 
194 		ET front() nothrow @safe @nogc @property
195 		{
196 			return cast(ET) currentNode.items[nodeIndex].value;
197 		}
198 
199 		void popFront() nothrow @trusted @nogc
200 		{
201 			if (nodeIndex + 1 < currentNode.l)
202 			{
203 				++nodeIndex;
204 				return;
205 			}
206 			else
207 			{
208 				nodeIndex = 0;
209 				if (currentNode.next is null)
210 				{
211 					++bucketIndex;
212 					while (bucketIndex < t.buckets.length && t.buckets[bucketIndex].root is null)
213 						++bucketIndex;
214 					if (bucketIndex < t.buckets.length)
215 						currentNode = cast(Bucket.BucketNode*) t.buckets[bucketIndex].root;
216 					else
217 						currentNode = null;
218 				}
219 				else
220 				{
221 					currentNode = currentNode.next;
222 					assert(currentNode.l > 0);
223 				}
224 			}
225 		}
226 
227 	private:
228 		alias ET = ContainerElementType!(ThisT, T);
229 		ThisT* t;
230 		Bucket.BucketNode* currentNode;
231 		size_t bucketIndex;
232 		size_t nodeIndex;
233 	}
234 
235 	bool shouldRehash() const pure nothrow @safe @nogc
236 	{
237 		immutable float numberOfNodes = cast(float) _length / cast(float) ITEMS_PER_NODE;
238 		return (numberOfNodes / cast(float) buckets.length) > 0.75f;
239 	}
240 
241 	void rehash() @trusted
242 	{
243 		import std.experimental.allocator : makeArray, dispose;
244 		import std.experimental.allocator.mallocator : Mallocator;
245 		import core.memory : GC;
246 
247 		immutable size_t newLength = buckets.length << 1;
248 		Bucket[] oldBuckets = buckets;
249 		buckets = Mallocator.instance.makeArray!Bucket(newLength);
250 		assert (buckets);
251 		assert (buckets.length == newLength);
252 		static if (supportGC && shouldAddGCRange!T)
253 			GC.addRange(buckets.ptr, buckets.length * Bucket.sizeof);
254 		foreach (ref const bucket; oldBuckets)
255 		{
256 			for (Bucket.BucketNode* node = cast(Bucket.BucketNode*) bucket.root; node !is null; node = node.next)
257 			{
258 				for (size_t i = 0; i < node.l; ++i)
259 				{
260 					static if (storeHash)
261 					{
262 						immutable size_t hash = node.items[i].hash;
263 						immutable size_t index = hashToIndex(hash);
264 						buckets[index].insert(ItemNode(hash, node.items[i].value));
265 					}
266 					else
267 					{
268 						immutable size_t hash = hashFunction(node.items[i].value);
269 						immutable size_t index = hashToIndex(hash);
270 						buckets[index].insert(ItemNode(node.items[i].value));
271 					}
272 				}
273 			}
274 		}
275 		static if (supportGC && shouldAddGCRange!T)
276 			GC.removeRange(oldBuckets.ptr);
277 		Mallocator.instance.dispose(oldBuckets);
278 	}
279 
280 	size_t hashToIndex(hash_t hash) const pure nothrow @safe
281 	in
282 	{
283 		assert (buckets.length > 0);
284 	}
285 	out (result)
286 	{
287 		import std.string : format;
288 		assert (result < buckets.length, "%d, %d".format(result, buckets.length));
289 	}
290 	body
291 	{
292 		return hash & (buckets.length - 1);
293 	}
294 
295 	static struct Bucket
296 	{
297 		this(this) @disable;
298 
299 		~this()
300 		{
301 			import std.experimental.allocator : dispose;
302 			import std.experimental.allocator.mallocator : Mallocator;
303 
304 			BucketNode* current = root;
305 			BucketNode* previous;
306 			while (true)
307 			{
308 				if (previous !is null)
309 					Mallocator.instance.dispose(previous);
310 				previous = current;
311 				if (current is null)
312 					break;
313 				current = current.next;
314 			}
315 		}
316 
317 		static struct BucketNode
318 		{
319 			ContainerStorageType!(T)* get(ItemNode n)
320 			{
321 				foreach (ref item; items[0 .. l])
322 				{
323 					static if (storeHash)
324 					{
325 						static if (isPointer!T)
326 						{
327 							if (item.hash == n.hash && *item.value == *n.value)
328 								return &item.value;
329 						}
330 						else
331 						{
332 							if (item.hash == n.hash && item.value == n.value)
333 								return &item.value;
334 						}
335 					}
336 					else
337 					{
338 						static if (isPointer!T)
339 						{
340 							if (*item.value == *n.value)
341 								return &item.value;
342 						}
343 						else
344 						{
345 							if (item.value == n.value)
346 								return &item.value;
347 						}
348 					}
349 				}
350 				return null;
351 			}
352 
353 			void insert(ItemNode n)
354 			{
355 				items[l] = n;
356 				++l;
357 			}
358 
359 			bool remove(ItemNode n)
360 			{
361 				import std.algorithm : SwapStrategy, remove;
362 
363 				foreach (size_t i, ref node; items[0 .. l])
364 				{
365 					static if (storeHash)
366 					{
367 						static if (isPointer!T)
368 							immutable bool matches = node.hash == n.hash && *node.value == *n.value;
369 						else
370 							immutable bool matches = node.hash == n.hash && node.value == n.value;
371 					}
372 					else
373 					{
374 						static if (isPointer!T)
375 							immutable bool matches = *node.value == *n.value;
376 						else
377 							immutable bool matches = node.value == n.value;
378 					}
379 					if (matches)
380 					{
381 						items[0 .. l].remove!(SwapStrategy.unstable)(i);
382 						l--;
383 						return true;
384 					}
385 				}
386 				return false;
387 			}
388 
389 			BucketNode* next;
390 			size_t l;
391 			ItemNode[ITEMS_PER_NODE] items;
392 		}
393 
394 		bool insert(ItemNode n)
395 		{
396 			import std.experimental.allocator : make;
397 			import std.experimental.allocator.mallocator : Mallocator;
398 
399 			for (BucketNode* current = root; current !is null; current = current.next)
400 			{
401 				if (current.get(n) !is null)
402 					return false;
403 				if (current.l < current.items.length)
404 				{
405 					current.insert(n);
406 					return true;
407 				}
408 			}
409 			BucketNode* newNode = Mallocator.instance.make!BucketNode();
410 			newNode.insert(n);
411 			newNode.next = root;
412 			root = newNode;
413 			return true;
414 		}
415 
416 		bool remove(ItemNode n)
417 		{
418 			import std.experimental.allocator : dispose;
419 			import std.experimental.allocator.mallocator : Mallocator;
420 
421 			BucketNode* current = root;
422 			BucketNode* previous;
423 			while (current !is null)
424 			{
425 				immutable removed = current.remove(n);
426 				if (removed)
427 				{
428 					if (current.l == 0)
429 					{
430 						if (previous !is null)
431 							previous.next = current.next;
432 						else
433 							root = current.next;
434 						Mallocator.instance.dispose(current);
435 					}
436 					return true;
437 				}
438 				previous = current;
439 				current = current.next;
440 			}
441 			return false;
442 		}
443 
444 		inout(T)* get(T value, size_t hash) inout
445 		{
446 			for (BucketNode* current = cast(BucketNode*) root; current !is null; current = current.next)
447 			{
448 				static if (storeHash)
449 					auto v = current.get(ItemNode(hash, value));
450 				else
451 					auto v = current.get(ItemNode(value));
452 				if (v !is null)
453 					return cast(typeof(return)) v;
454 			}
455 			return null;
456 		}
457 
458 		BucketNode* root;
459 	}
460 
461 	static struct ItemNode
462 	{
463 		bool opEquals(ref const T v) const
464 		{
465 			static if (isPointer!T)
466 				return *v == *value;
467 			else
468 				return v == value;
469 		}
470 
471 		bool opEquals(ref const ItemNode other) const
472 		{
473 			static if (storeHash)
474 				if (other.hash != hash)
475 					return false;
476 			static if (isPointer!T)
477 				return *other.value == *value;
478 			else
479 				return other.value == value;
480 		}
481 
482 		static if (storeHash)
483 			hash_t hash;
484 		ContainerStorageType!T value;
485 	}
486 
487 	Bucket[] buckets;
488 	size_t _length;
489 }
490 
491 ///
492 unittest
493 {
494 	import std.array : array;
495 	import std.algorithm : canFind;
496 	import std.uuid : randomUUID;
497 
498 	auto s = HashSet!string(16);
499 	assert(!s.contains("nonsense"));
500 	assert(s.put("test"));
501 	assert(s.contains("test"));
502 	assert(!s.put("test"));
503 	assert(s.contains("test"));
504 	assert(s.length == 1);
505 	assert(!s.contains("nothere"));
506 	s.put("a");
507 	s.put("b");
508 	s.put("c");
509 	s.put("d");
510 	string[] strings = s.range.array;
511 	assert(strings.canFind("a"));
512 	assert(strings.canFind("b"));
513 	assert(strings.canFind("c"));
514 	assert(strings.canFind("d"));
515 	assert(strings.canFind("test"));
516 	assert(*("a" in s) == "a");
517 	assert(*("b" in s) == "b");
518 	assert(*("c" in s) == "c");
519 	assert(*("d" in s) == "d");
520 	assert(*("test" in s) == "test");
521 	assert(strings.length == 5);
522 	assert(s.remove("test"));
523 	assert(s.length == 4);
524 	s.clear();
525 	assert(s.length == 0);
526 	assert(s.empty);
527 	s.put("abcde");
528 	assert(s.length == 1);
529 	foreach (i; 0 .. 10_000)
530 	{
531 		s.put(randomUUID().toString);
532 	}
533 	assert(s.length == 10_001);
534 
535 	// Make sure that there's no range violation slicing an empty set
536 	HashSet!int e;
537 	foreach (i; e[])
538 		assert(i > 0);
539 
540 	enum MAGICAL_NUMBER = 600_000;
541 
542 	HashSet!int f;
543 	foreach (i; 0 .. MAGICAL_NUMBER)
544 		assert(f.insert(i));
545 	import std.range:walkLength;
546 	assert(f.length == f[].walkLength);
547 	foreach (i; 0 .. MAGICAL_NUMBER)
548 		assert(i in f);
549 	foreach (i; 0 .. MAGICAL_NUMBER)
550 		assert(f.remove(i));
551 	foreach (i; 0 .. MAGICAL_NUMBER)
552 		assert(!f.remove(i));
553 
554 	HashSet!int g;
555 	foreach (i; 0 .. MAGICAL_NUMBER)
556 		assert(g.insert(i));
557 
558 	static struct AStruct
559 	{
560 		int a;
561 		int b;
562 	}
563 
564 	HashSet!(AStruct*, a => a.a) fred;
565 	fred.insert(new AStruct(10, 10));
566 	auto h = new AStruct(10, 10);
567 	assert(h in fred);
568 }