1 /**
2  * Hash Set
3  * Copyright: © 2014 Economic Modeling Specialists, Intl.
4  * Authors: Brian Schott
5  * License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
6  */
7 
8 module containers.hashset;
9 
10 import containers.internal.hash : generateHash;
11 import containers.internal.node : shouldAddGCRange;
12 
13 /**
14  * Hash Set.
15  * Params:
16  *     T = the element type
17  *     hashFunction = the hash function to use on the elements
18  */
19 struct HashSet(T, alias hashFunction = generateHash!T, bool supportGC = shouldAddGCRange!T)
20 {
21 	this(this) @disable;
22 
23 	/**
24 	 * Constructs a HashSet with an initial bucket count of bucketCount.
25 	 * bucketCount must be a power of two.
26 	 */
27 	this(size_t bucketCount)
28 	in
29 	{
30 		assert ((bucketCount & (bucketCount - 1)) == 0, "bucketCount must be a power of two");
31 	}
32 	body
33 	{
34 		initialize(bucketCount);
35 	}
36 
37 	~this()
38 	{
39 		import std.allocator : Mallocator, deallocate;
40 		foreach (ref bucket; buckets)
41 			typeid(typeof(bucket)).destroy(&bucket);
42 		static if (supportGC && shouldAddGCRange!T)
43 			GC.removeRange(buckets.ptr);
44 		Mallocator.it.deallocate(buckets);
45 	}
46 
47 	/**
48 	 * Removes all items from the set
49 	 */
50 	void clear()
51 	{
52 		foreach (ref bucket; buckets)
53 			destroy(bucket);
54 		_length = 0;
55 	}
56 
57 	/**
58 	 * Removes the given item from the set.
59 	 * Returns: false if the value was not present
60 	 */
61 	bool remove(T value)
62 	{
63 		hash_t hash = generateHash(value);
64 		size_t index = hashToIndex(hash);
65 		static if (storeHash)
66 			immutable removed = buckets[index].remove(Node(hash, value));
67 		else
68 			immutable removed = buckets[index].remove(Node(value));
69 		if (removed)
70 			--_length;
71 		return removed;
72 	}
73 
74 	/**
75 	 * Returns: true if value is contained in the set.
76 	 */
77 	bool contains(T value) inout nothrow
78 	{
79 		return (value in this) !is null;
80 	}
81 
82 	/**
83 	 * Supports $(B a in b) syntax
84 	 */
85 	inout(T)* opBinaryRight(string op)(T value) inout nothrow if (op == "in")
86 	{
87 		if (buckets.length == 0 || _length == 0)
88 			return null;
89 		hash_t hash = generateHash(value);
90 		size_t index = hashToIndex(hash);
91 		return buckets[index].get(value, hash);
92 	}
93 
94 	/**
95 	 * Inserts the given item into the set.
96 	 * Params: value = the value to insert
97 	 * Returns: true if the value was actually inserted, or false if it was
98 	 *     already present.
99 	 */
100 	bool insert(T value)
101 	{
102 		if (buckets.length == 0)
103 			initialize(4);
104 		hash_t hash = generateHash(value);
105 		size_t index = hashToIndex(hash);
106 		static if (storeHash)
107 			auto r = buckets[index].insert(Node(hash, value));
108 		else
109 			auto r = buckets[index].insert(Node(value));
110 		if (r)
111 			++_length;
112 		return r;
113 	}
114 
115 	/// ditto
116 	alias put = insert;
117 
118 	/**
119 	 * Returns: true if the set has no items
120 	 */
121 	bool empty() const nothrow pure @nogc @safe @property
122 	{
123 		return _length == 0;
124 	}
125 
126 	/**
127 	 * Returns: the number of items in the set
128 	 */
129 	size_t length() const nothrow pure @nogc @safe @property
130 	{
131 		return _length;
132 	}
133 
134 	/**
135 	 * Forward range interface
136 	 */
137 	auto range(this This)() nothrow @nogc @trusted @property
138 	{
139 		return Range!(This)(&this);
140 	}
141 
142 	/// ditto
143 	alias opSlice = range;
144 
145 private:
146 
147 	import containers.internal.node : shouldAddGCRange;
148 	import containers.internal.storage_type : ContainerStorageType;
149 	import containers.internal.element_type : ContainerElementType;
150 	import containers.unrolledlist : UnrolledList;
151 	import core.memory : GC;
152 	import std.allocator : allocate, deallocate, Mallocator;
153 	import std.conv : emplace;
154 	import std.traits : isBasicType;
155 
156 	enum bool storeHash = !isBasicType!T;
157 
158 	void initialize(size_t bucketCount)
159 	{
160 		buckets = cast(Bucket[]) Mallocator.it.allocate(bucketCount * Bucket.sizeof);
161 		assert (buckets.length == bucketCount);
162 		foreach (ref bucket; buckets)
163 			emplace(&bucket);
164 		static if (supportGC && shouldAddGCRange!T)
165 			GC.addRange(buckets.ptr, buckets.length * Bucket.sizeof);
166 	}
167 
168 	static struct Range(ThisT)
169 	{
170 		this(ThisT* t)
171 		{
172 			foreach (i, ref bucket; t.buckets)
173 			{
174 				bucketIndex = i;
175 				if (bucket.root !is null)
176 				{
177 					currentNode = cast(Bucket.BucketNode*) bucket.root;
178 					break;
179 				}
180 			}
181 			this.t = t;
182 		}
183 
184 		bool empty() const nothrow @safe @nogc @property
185 		{
186 			return currentNode is null;
187 		}
188 
189 		ET front() nothrow @safe @nogc @property
190 		{
191 			return cast(ET) currentNode.items[nodeIndex].value;
192 		}
193 
194 		void popFront() nothrow @trusted @nogc
195 		{
196 			if (nodeIndex + 1 < currentNode.l)
197 			{
198 				++nodeIndex;
199 				return;
200 			}
201 			else
202 			{
203 				if (currentNode.next is null)
204 				{
205 					++bucketIndex;
206 					while (bucketIndex < t.buckets.length && t.buckets[bucketIndex].root is null)
207 						++bucketIndex;
208 					nodeIndex = 0;
209 					if (bucketIndex < t.buckets.length)
210 						currentNode = cast(Bucket.BucketNode*) t.buckets[bucketIndex].root;
211 					else
212 						currentNode = null;
213 				}
214 				else
215 					currentNode = currentNode.next;
216 			}
217 		}
218 
219 	private:
220 		alias ET = ContainerElementType!(ThisT, T);
221 		ThisT* t;
222 		Bucket.BucketNode* currentNode;
223 		size_t bucketIndex;
224 		size_t nodeIndex;
225 	}
226 
227 	bool shouldRehash() const pure nothrow @safe
228 	{
229 		return (cast(float) _length / cast(float) buckets.length) > 0.75;
230 	}
231 
232 	void rehash() @trusted
233 	{
234 		immutable size_t newLength = buckets.length << 1;
235 		immutable size_t newSize = newLength * Bucket.sizeof;
236 		Bucket[] oldBuckets = buckets;
237 		buckets = cast(Bucket[]) Mallocator.it.allocate(newSize);
238 		assert (buckets);
239 		assert (buckets.length == newLength);
240 		foreach (ref bucket; buckets)
241 			emplace(&bucket);
242 		static if (supportGC && shouldAddGCRange!T)
243 			GC.addRange(buckets.ptr, buckets.length * Bucket.sizeof);
244 		foreach (ref const bucket; oldBuckets)
245 		{
246 			for (Bucket.BucketNode* node = cast(Bucket.BucketNode*) bucket.root; node !is null; node = node.next)
247 			{
248 				for (size_t i = 0; i < node.l; ++i)
249 				{
250 					static if (storeHash)
251 					{
252 						immutable size_t index = hashToIndex(node.items[i].hash);
253 						buckets[index].insert(Node(node.items[i].hash, node.items[i].value));
254 					}
255 					else
256 					{
257 						immutable size_t hash = generateHash(node.items[i].value);
258 						immutable size_t index = hashToIndex(hash);
259 						buckets[index].insert(Node(node.items[i].value));
260 					}
261 				}
262 			}
263 		}
264 		foreach (ref bucket; oldBuckets)
265 			typeid(Bucket).destroy(&bucket);
266 		static if (supportGC && shouldAddGCRange!T)
267 			GC.removeRange(oldBuckets.ptr);
268 		Mallocator.it.deallocate(oldBuckets);
269 	}
270 
271 	size_t hashToIndex(hash_t hash) const pure nothrow @safe
272 	in
273 	{
274 		assert (buckets.length > 0);
275 	}
276 	out (result)
277 	{
278 		import std.string : format;
279 		assert (result < buckets.length, "%d, %d".format(result, buckets.length));
280 	}
281 	body
282 	{
283 		return hash & (buckets.length - 1);
284 	}
285 
286 	static struct Bucket
287 	{
288 		~this()
289 		{
290 			BucketNode* current = root;
291 			BucketNode* previous;
292 			while (true)
293 			{
294 				if (previous !is null)
295 				{
296 					typeid(BucketNode).destroy(&previous);
297 					deallocate(Mallocator.it, previous);
298 				}
299 				previous = current;
300 				if (current is null)
301 					break;
302 				current = current.next;
303 			}
304 		}
305 
306 		static struct BucketNode
307 		{
308 			ContainerStorageType!(T)* get(Node n)
309 			{
310 				for (size_t i = 0; i < l; ++i)
311 				{
312 					static if (storeHash)
313 					{
314 						if (items[i].hash == n.hash && items[i].value == n.value)
315 							return &items[i].value;
316 					}
317 					else
318 					{
319 						if (items[i].value == n.value)
320 							return &items[i].value;
321 					}
322 				}
323 				return null;
324 			}
325 
326 			void insert(Node n)
327 			{
328 				items[l] = n;
329 				++l;
330 			}
331 
332 			bool remove(Node n)
333 			{
334 				import std.algorithm : SwapStrategy, remove;
335 
336 				foreach (size_t i, ref node; items)
337 				{
338 					static if (storeHash)
339 						immutable bool matches = node.hash == n.hash && node.value == n.value;
340 					else
341 						immutable bool matches = node.value == n.value;
342 					if (matches)
343 					{
344 						items[].remove!(SwapStrategy.unstable)(i);
345 						l--;
346 						return true;
347 					}
348 				}
349 				return false;
350 			}
351 
352 			import containers.internal.node : fatNodeCapacity;
353 
354 			BucketNode* next;
355 			size_t l;
356 			Node[fatNodeCapacity!(Node.sizeof, 1, size_t, 128)] items;
357 		}
358 
359 		bool insert(Node n)
360 		{
361 			BucketNode* prev;
362 			BucketNode* current;
363 			for (current = root; current !is null; prev = current)
364 			{
365 				if (current.l >= current.items.length)
366 				{
367 					current = current.next;
368 					continue;
369 				}
370 				if (current.get(n))
371 					return false;
372 				current.insert(n);
373 				return true;
374 			}
375 			BucketNode* newNode = cast(BucketNode*) Mallocator.it.allocate(BucketNode.sizeof);
376 			*newNode = BucketNode.init;
377 			newNode.insert(n);
378 			if (prev is null)
379 				root = newNode;
380 			else
381 				prev.next = newNode;
382 			return true;
383 		}
384 
385 		bool remove(Node n)
386 		{
387 			import std.allocator : deallocate;
388 
389 			BucketNode* current = root;
390 			BucketNode* previous;
391 			while (current !is null)
392 			{
393 				immutable removed = current.remove(n);
394 				if (removed)
395 				{
396 					if (current.l == 0)
397 					{
398 						if (previous !is null)
399 							previous.next = current.next;
400 						else
401 							root = null;
402 						typeid(BucketNode).destroy(&current);
403 						deallocate(Mallocator.it, current);
404 					}
405 					return true;
406 				}
407 				previous = current;
408 				current = current.next;
409 			}
410 			return false;
411 		}
412 
413 		inout(T)* get(T value, size_t hash) inout
414 		{
415 			for (BucketNode* current = cast(BucketNode*) root; current !is null; current = current.next)
416 			{
417 				static if (storeHash)
418 					auto v = current.get(Node(hash, value));
419 				else
420 					auto v = current.get(Node(value));
421 				if (v !is null)
422 					return cast(typeof(return)) v;
423 			}
424 			return null;
425 		}
426 
427 		BucketNode* root;
428 	}
429 
430 	struct Node
431 	{
432 		bool opEquals(ref const T v) const
433 		{
434 			return v == value;
435 		}
436 
437 		bool opEquals(ref const Node other) const
438 		{
439 			static if (storeHash)
440 				if (other.hash != hash)
441 					return false;
442 			return other.value == value;
443 		}
444 
445 		static if (storeHash)
446 			hash_t hash;
447 		ContainerStorageType!T value;
448 	}
449 
450 	Bucket[] buckets;
451 	size_t _length;
452 }
453 
454 ///
455 unittest
456 {
457 	import std.array : array;
458 	import std.algorithm : canFind;
459 	import std.uuid : randomUUID;
460 
461 	auto s = HashSet!string(16);
462 	assert(!s.contains("nonsense"));
463 	assert(s.put("test"));
464 	assert(s.contains("test"));
465 	assert(!s.put("test"));
466 	assert(s.contains("test"));
467 	assert(s.length == 1);
468 	assert(!s.contains("nothere"));
469 	s.put("a");
470 	s.put("b");
471 	s.put("c");
472 	s.put("d");
473 	string[] strings = s.range.array;
474 	assert(strings.canFind("a"));
475 	assert(strings.canFind("b"));
476 	assert(strings.canFind("c"));
477 	assert(strings.canFind("d"));
478 	assert(strings.canFind("test"));
479 	assert(*("a" in s) == "a");
480 	assert(*("b" in s) == "b");
481 	assert(*("c" in s) == "c");
482 	assert(*("d" in s) == "d");
483 	assert(*("test" in s) == "test");
484 	assert(strings.length == 5);
485 	assert(s.remove("test"));
486 	assert(s.length == 4);
487 	s.clear();
488 	assert(s.length == 0);
489 	assert(s.empty);
490 	s.put("abcde");
491 	assert(s.length == 1);
492 	foreach (i; 0 .. 10_000)
493 	{
494 		s.put(randomUUID().toString);
495 	}
496 	assert(s.length == 10_001);
497 
498 	// Make sure that there's no range violation slicing an empty set
499 	HashSet!int e;
500 	foreach (i; e[])
501 		assert(i > 0);
502 
503 	HashSet!int f;
504 	foreach (i; 0 .. 100)
505 		assert(f.insert(i));
506 	foreach (i; 0 .. 100)
507 		assert(f.remove(i));
508 	foreach (i; 0 .. 100)
509 		assert(!f.remove(i));
510 
511 	HashSet!int g;
512 	foreach (i; 0 .. 100)
513 		assert(g.insert(i));
514 }