1 /**
2  * Hash Set
3  * Copyright: © 2014 Economic Modeling Specialists, Intl.
4  * Authors: Brian Schott
5  * License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
6  */
7 
8 module containers.hashset;
9 
10 import containers.internal.hash : generateHash;
11 import containers.internal.node : shouldAddGCRange;
12 
13 /**
14  * Hash Set.
15  * Params:
16  *     T = the element type
17  *     hashFunction = the hash function to use on the elements
18  */
19 struct HashSet(T, alias hashFunction = generateHash!T, bool supportGC = shouldAddGCRange!T)
20 {
21 	this(this) @disable;
22 
23 	/**
24 	 * Constructs a HashSet with an initial bucket count of bucketCount.
25 	 * bucketCount must be a power of two.
26 	 */
27 	this(size_t bucketCount)
28 	in
29 	{
30 		assert ((bucketCount & (bucketCount - 1)) == 0, "bucketCount must be a power of two");
31 	}
32 	body
33 	{
34 		initialize(bucketCount);
35 	}
36 
37 	~this()
38 	{
39 		import std.allocator : Mallocator, deallocate;
40 		foreach (ref bucket; buckets)
41 			typeid(typeof(bucket)).destroy(&bucket);
42 		static if (supportGC && shouldAddGCRange!T)
43 			GC.removeRange(buckets.ptr);
44 		Mallocator.it.deallocate(buckets);
45 	}
46 
47 	/**
48 	 * Removes all items from the set
49 	 */
50 	void clear()
51 	{
52 		foreach (ref bucket; buckets)
53 			destroy(bucket);
54 		_length = 0;
55 	}
56 
57 	/**
58 	 * Removes the given item from the set.
59 	 * Returns: false if the value was not present
60 	 */
61 	bool remove(T value)
62 	{
63 		hash_t hash = generateHash(value);
64 		size_t index = hashToIndex(hash);
65 		if (buckets[index].empty)
66 			return false;
67 		static if (storeHash)
68 			bool removed = buckets[index].remove(Node(hash, value));
69 		else
70 			bool removed = buckets[index].remove(Node(value));
71 		if (removed)
72 			--_length;
73 		return removed;
74 	}
75 
76 	/**
77 	 * Returns: true if value is contained in the set.
78 	 */
79 	bool contains(T value) inout nothrow
80 	{
81 		if (buckets.length == 0)
82 			return false;
83 		hash_t hash = generateHash(value);
84 		size_t index = hashToIndex(hash);
85 		if (buckets[index].empty)
86 			return false;
87 		foreach (ref item; buckets[index].range)
88 		{
89 			static if (storeHash)
90 			{
91 				if (item.hash == hash && item.value == value)
92 					return true;
93 			}
94 			else
95 			{
96 				if (item.value == value)
97 					return true;
98 			}
99 		}
100 		return false;
101 	}
102 
103 	/**
104 	 * Supports $(B a in b) syntax
105 	 */
106 	bool opBinaryRight(string op)(T value) inout nothrow if (op == "in")
107 	{
108 		return contains(value);
109 	}
110 
111 	/**
112 	 * Inserts the given item into the set.
113 	 * Params: value = the value to insert
114 	 * Returns: true if the value was actually inserted, or false if it was
115 	 *     already present.
116 	 */
117 	bool insert(T value)
118 	{
119 		if (buckets.length == 0)
120 			initialize(4);
121 		hash_t hash = generateHash(value);
122 		size_t index = hashToIndex(hash);
123 		if (buckets[index].empty)
124 		{
125 	insert:
126 			static if (storeHash)
127 				buckets[index].insert(Node(hash, value));
128 			else
129 				buckets[index].insert(Node(value));
130 			++_length;
131 			if (shouldRehash())
132 				rehash();
133 			return true;
134 		}
135 		foreach (ref item; buckets[index].range)
136 		{
137 			static if (storeHash)
138 			{
139 				if (item.hash == hash && item.value == value)
140 					return false;
141 			}
142 			else
143 			{
144 				if (item.value == value)
145 					return false;
146 			}
147 		}
148 		goto insert;
149 	}
150 
151 	/// ditto
152 	alias put = insert;
153 
154 	/**
155 	 * Returns: true if the set has no items
156 	 */
157 	bool empty() inout nothrow pure @nogc @safe @property
158 	{
159 		return _length == 0;
160 	}
161 
162 	/**
163 	 * Returns: the number of items in the set
164 	 */
165 	size_t length() inout nothrow pure @nogc @safe @property
166 	{
167 		return _length;
168 	}
169 
170 	/**
171 	 * Forward range interface
172 	 */
173 	Range range() inout nothrow @nogc @safe @property
174 	{
175 		return Range(buckets);
176 	}
177 
178 	/// ditto
179 	alias opSlice = range;
180 
181 private:
182 
183 	import containers.internal.node : shouldAddGCRange;
184 	import containers.unrolledlist : UnrolledList;
185 	import std.allocator : Mallocator, allocate;
186 	import std.traits : isBasicType;
187 	import core.memory : GC;
188 
189 	enum bool storeHash = !isBasicType!T;
190 
191 	void initialize(size_t bucketCount)
192 	{
193 		import std.conv : emplace;
194 		buckets = cast(Bucket[]) Mallocator.it.allocate(
195 			bucketCount * Bucket.sizeof);
196 		assert (buckets.length == bucketCount);
197 		foreach (ref bucket; buckets)
198 			emplace(&bucket);
199 		static if (supportGC && shouldAddGCRange!T)
200 			GC.addRange(buckets.ptr, buckets.length * Bucket.sizeof);
201 	}
202 
203 	static struct Range
204 	{
205 		this(const(Bucket)[] buckets)
206 		{
207 			this.buckets = buckets;
208 			if (buckets.length)
209 			{
210 				r = buckets[i].range;
211 				while (i < buckets.length && r.empty)
212 				{
213 					i++;
214 					r = buckets[i].range;
215 				}
216 			}
217 			else
218 				r = typeof(buckets[i].range()).init;
219 		}
220 
221 		bool empty() const nothrow @safe @nogc @property
222 		{
223 			return i >= buckets.length;
224 		}
225 
226 		T front() const nothrow @safe @nogc @property
227 		{
228 			return r.front.value;
229 		}
230 
231 		void popFront()
232 		{
233 			r.popFront();
234 			while (r.empty)
235 			{
236 				i++;
237 				if (i >= buckets.length)
238 					return;
239 				r = buckets[i].range;
240 			}
241 		}
242 
243 		const(Bucket)[] buckets;
244 		typeof(Bucket.range()) r;
245 		size_t i;
246 	}
247 
248 	alias Bucket = UnrolledList!(Node, supportGC);
249 
250 	bool shouldRehash() const pure nothrow @safe
251 	{
252 		return (cast(float) _length / cast(float) buckets.length) > 0.75;
253 	}
254 
255 	void rehash() @trusted
256 	{
257 		import std.allocator : allocate, deallocate;
258 		import std.conv : emplace;
259 		immutable size_t newLength = buckets.length << 1;
260 		immutable size_t newSize = newLength * Bucket.sizeof;
261 		Bucket[] oldBuckets = buckets;
262 		buckets = cast(Bucket[]) Mallocator.it.allocate(newSize);
263 		assert (buckets);
264 		assert (buckets.length == newLength);
265 		auto newAllocator = allocate!(HashSetAllocatorType!T)(Mallocator.it);
266 		assert (newAllocator);
267 		foreach (ref bucket; buckets)
268 			emplace(&bucket);
269 		static if (supportGC && shouldAddGCRange!T)
270 			GC.addRange(buckets.ptr, buckets.length * Bucket.sizeof);
271 		foreach (ref const bucket; oldBuckets)
272 		{
273 			foreach (node; bucket.range)
274 			{
275 				static if (storeHash)
276 				{
277 					size_t index = hashToIndex(node.hash);
278 					buckets[index].put(Node(node.hash, node.value));
279 				}
280 				else
281 				{
282 					size_t hash = generateHash(node.value);
283 					size_t index = hashToIndex(hash);
284 					buckets[index].put(Node(node.value));
285 				}
286 			}
287 		}
288 		foreach (ref bucket; oldBuckets)
289 			typeid(Bucket).destroy(&bucket);
290 		static if (supportGC && shouldAddGCRange!T)
291 			GC.removeRange(oldBuckets.ptr);
292 		Mallocator.it.deallocate(oldBuckets);
293 	}
294 
295 	size_t hashToIndex(hash_t hash) const pure nothrow @safe
296 	in
297 	{
298 		assert (buckets.length > 0);
299 	}
300 	out (result)
301 	{
302 		import std.string : format;
303 		assert (result < buckets.length, "%d, %d".format(result, buckets.length));
304 	}
305 	body
306 	{
307 		return hash & (buckets.length - 1);
308 	}
309 
310 	struct Node
311 	{
312 		bool opEquals(ref const T v) const
313 		{
314 			return v == value;
315 		}
316 
317 		bool opEquals(ref const Node other) const
318 		{
319 			static if (storeHash)
320 				if (other.hash != hash)
321 					return false;
322 			return other.value == value;
323 		}
324 
325 		static if (storeHash)
326 			hash_t hash;
327 		T value;
328 	}
329 
330 	Bucket[] buckets;
331 	size_t _length;
332 }
333 
334 ///
335 unittest
336 {
337 	import std.array : array;
338 	import std.algorithm : canFind;
339 	import std.uuid : randomUUID;
340 	auto s = HashSet!string(16);
341 	assert (!s.contains("nonsense"));
342 	s.put("test");
343 	s.put("test");
344 	assert (s.contains("test"));
345 	assert (s.length == 1);
346 	assert (!s.contains("nothere"));
347 	s.put("a");
348 	s.put("b");
349 	s.put("c");
350 	s.put("d");
351 	string[] strings = s.range.array;
352 	assert (strings.canFind("a"));
353 	assert (strings.canFind("b"));
354 	assert (strings.canFind("c"));
355 	assert (strings.canFind("d"));
356 	assert (strings.canFind("test"));
357 	assert (strings.length == 5);
358 	assert (s.remove("test"));
359 	assert (s.length == 4);
360 	s.clear();
361 	assert (s.length == 0);
362 	assert (s.empty);
363 	s.put("abcde");
364 	assert (s.length == 1);
365 	foreach (i; 0 .. 10_000)
366 	{
367 		s.put(randomUUID().toString);
368 	}
369 	assert (s.length == 10_001);
370 
371 	// Make sure that there's no range violation slicing an empty set
372 	HashSet!int e;
373 	foreach (i; e[])
374 		assert (i > 0);
375 }
376 
377 private:
378 
379 template HashSetAllocatorType(T)
380 {
381 	import memory.allocators;
382 	enum size_t hashSetNodeSize = (void*).sizeof + T.sizeof + hash_t.sizeof;
383 	enum size_t hashSetBlockSize = 512;
384 	alias HashSetAllocatorType = NodeAllocator!(hashSetNodeSize, hashSetBlockSize);
385 }