1 /**
2  * Hash Map
3  * Copyright: © 2015 Economic Modeling Specialists, Intl.
4  * Authors: Brian Schott
5  * License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
6  */
7 
8 module containers.hashmap;
9 
10 private import containers.internal.hash : generateHash;
11 private import containers.internal.node : shouldAddGCRange;
12 private import std.experimental.allocator.mallocator : Mallocator;
13 
14 /**
15  * Associative array / hash map.
16  * Params:
17  *     K = the key type
18  *     V = the value type
19  *     Allocator = the allocator type to use. Defaults to `Mallocator`
20  *     hashFunction = the hash function to use on the keys
21  *     supportGC = true if the container should support holding references to
22  *         GC-allocated memory.
23  */
24 struct HashMap(K, V, Allocator = Mallocator, alias hashFunction = generateHash!K,
25 	bool supportGC = shouldAddGCRange!K || shouldAddGCRange!V)
26 {
27 	this(this) @disable;
28 
29 	private import std.experimental.allocator.common : stateSize;
30 
31 	static if (stateSize!Allocator != 0)
32 	{
33 		this() @disable;
34 
35 		/**
36 		 * Use the given `allocator` for allocations.
37 		 */
38 		this(Allocator allocator)
39 		in
40 		{
41 			assert(allocator !is null, "Allocator must not be null");
42 		}
43 		body
44 		{
45 			this.allocator = allocator;
46 		}
47 
48 		/**
49 		 * Constructs an HashMap with an initial bucket count of bucketCount. bucketCount
50 		 * must be a power of two.
51 		 */
52 		this(size_t bucketCount, Allocator allocator)
53 		in
54 		{
55 			assert(allocator !is null, "Allocator must not be null");
56 			assert((bucketCount & (bucketCount - 1)) == 0, "bucketCount must be a power of two");
57 		}
58 		body
59 		{
60 			this.allocator = allocator;
61 			initialize(bucketCount);
62 		}
63 
64 		invariant
65 		{
66 			assert(allocator !is null);
67 		}
68 	}
69 	else
70 	{
71 		/**
72 		 * Constructs an HashMap with an initial bucket count of bucketCount. bucketCount
73 		 * must be a power of two.
74 		 */
75 		this(size_t bucketCount)
76 		in
77 		{
78 			assert((bucketCount & (bucketCount - 1)) == 0, "bucketCount must be a power of two");
79 		}
80 		body
81 		{
82 			initialize(bucketCount);
83 		}
84 	}
85 
86 	~this()
87 	{
88 		import std.experimental.allocator : dispose;
89 		static if (useGC)
90 			GC.removeRange(buckets.ptr);
91 		allocator.dispose(buckets);
92 	}
93 
94 	/**
95 	 * Supports $(B aa[key]) syntax.
96 	 */
97 	auto opIndex(this This)(K key)
98 	{
99 		import std.conv : text;
100 
101 		alias CET = ContainerElementType!(This, V);
102 
103 		if (buckets.length == 0)
104 			throw new Exception("'" ~ text(key) ~ "' not found in HashMap");
105 		size_t hash = generateHash(key);
106 		size_t index = hashToIndex(hash);
107 		foreach (r; buckets[index].range)
108 		{
109 			static if (storeHash)
110 			{
111 				if (r.hash == hash && r == key)
112 					return cast(CET) r.value;
113 			}
114 			else
115 			{
116 				if (r == key)
117 					return cast(CET) r.value;
118 			}
119 		}
120 		throw new Exception("'" ~ text(key) ~ "' not found in HashMap");
121 	}
122 
123 	/**
124 	 * Gets the value for the given key, or returns `defaultValue` if the given
125 	 * key is not present.
126 	 *
127 	 * Params:
128 	 *     key = the key to look up
129 	 *     value = the default value
130 	 * Returns: the value indexed by `key`, if present, or `defaultValue` otherwise.
131 	 */
132 	auto get(this This)(K key, lazy V defaultValue)
133 	{
134 		alias CET = ContainerElementType!(This, V);
135 
136 		if (_length == 0)
137 			return defaultValue;
138 		size_t hash = generateHash(key);
139 		size_t index = hashToIndex(hash);
140 		foreach (r; buckets[index].range)
141 		{
142 			static if (storeHash)
143 			{
144 				if (r.hash == hash && r == key)
145 					return cast(CET) r.value;
146 			}
147 			else
148 			{
149 				if (r == key)
150 					return cast(CET) r.value;
151 			}
152 		}
153 		return defaultValue;
154 	}
155 
156 	/**
157 	 * Supports $(B aa[key] = value;) syntax.
158 	 */
159 	void opIndexAssign(V value, K key)
160 	{
161 		insert(key, value);
162 	}
163 
164 	/**
165 	 * Supports $(B key in aa) syntax.
166 	 */
167 	bool opBinaryRight(string op)(K key) const nothrow if (op == "in")
168 	{
169 		if (_length == 0)
170 			return false;
171 		size_t hash = generateHash(key);
172 		size_t index = hashToIndex(hash);
173 		foreach (ref node; buckets[index].range)
174 		{
175 			static if (storeHash)
176 			{
177 				if (node.hash == hash && node == key)
178 					return true;
179 			}
180 			else
181 			{
182 				if (node == key)
183 					return true;
184 			}
185 		}
186 		return false;
187 	}
188 
189 	/**
190 	 * Removes the value associated with the given key
191 	 * Returns: true if a value was actually removed.
192 	 */
193 	bool remove(K key)
194 	{
195 		if (buckets.length == 0)
196 			return false;
197 		size_t hash = generateHash(key);
198 		size_t index = hashToIndex(hash);
199 		static if (storeHash)
200 			bool removed = buckets[index].remove(Node(hash, key));
201 		else
202 			bool removed = buckets[index].remove(Node(key));
203 		if (removed)
204 			_length--;
205 		return removed;
206 	}
207 
208 	/**
209 	 * Returns: the number of key/value pairs in this container.
210 	 */
211 	size_t length() const nothrow pure @property @safe @nogc
212 	{
213 		return _length;
214 	}
215 
216 	/**
217 	 * Returns: `true` if there are no items in this container.
218 	 */
219 	bool empty() const nothrow pure @property @safe @nogc
220 	{
221 		return _length == 0;
222 	}
223 
224 	/**
225 	 * Returns: a GC-allocated array filled with the keys contained in this map.
226 	 */
227 	K[] keys() const @property
228 	out(result)
229 	{
230 		assert (result.length == _length);
231 	}
232 	body
233 	{
234 		import std.array : appender;
235 		auto app = appender!(K[])();
236 		foreach (ref const bucket; buckets)
237 		{
238 			foreach (item; bucket.range)
239 				app.put(item.key);
240 		}
241 		return app.data;
242 	}
243 
244 	/**
245 	 * Returns: a GC-allocated array containing the values contained in this map.
246 	 */
247 	auto values(this This)() const @property
248 	out(result)
249 	{
250 		assert (result.length == _length);
251 	}
252 	body
253 	{
254 		import std.array : appender;
255 		auto app = appender!(ContainerElementType!(This, V)[])();
256 		foreach (ref const bucket; buckets)
257 		{
258 			foreach (item; bucket.range)
259 				app.put(item.value);
260 		}
261 		return app.data;
262 	}
263 
264 	/**
265 	 * Support for $(D foreach(key, value; aa) { ... }) syntax;
266 	 */
267 	int opApply(this This)(int delegate(ref K, ref V) del)
268 	{
269 		int result = 0;
270 		foreach (ref bucket; buckets)
271 		{
272 			foreach (ref node; bucket.range)
273 			{
274 				result = del(node.key, node.value);
275 				if (result != 0)
276 					return result;
277 			}
278 		}
279 		return result;
280 	}
281 
282 private:
283 
284 	import std.experimental.allocator : make;
285 	import std.traits : isBasicType;
286 	import containers.unrolledlist : UnrolledList;
287 	import containers.internal.storage_type : ContainerStorageType;
288 	import containers.internal.element_type : ContainerElementType;
289 	import containers.internal.mixins : AllocatorState;
290 	import core.memory : GC;
291 
292 	enum bool storeHash = !isBasicType!K;
293 	enum bool useGC = supportGC && (shouldAddGCRange!K || shouldAddGCRange!V);
294 
295 	void initialize(size_t bucketCount = 4)
296 	{
297 		import std.conv : emplace;
298 
299 		buckets = (cast(Bucket*) allocator.allocate(bucketCount * Bucket.sizeof))[0 .. bucketCount];
300 		static if (useGC)
301 			GC.addRange(buckets.ptr, buckets.length * Bucket.sizeof);
302 		foreach (ref bucket; buckets)
303 		{
304 			static if (stateSize!Allocator == 0)
305 				emplace(&bucket);
306 			else
307 				emplace(&bucket, allocator);
308 		}
309 	}
310 
311 	void insert(K key, V value)
312 	{
313 		if (buckets.length == 0)
314 			initialize();
315 		size_t hash = generateHash(key);
316 		size_t index = hashToIndex(hash);
317 		foreach (ref item; buckets[index].range)
318 		{
319 			static if (storeHash)
320 			{
321 				if (item.hash == hash && item.key == key)
322 				{
323 					item.value = value;
324 					return;
325 				}
326 			}
327 			else
328 			{
329 				if (item.key == key)
330 				{
331 					item.value = value;
332 					return;
333 				}
334 			}
335 		}
336 		static if (storeHash)
337 			buckets[index].put(Node(hash, key, value));
338 		else
339 			buckets[index].put(Node(key, value));
340 		_length++;
341 		if (shouldRehash)
342 			rehash();
343 	}
344 
345 	/**
346 	 * Returns: true if the load factor has been exceeded
347 	 */
348 	bool shouldRehash() const pure nothrow @safe @nogc
349 	{
350 		return cast(float) _length / cast(float) buckets.length > 0.75;
351 	}
352 
353 	/**
354 	 * Rehash the map.
355 	 */
356 	void rehash() @trusted
357 	{
358 //		import std.experimental.allocator : make, dispose;
359 		import std.conv : emplace;
360 		immutable size_t newLength = buckets.length << 1;
361 		immutable size_t newSize = newLength * Bucket.sizeof;
362 		Bucket[] oldBuckets = buckets;
363 		assert (oldBuckets.ptr == buckets.ptr);
364 		buckets = cast(Bucket[]) allocator.allocate(newSize);
365 		static if (useGC)
366 			GC.addRange(buckets.ptr, buckets.length * Bucket.sizeof);
367 		assert (buckets);
368 		assert (buckets.length == newLength);
369 		foreach (ref bucket; buckets)
370 		{
371 			static if (stateSize!Allocator == 0)
372 				emplace(&bucket);
373 			else
374 				emplace(&bucket, allocator);
375 		}
376 
377 		foreach (ref bucket; oldBuckets)
378 		{
379 			foreach (node; bucket.range)
380 			{
381 				static if (storeHash)
382 				{
383 					size_t index = hashToIndex(node.hash);
384 					buckets[index].put(Node(node.hash, node.key, node.value));
385 				}
386 				else
387 				{
388 					size_t hash = generateHash(node.key);
389 					size_t index = hashToIndex(hash);
390 					buckets[index].put(Node(node.key, node.value));
391 				}
392 			}
393 			typeid(typeof(bucket)).destroy(&bucket);
394 		}
395 		static if (useGC)
396 			GC.removeRange(oldBuckets.ptr);
397 		allocator.deallocate(cast(void[]) oldBuckets);
398 	}
399 
400 	size_t hashToIndex(size_t hash) const pure nothrow @safe @nogc
401 	in
402 	{
403 		assert (buckets.length > 0);
404 	}
405 	out (result)
406 	{
407 		assert (result < buckets.length);
408 	}
409 	body
410 	{
411 		return hash & (buckets.length - 1);
412 	}
413 
414 	size_t hashIndex(K key) const
415 	out (result)
416 	{
417 		assert (result < buckets.length);
418 	}
419 	body
420 	{
421 		return hashToIndex(generateHash(key));
422 	}
423 
424 	struct Node
425 	{
426 		bool opEquals(ref const K key) const
427 		{
428 			return key == this.key;
429 		}
430 
431 		bool opEquals(ref const Node n) const
432 		{
433 			static if (storeHash)
434 				return this.hash == n.hash && this.key == n.key;
435 			else
436 				return this.key == n.key;
437 		}
438 
439 		static if (storeHash)
440 			size_t hash;
441 		ContainerStorageType!K key;
442 		ContainerStorageType!V value;
443 	}
444 
445 	mixin AllocatorState!Allocator;
446 	alias Bucket = UnrolledList!(Node, Allocator, useGC);
447 	Bucket[] buckets;
448 	size_t _length;
449 }
450 
451 ///
452 unittest
453 {
454 	import std.uuid : randomUUID;
455 	auto hm = HashMap!(string, int)(16);
456 	assert (hm.length == 0);
457 	assert (!hm.remove("abc"));
458 	hm["answer"] = 42;
459 	assert (hm.length == 1);
460 	assert ("answer" in hm);
461 	hm.remove("answer");
462 	assert (hm.length == 0);
463 	hm["one"] = 1;
464 	hm["one"] = 1;
465 	assert (hm.length == 1);
466 	assert (hm["one"] == 1);
467 	foreach (i; 0 .. 1000)
468 	{
469 		hm[randomUUID().toString] = i;
470 	}
471 	assert (hm.length == 1001);
472 	assert (hm.keys().length == hm.length);
473 	assert (hm.values().length == hm.length);
474 	foreach (ref string k, ref int v; hm) {}
475 
476 	auto hm2 = HashMap!(char, char)(4);
477 	hm2['a'] = 'a';
478 
479 	HashMap!(int, int) hm3;
480 	assert (hm3.get(100, 20) == 20);
481 	hm3[100] = 1;
482 	assert (hm3.get(100, 20) == 1);
483 }