1 /**
2  * Open-Addressed Hash Set
3  * Copyright: © 2015 Economic Modeling Specialists, Intl.
4  * Authors: Brian Schott
5  * License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
6  */
7 module containers.openhashset;
8 
9 private import containers.internal.hash : generateHash;
10 private import containers.internal.node : shouldAddGCRange;
11 private import std.experimental.allocator.mallocator : Mallocator;
12 private import std.experimental.allocator.common : stateSize;
13 
14 /**
15  * Simple open-addressed hash set. Use this instead of HashSet when the size and
16  * quantity of the data to be inserted is small.
17  *
18  * Params:
19  *     T = the element type of the hash set
20  *     Allocator = the allocator to use. Defaults to `Mallocator`.
21  *     hashFunction = the hash function to use
22  *     supportGC = if true, calls to GC.addRange and GC.removeRange will be used
23  *         to ensure that the GC does not accidentally free memory owned by this
24  *         container.
25  */
26 struct OpenHashSet(T, Allocator = Mallocator,
27 	alias hashFunction = generateHash!T, bool supportGC = shouldAddGCRange!T)
28 {
29 	/**
30 	 * Disallow copy construction
31 	 */
32 	this(this) @disable;
33 
34 	static if (stateSize!Allocator != 0)
35 	{
36 		this() @disable;
37 
38 		/**
39 		 * Use the given `allocator` for allocations.
40 		 */
41 		this(Allocator allocator)
42 		in
43 		{
44 			assert(allocator !is null, "Allocator must not be null");
45 		}
46 		body
47 		{
48 			this.allocator = allocator;
49 		}
50 
51 		/**
52 		 * Initializes the hash set with the given initial capacity.
53 		 *
54 		 * Params:
55 		 *     initialCapacity = the initial capacity for the hash set
56 		 */
57 		this(size_t initialCapacity, Allocator allocator)
58 		in
59 		{
60 			assert(allocator !is null, "Allocator must not be null");
61 			assert ((initialCapacity & initialCapacity - 1) == 0, "initialCapacity must be a power of 2");
62 		}
63 		body
64 		{
65 			this.allocator = allocator;
66 			initialize(initialCapacity);
67 		}
68 	}
69 	else
70 	{
71 		/**
72 		 * Initializes the hash set with the given initial capacity.
73 		 *
74 		 * Params:
75 		 *     initialCapacity = the initial capacity for the hash set
76 		 */
77 		this(size_t initialCapacity)
78 		in
79 		{
80 			assert ((initialCapacity & initialCapacity - 1) == 0, "initialCapacity must be a power of 2");
81 		}
82 		body
83 		{
84 			initialize(initialCapacity);
85 		}
86 	}
87 
88 	~this()
89 	{
90 		static if (useGC)
91 			GC.removeRange(nodes.ptr);
92 		allocator.deallocate(nodes);
93 	}
94 
95 	/**
96 	 * Removes all items from the hash set.
97 	 */
98 	void clear()
99 	{
100 		if (empty)
101 			return;
102 		foreach (ref node; nodes)
103 		{
104 			typeid(typeof(node)).destroy(&node);
105 			node.used = false;
106 		}
107 		_length = 0;
108 	}
109 
110 	///
111 	bool empty() const pure nothrow @nogc @safe @property
112 	{
113 		return _length == 0;
114 	}
115 
116 	///
117 	size_t length() const pure nothrow @nogc @safe @property
118 	{
119 		return _length;
120 	}
121 
122 	/**
123 	 * Returns:
124 	 *     $(B true) if the hash set contains the given item, false otherwise.
125 	 */
126 	bool contains(T item) const nothrow @safe
127 	{
128 		if (empty)
129 			return false;
130 		immutable size_t hash = hashFunction(item);
131 		size_t index = toIndex(nodes, item, hash);
132 		if (index == size_t.max)
133 			return false;
134 		return nodes[index].hash == hash && nodes[index].data == item;
135 	}
136 
137 	/// ditto
138 	bool opBinaryRight(string op)(T item) inout nothrow if (op == "in")
139 	{
140 		return contains(item);
141 	}
142 
143 	/**
144 	 * Inserts the gien item into the set.
145 	 *
146 	 * Returns:
147 	 *     $(B true) if the item was inserted, false if it was already present.
148 	 */
149 	bool insert(T item)
150 	{
151 		if (nodes.length == 0)
152 			initialize(DEFAULT_INITIAL_CAPACITY);
153 		immutable size_t hash = hashFunction(item);
154 		size_t index = toIndex(nodes, item, hash);
155 		if (index == size_t.max)
156 		{
157 			grow();
158 			index = toIndex(nodes, item, hash);
159 		}
160 		else if (nodes[index].used && nodes[index].hash == hash && nodes[index].data == item)
161 			return false;
162 		nodes[index].used = true;
163 		nodes[index].hash = hash;
164 		nodes[index].data = item;
165 		_length++;
166 		return true;
167 	}
168 
169 	/// ditto
170 	bool opOpAssign(string op)(T item) if (op == "~")
171 	{
172 		return insert(item);
173 	}
174 
175 	/**
176 	 * Params:
177 	 *     item = the item to remove
178 	 * Returns:
179 	 *     $(B true) if the item was removed, $(B false) if it was not present
180 	 */
181 	bool remove(T item)
182 	{
183 		if (empty)
184 			return false;
185 		immutable size_t hash = hashFunction(item);
186 		size_t index = toIndex(nodes, item, hash);
187 		if (index == size_t.max)
188 			return false;
189 		nodes[index].used = false;
190 		destroy(nodes[index].data);
191 		_length--;
192 		return true;
193 	}
194 
195 	/**
196 	 * Returns:
197 	 *     A range over the set.
198 	 */
199 	auto range(this This)() nothrow pure @nogc @safe
200 	{
201 		return Range!(This)(nodes);
202 	}
203 
204 	/// ditto
205 	alias opSlice = range;
206 
207 private:
208 
209 	import containers.internal.storage_type : ContainerStorageType;
210 	import containers.internal.element_type : ContainerElementType;
211 	import containers.internal.mixins : AllocatorState;
212 	import core.memory : GC;
213 
214 	enum DEFAULT_INITIAL_CAPACITY = 8;
215 	enum bool useGC = supportGC && shouldAddGCRange!T;
216 
217 	static struct Range(ThisT)
218 	{
219 		ET front()
220 		{
221 			return cast(typeof(return)) nodes[index].data;
222 		}
223 
224 		bool empty() const pure nothrow @safe @nogc @property
225 		{
226 			return index >= nodes.length;
227 		}
228 
229 		void popFront() pure nothrow @safe @nogc
230 		{
231 			index++;
232 			while (index < nodes.length && !nodes[index].used)
233 				index++;
234 		}
235 
236 	private:
237 
238 		alias ET = ContainerElementType!(ThisT, T);
239 
240 		this(const Node[] nodes)
241 		{
242 			this.nodes = nodes;
243 			while (true)
244 			{
245 				if (index >= nodes.length || nodes[index].used)
246 					break;
247 				index++;
248 			}
249 		}
250 
251 		size_t index;
252 		const Node[] nodes;
253 	}
254 
255 	void grow()
256 	{
257 		immutable size_t newCapacity = nodes.length << 1;
258 		Node[] newNodes = (cast (Node*) allocator.allocate(newCapacity * Node.sizeof))
259 			[0 .. newCapacity];
260 		newNodes[] = Node.init;
261 		static if (useGC)
262 			GC.addRange(newNodes.ptr, newNodes.length, typeid(typeof(nodes)));
263 		foreach (ref node; nodes)
264 		{
265 			immutable size_t newIndex = toIndex(newNodes, node.data, node.hash);
266 			newNodes[newIndex] = node;
267 		}
268 		static if (useGC)
269 			GC.removeRange(nodes.ptr);
270 		allocator.deallocate(nodes);
271 		nodes = newNodes;
272 	}
273 
274 	void initialize(size_t nodeCount)
275 	{
276 		nodes = (cast (Node*) allocator.allocate(nodeCount * Node.sizeof))[0 .. nodeCount];
277 		nodes[] = Node.init;
278 		_length = 0;
279 	}
280 
281 	/**
282 	 * Returns:
283 	 *     size_t.max if the item was not found
284 	 */
285 	static size_t toIndex(const Node[] n, T item, size_t hash) nothrow @safe
286 	{
287 		immutable size_t bucketMask = (n.length - 1);
288 		immutable size_t index = hash & bucketMask;
289 		size_t i = index;
290 		while (n[i].used && n[i].data != item)
291 		{
292 			i = (i + 1) & bucketMask;
293 			if (i == index)
294 				return size_t.max;
295 		}
296 		return i;
297 	}
298 
299 	Node[] nodes;
300 	size_t _length;
301 	mixin AllocatorState!Allocator;
302 
303 	struct Node
304 	{
305 		ContainerStorageType!T data;
306 		bool used;
307 		size_t hash;
308 	}
309 }
310 
311 unittest
312 {
313 	import std..string : format;
314 	import std.algorithm : equal, sort;
315 	import std.range : iota;
316 	import std.array : array;
317 	OpenHashSet!int ints;
318 	assert (ints.empty);
319 	assert (equal(ints[], cast(int[]) []));
320 	ints.clear();
321 	ints.insert(10);
322 	assert (!ints.empty);
323 	assert (ints.length == 1);
324 	assert (equal(ints[], [10]));
325 	assert (ints.contains(10));
326 	ints.clear();
327 	assert (ints.length == 0);
328 	assert (ints.empty);
329 	ints ~= 0;
330 	assert (!ints.empty);
331 	assert (ints.length == 1);
332 	assert (equal(ints[], [0]));
333 	ints.clear();
334 	assert (ints.length == 0);
335 	assert (ints.empty);
336 	foreach (i; 0 .. 100)
337 		ints ~= i;
338 	assert (ints.length == 100, "%d".format(ints.length));
339 	assert (!ints.empty);
340 	foreach (i; 0 .. 100)
341 		assert (i in ints);
342 	assert (equal(ints[].array().sort(), iota(0, 100)));
343 	assert (ints.insert(10) == false);
344 	auto ohs = OpenHashSet!int(8);
345 	assert (!ohs.remove(1000));
346 	assert (ohs.contains(99) == false);
347 	assert (ohs.insert(10) == true);
348 	assert (ohs.insert(10) == false);
349 	foreach (i; 0 .. 7)
350 		ohs.insert(i);
351 	assert (ohs.contains(6));
352 	assert (!ohs.contains(100));
353 	assert (!ohs.remove(9999));
354 	assert (ohs.remove(0));
355 	assert (ohs.remove(1));
356 }