1 /**
2  * Open-Addressed Hash Set
3  * Copyright: © 2014 Economic Modeling Specialists, Intl.
4  * Authors: Brian Schott
5  * License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
6  */
7 
8 module containers.openhashset;
9 
10 import containers.internal.hash : generateHash;
11 import containers.internal.node : shouldAddGCRange;
12 
13 /**
14  * Simple open-addressed hash set. Use this instead of HashSet when the size and
15  * quantity of the data to be inserted is small.
16  *
17  * Params:
18  *     T = the element type of the hash set
19  *     hashFunction = the hash function to use
20  *     supportGC = if true, calls to GC.addRange and GC.removeRange will be used
21  *         to ensure that the GC does not accidentally free memory owned by this
22  *         container.
23  */
24 struct OpenHashSet(T, alias hashFunction = generateHash!T, bool supportGC = shouldAddGCRange!T)
25 {
26 	/**
27 	 * Disallow copy construction
28 	 */
29 	this(this) @disable;
30 
31 	/**
32 	 * Initializes the hash set with the given initial capacity.
33 	 *
34 	 * Params:
35 	 *     initialCapacity = the initial capacity for the hash set
36 	 */
37 	this(size_t initialCapacity)
38 	in
39 	{
40 		assert ((initialCapacity & initialCapacity - 1) == 0, "initialCapacity must be a power of 2");
41 	}
42 	body
43 	{
44 		initialize(initialCapacity);
45 	}
46 
47 	~this()
48 	{
49 		foreach (ref node; nodes)
50 			typeid(typeof(node)).destroy(&node);
51 		static if (supportGC)
52 			GC.removeRange(nodes.ptr);
53 		Mallocator.it.deallocate(nodes);
54 	}
55 
56 	/**
57 	 * Removes all items from the hash set.
58 	 */
59 	void clear()
60 	{
61 		if (empty)
62 			return;
63 		foreach (ref node; nodes)
64 		{
65 			typeid(typeof(node)).destroy(&node);
66 			node.used = false;
67 		}
68 		_length = 0;
69 	}
70 
71 	///
72 	bool empty() const pure nothrow @nogc @safe @property
73 	{
74 		return _length == 0;
75 	}
76 
77 	///
78 	size_t length() const pure nothrow @nogc @safe @property
79 	{
80 		return _length;
81 	}
82 
83 	/**
84 	 * Returns:
85 	 *     $(B true) if the hash set contains the given item, false otherwise.
86 	 */
87 	bool contains(T item) const nothrow @safe
88 	{
89 		if (empty)
90 			return false;
91 		immutable size_t hash = hashFunction(item);
92 		size_t index = toIndex(nodes, item, hash);
93 		if (index == size_t.max)
94 			return false;
95 		return nodes[index].hash == hash && nodes[index].data == item;
96 	}
97 
98 	/// ditto
99 	bool opBinaryRight(string op)(T item) inout nothrow if (op == "in")
100 	{
101 		return contains(item);
102 	}
103 
104 	/**
105 	 * Inserts the gien item into the set.
106 	 *
107 	 * Returns:
108 	 *     $(B true) if the item was inserted, false if it was already present.
109 	 */
110 	bool insert(T item)
111 	{
112 		if (nodes.length == 0)
113 			initialize(DEFAULT_INITIAL_CAPACITY);
114 		immutable size_t hash = hashFunction(item);
115 		size_t index = toIndex(nodes, item, hash);
116 		if (index == size_t.max)
117 		{
118 			grow();
119 			index = toIndex(nodes, item, hash);
120 		}
121 		else if (nodes[index].used && nodes[index].hash == hash && nodes[index].data == item)
122 			return false;
123 		nodes[index].used = true;
124 		nodes[index].hash = hash;
125 		nodes[index].data = item;
126 		_length++;
127 		return true;
128 	}
129 
130 	/// ditto
131 	bool opOpAssign(string op)(T item) if (op == "~")
132 	{
133 		return insert(item);
134 	}
135 
136 	/**
137 	 * Params:
138 	 *     item = the item to remove
139 	 * Returns:
140 	 *     $(B true) if the item was removed, $(B false) if it was not present
141 	 */
142 	bool remove(T item)
143 	{
144 		if (empty)
145 			return false;
146 		immutable size_t hash = hashFunction(item);
147 		size_t index = toIndex(nodes, item, hash);
148 		if (index == size_t.max)
149 			return false;
150 		nodes[index].used = false;
151 		destroy(nodes[index].data);
152 		_length--;
153 		return true;
154 	}
155 
156 	/**
157 	 * Returns:
158 	 *     A range over the set.
159 	 */
160 	auto range(this This)() nothrow pure @nogc @safe
161 	{
162 		return Range!(This)(nodes);
163 	}
164 
165 	/// ditto
166 	alias opSlice = range;
167 
168 private:
169 
170 	import containers.internal.storage_type : ContainerStorageType;
171 	import containers.internal.element_type : ContainerElementType;
172 	import std.allocator:Mallocator;
173 	import core.memory : GC;
174 
175 	enum DEFAULT_INITIAL_CAPACITY = 8;
176 
177 	static struct Range(ThisT)
178 	{
179 		ET front()
180 		{
181 			return cast(typeof(return)) nodes[index].data;
182 		}
183 
184 		bool empty() const pure nothrow @safe @nogc @property
185 		{
186 			return index >= nodes.length;
187 		}
188 
189 		void popFront() pure nothrow @safe @nogc
190 		{
191 			index++;
192 			while (index < nodes.length && !nodes[index].used)
193 				index++;
194 		}
195 
196 	private:
197 
198 		alias ET = ContainerElementType!(ThisT, T);
199 
200 		this(const Node[] nodes)
201 		{
202 			this.nodes = nodes;
203 			while (true)
204 			{
205 				if (index >= nodes.length || nodes[index].used)
206 					break;
207 				index++;
208 			}
209 		}
210 
211 		size_t index;
212 		const Node[] nodes;
213 	}
214 
215 	void grow()
216 	{
217 		immutable size_t newCapacity = nodes.length << 1;
218 		Node[] newNodes = (cast (Node*) Mallocator.it.allocate(newCapacity * Node.sizeof))
219 			[0 .. newCapacity];
220 		newNodes[] = Node.init;
221 		static if (supportGC)
222 			GC.addRange(newNodes.ptr, newNodes.length, typeid(typeof(nodes)));
223 		foreach (ref node; nodes)
224 		{
225 			immutable size_t newIndex = toIndex(newNodes, node.data, node.hash);
226 			newNodes[newIndex] = node;
227 		}
228 		static if (supportGC)
229 			GC.removeRange(nodes.ptr);
230 		Mallocator.it.deallocate(nodes);
231 		nodes = newNodes;
232 	}
233 
234 	void initialize(size_t nodeCount)
235 	{
236 		nodes = (cast (Node*) Mallocator.it.allocate(nodeCount * Node.sizeof))
237 			[0 .. nodeCount];
238 		nodes[] = Node.init;
239 		_length = 0;
240 	}
241 
242 	/**
243 	 * Returns:
244 	 *     size_t.max if the
245 	 */
246 	static size_t toIndex(const Node[] n, T item, size_t hash) nothrow @safe
247 	{
248 		immutable size_t bucketMask = (n.length - 1);
249 		immutable size_t index = hash & bucketMask;
250 		size_t i = index;
251 		while (n[i].used && n[i].data != item)
252 		{
253 			i = (i + 1) & bucketMask;
254 			if (i == index)
255 				return size_t.max;
256 		}
257 		return i;
258 	}
259 
260 	Node[] nodes;
261 	size_t _length;
262 
263 	struct Node
264 	{
265 		ContainerStorageType!T data;
266 		bool used;
267 		size_t hash;
268 	}
269 }
270 
271 unittest
272 {
273 	import std.string : format;
274 	import std.algorithm : equal, sort;
275 	import std.range : iota;
276 	import std.array : array;
277 	OpenHashSet!int ints;
278 	assert (ints.empty);
279 	assert (equal(ints[], cast(int[]) []));
280 	ints.clear();
281 	ints.insert(10);
282 	assert (!ints.empty);
283 	assert (ints.length == 1);
284 	assert (equal(ints[], [10]));
285 	assert (ints.contains(10));
286 	ints.clear();
287 	assert (ints.length == 0);
288 	assert (ints.empty);
289 	ints ~= 0;
290 	assert (!ints.empty);
291 	assert (ints.length == 1);
292 	assert (equal(ints[], [0]));
293 	ints.clear();
294 	assert (ints.length == 0);
295 	assert (ints.empty);
296 	foreach (i; 0 .. 100)
297 		ints ~= i;
298 	assert (ints.length == 100, "%d".format(ints.length));
299 	assert (!ints.empty);
300 	foreach (i; 0 .. 100)
301 		assert (i in ints);
302 	assert (equal(ints[].array().sort(), iota(0, 100)));
303 	assert (ints.insert(10) == false);
304 	auto ohs = OpenHashSet!int(8);
305 	assert (!ohs.remove(1000));
306 	assert (ohs.contains(99) == false);
307 	assert (ohs.insert(10) == true);
308 	assert (ohs.insert(10) == false);
309 	foreach (i; 0 .. 7)
310 		ohs.insert(i);
311 	assert (ohs.contains(6));
312 	assert (!ohs.contains(100));
313 	assert (!ohs.remove(9999));
314 	assert (ohs.remove(0));
315 	assert (ohs.remove(1));
316 }