1 /**
2  * Open-Addressed Hash Set
3  * Copyright: © 2014 Economic Modeling Specialists, Intl.
4  * Authors: Brian Schott
5  * License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
6  */
7 module containers.openhashset;
8 
9 import containers.internal.hash : generateHash;
10 import containers.internal.node : shouldAddGCRange;
11 
12 /**
13  * Simple open-addressed hash set. Use this instead of HashSet when the size and
14  * quantity of the data to be inserted is small.
15  *
16  * Params:
17  *     T = the element type of the hash set
18  *     hashFunction = the hash function to use
19  *     supportGC = if true, calls to GC.addRange and GC.removeRange will be used
20  *         to ensure that the GC does not accidentally free memory owned by this
21  *         container.
22  */
23 struct OpenHashSet(T, alias hashFunction = generateHash!T, bool supportGC = shouldAddGCRange!T)
24 {
25 	/**
26 	 * Disallow copy construction
27 	 */
28 	this(this) @disable;
29 
30 	/**
31 	 * Initializes the hash set with the given initial capacity.
32 	 *
33 	 * Params:
34 	 *     initialCapacity = the initial capacity for the hash set
35 	 */
36 	this(size_t initialCapacity)
37 	in
38 	{
39 		assert ((initialCapacity & initialCapacity - 1) == 0, "initialCapacity must be a power of 2");
40 	}
41 	body
42 	{
43 		initialize(initialCapacity);
44 	}
45 
46 	~this()
47 	{
48 		foreach (ref node; nodes)
49 			typeid(typeof(node)).destroy(&node);
50 		static if (supportGC)
51 			GC.removeRange(nodes.ptr);
52 		Mallocator.instance.deallocate(nodes);
53 	}
54 
55 	/**
56 	 * Removes all items from the hash set.
57 	 */
58 	void clear()
59 	{
60 		if (empty)
61 			return;
62 		foreach (ref node; nodes)
63 		{
64 			typeid(typeof(node)).destroy(&node);
65 			node.used = false;
66 		}
67 		_length = 0;
68 	}
69 
70 	///
71 	bool empty() const pure nothrow @nogc @safe @property
72 	{
73 		return _length == 0;
74 	}
75 
76 	///
77 	size_t length() const pure nothrow @nogc @safe @property
78 	{
79 		return _length;
80 	}
81 
82 	/**
83 	 * Returns:
84 	 *     $(B true) if the hash set contains the given item, false otherwise.
85 	 */
86 	bool contains(T item) const nothrow @safe
87 	{
88 		if (empty)
89 			return false;
90 		immutable size_t hash = hashFunction(item);
91 		size_t index = toIndex(nodes, item, hash);
92 		if (index == size_t.max)
93 			return false;
94 		return nodes[index].hash == hash && nodes[index].data == item;
95 	}
96 
97 	/// ditto
98 	bool opBinaryRight(string op)(T item) inout nothrow if (op == "in")
99 	{
100 		return contains(item);
101 	}
102 
103 	/**
104 	 * Inserts the gien item into the set.
105 	 *
106 	 * Returns:
107 	 *     $(B true) if the item was inserted, false if it was already present.
108 	 */
109 	bool insert(T item)
110 	{
111 		if (nodes.length == 0)
112 			initialize(DEFAULT_INITIAL_CAPACITY);
113 		immutable size_t hash = hashFunction(item);
114 		size_t index = toIndex(nodes, item, hash);
115 		if (index == size_t.max)
116 		{
117 			grow();
118 			index = toIndex(nodes, item, hash);
119 		}
120 		else if (nodes[index].used && nodes[index].hash == hash && nodes[index].data == item)
121 			return false;
122 		nodes[index].used = true;
123 		nodes[index].hash = hash;
124 		nodes[index].data = item;
125 		_length++;
126 		return true;
127 	}
128 
129 	/// ditto
130 	bool opOpAssign(string op)(T item) if (op == "~")
131 	{
132 		return insert(item);
133 	}
134 
135 	/**
136 	 * Params:
137 	 *     item = the item to remove
138 	 * Returns:
139 	 *     $(B true) if the item was removed, $(B false) if it was not present
140 	 */
141 	bool remove(T item)
142 	{
143 		if (empty)
144 			return false;
145 		immutable size_t hash = hashFunction(item);
146 		size_t index = toIndex(nodes, item, hash);
147 		if (index == size_t.max)
148 			return false;
149 		nodes[index].used = false;
150 		destroy(nodes[index].data);
151 		_length--;
152 		return true;
153 	}
154 
155 	/**
156 	 * Returns:
157 	 *     A range over the set.
158 	 */
159 	auto range(this This)() nothrow pure @nogc @safe
160 	{
161 		return Range!(This)(nodes);
162 	}
163 
164 	/// ditto
165 	alias opSlice = range;
166 
167 private:
168 
169 	import containers.internal.storage_type : ContainerStorageType;
170 	import containers.internal.element_type : ContainerElementType;
171 	import std.experimental.allocator.mallocator : Mallocator;
172 	import core.memory : GC;
173 
174 	enum DEFAULT_INITIAL_CAPACITY = 8;
175 
176 	static struct Range(ThisT)
177 	{
178 		ET front()
179 		{
180 			return cast(typeof(return)) nodes[index].data;
181 		}
182 
183 		bool empty() const pure nothrow @safe @nogc @property
184 		{
185 			return index >= nodes.length;
186 		}
187 
188 		void popFront() pure nothrow @safe @nogc
189 		{
190 			index++;
191 			while (index < nodes.length && !nodes[index].used)
192 				index++;
193 		}
194 
195 	private:
196 
197 		alias ET = ContainerElementType!(ThisT, T);
198 
199 		this(const Node[] nodes)
200 		{
201 			this.nodes = nodes;
202 			while (true)
203 			{
204 				if (index >= nodes.length || nodes[index].used)
205 					break;
206 				index++;
207 			}
208 		}
209 
210 		size_t index;
211 		const Node[] nodes;
212 	}
213 
214 	void grow()
215 	{
216 		immutable size_t newCapacity = nodes.length << 1;
217 		Node[] newNodes = (cast (Node*) Mallocator.instance.allocate(newCapacity * Node.sizeof))
218 			[0 .. newCapacity];
219 		newNodes[] = Node.init;
220 		static if (supportGC)
221 			GC.addRange(newNodes.ptr, newNodes.length, typeid(typeof(nodes)));
222 		foreach (ref node; nodes)
223 		{
224 			immutable size_t newIndex = toIndex(newNodes, node.data, node.hash);
225 			newNodes[newIndex] = node;
226 		}
227 		static if (supportGC)
228 			GC.removeRange(nodes.ptr);
229 		Mallocator.instance.deallocate(nodes);
230 		nodes = newNodes;
231 	}
232 
233 	void initialize(size_t nodeCount)
234 	{
235 		nodes = (cast (Node*) Mallocator.instance.allocate(nodeCount * Node.sizeof))
236 			[0 .. nodeCount];
237 		nodes[] = Node.init;
238 		_length = 0;
239 	}
240 
241 	/**
242 	 * Returns:
243 	 *     size_t.max if the
244 	 */
245 	static size_t toIndex(const Node[] n, T item, size_t hash) nothrow @safe
246 	{
247 		immutable size_t bucketMask = (n.length - 1);
248 		immutable size_t index = hash & bucketMask;
249 		size_t i = index;
250 		while (n[i].used && n[i].data != item)
251 		{
252 			i = (i + 1) & bucketMask;
253 			if (i == index)
254 				return size_t.max;
255 		}
256 		return i;
257 	}
258 
259 	Node[] nodes;
260 	size_t _length;
261 
262 	struct Node
263 	{
264 		ContainerStorageType!T data;
265 		bool used;
266 		size_t hash;
267 	}
268 }
269 
270 unittest
271 {
272 	import std.string : format;
273 	import std.algorithm : equal, sort;
274 	import std.range : iota;
275 	import std.array : array;
276 	OpenHashSet!int ints;
277 	assert (ints.empty);
278 	assert (equal(ints[], cast(int[]) []));
279 	ints.clear();
280 	ints.insert(10);
281 	assert (!ints.empty);
282 	assert (ints.length == 1);
283 	assert (equal(ints[], [10]));
284 	assert (ints.contains(10));
285 	ints.clear();
286 	assert (ints.length == 0);
287 	assert (ints.empty);
288 	ints ~= 0;
289 	assert (!ints.empty);
290 	assert (ints.length == 1);
291 	assert (equal(ints[], [0]));
292 	ints.clear();
293 	assert (ints.length == 0);
294 	assert (ints.empty);
295 	foreach (i; 0 .. 100)
296 		ints ~= i;
297 	assert (ints.length == 100, "%d".format(ints.length));
298 	assert (!ints.empty);
299 	foreach (i; 0 .. 100)
300 		assert (i in ints);
301 	assert (equal(ints[].array().sort(), iota(0, 100)));
302 	assert (ints.insert(10) == false);
303 	auto ohs = OpenHashSet!int(8);
304 	assert (!ohs.remove(1000));
305 	assert (ohs.contains(99) == false);
306 	assert (ohs.insert(10) == true);
307 	assert (ohs.insert(10) == false);
308 	foreach (i; 0 .. 7)
309 		ohs.insert(i);
310 	assert (ohs.contains(6));
311 	assert (!ohs.contains(100));
312 	assert (!ohs.remove(9999));
313 	assert (ohs.remove(0));
314 	assert (ohs.remove(1));
315 }