1 /**
2  * Immutable hash set.
3  * Copyright: © 2015 Economic Modeling Specialists, Intl.
4  * Authors: Brian Schott
5  * License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
6  */
7 
8 module containers.immutablehashset;
9 
10 /**
11  * The immutable hash set is useful for constructing a read-only collection that
12  * supports quickly determining if an element is present.
13  *
14  * Because the set does not support inserting, it only takes up as much memory
15  * as is necessary to contain the elements provided at construction. Memory is
16  * managed my malloc/free.
17  */
18 struct ImmutableHashSet(T, alias hashFunction)
19 {
20 	///
21 	@disable this();
22 	///
23 	@disable this(this);
24 
25 	/**
26 	 * Constructs an immutable hash set from the given values. The values must
27 	 * not have any duplicates.
28 	 */
29 	this(const T[] values) immutable
30 	in
31 	{
32 		import std.algorithm : sort, uniq;
33 		import std.array : array;
34 		assert (values.dup.sort().uniq().array().length == values.length);
35 	}
36 	body
37 	{
38 		empty = values.length == 0;
39 		length = values.length;
40 		if (empty)
41 			return;
42 		immutable float a = (cast(float) values.length) / .75f;
43 		size_t bucketCount = 1;
44 		while (bucketCount <= cast(size_t) a)
45 			bucketCount <<= 1;
46 		Node[][] mutableBuckets = cast(Node[][]) Mallocator.instance.allocate((Node[]).sizeof * bucketCount);
47 		Node[] mutableNodes = cast(Node[]) Mallocator.instance.allocate(Node.sizeof * values.length);
48 
49 		size_t[] lengths = cast(size_t[]) Mallocator.instance.allocate(size_t.sizeof * bucketCount);
50 		lengths[] = 0;
51 		scope(exit) Mallocator.instance.deallocate(lengths);
52 
53 		size_t[] indexes = cast(size_t[]) Mallocator.instance.allocate(size_t.sizeof * values.length);
54 		scope(exit) Mallocator.instance.deallocate(indexes);
55 
56 		size_t[] hashes = cast(size_t[]) Mallocator.instance.allocate(size_t.sizeof * values.length);
57 		scope(exit) Mallocator.instance.deallocate(hashes);
58 
59 		foreach (i, ref value; values)
60 		{
61 			hashes[i] = hashFunction(value);
62 			indexes[i] = hashes[i] & (mutableBuckets.length - 1);
63 			lengths[indexes[i]]++;
64 		}
65 
66 		size_t j = 0;
67 		foreach (i, l; lengths)
68 		{
69 			mutableBuckets[i] = mutableNodes[j .. j + l];
70 			j += l;
71 		}
72 
73 		lengths[] = 0;
74 		foreach (i; 0 .. values.length)
75 		{
76 			immutable l = lengths[indexes[i]];
77 			static if (hasMember!(Node, "hash"))
78 				mutableBuckets[indexes[i]][l].hash = hashes[i];
79 			mutableBuckets[indexes[i]][l].value = values[i];
80 			lengths[indexes[i]]++;
81 		}
82 		buckets = cast(immutable) mutableBuckets;
83 		nodes = cast(immutable) mutableNodes;
84 		static if (shouldAddGCRange!T)
85 		{
86 			GC.addRange(buckets.ptr, buckets.length * (Node[]).sizeof);
87 			foreach (ref b; buckets)
88 				GC.addRange(b.ptr, b.length * Node.sizeof);
89 			GC.addRange(nodes.ptr, nodes.length * Node.sizeof);
90 		}
91 	}
92 
93 	~this()
94 	{
95 		static if (shouldAddGCRange!T)
96 		{
97 			GC.removeRange(buckets.ptr);
98 			foreach (ref b; buckets)
99 				GC.removeRange(b.ptr);
100 			GC.removeRange(nodes.ptr);
101 		}
102 		Mallocator.instance.deallocate(cast(void[]) buckets);
103 		Mallocator.instance.deallocate(cast(void[]) nodes);
104 	}
105 
106 	/**
107 	 * Returns: A GC-allocated array containing the contents of this set.
108 	 */
109 	immutable(T)[] opSlice() immutable @safe
110 	{
111 		if (empty)
112 			return [];
113 		T[] values = new T[](nodes.length);
114 		foreach (i, ref v; values)
115 		{
116 			v = nodes[i].value;
117 		}
118 		return values;
119 	}
120 
121 	/**
122 	 * Returns: true if this set contains the given value.
123 	 */
124 	bool contains(T value) immutable
125 	{
126 		if (empty)
127 			return false;
128 		immutable size_t hash = hashFunction(value);
129 		immutable size_t index = hash & (buckets.length - 1);
130 		if (buckets[index].length == 0)
131 			return false;
132 		foreach (ref node; buckets[index])
133 		{
134 			static if (hasMember!(Node, "hash"))
135 				if (hash != node.hash)
136 					continue;
137 			if (node.value != value)
138 				continue;
139 			return true;
140 		}
141 		return false;
142 	}
143 
144 	/**
145 	 * The number of items in the set.
146 	 */
147 	size_t length;
148 
149 	/**
150 	 * True if the set is empty.
151 	 */
152 	bool empty;
153 
154 private:
155 
156 	import std.experimental.allocator.mallocator : Mallocator;
157 	import std.traits : isBasicType, hasMember;
158 	import containers.internal.node : shouldAddGCRange;
159 	import core.memory : GC;
160 
161 	static struct Node
162 	{
163 		T value;
164 		static if (!isBasicType!T)
165 			size_t hash;
166 	}
167 
168 	Node[][] buckets;
169 	Node[] nodes;
170 }
171 
172 ///
173 unittest
174 {
175 	auto ihs1 = immutable ImmutableHashSet!(int, a => a)([1, 3, 5, 19, 31, 40, 17]);
176 	assert (ihs1.contains(1));
177 	assert (ihs1.contains(3));
178 	assert (ihs1.contains(5));
179 	assert (ihs1.contains(19));
180 	assert (ihs1.contains(31));
181 	assert (ihs1.contains(40));
182 	assert (ihs1.contains(17));
183 	assert (!ihs1.contains(100));
184 	assert (ihs1[].length == 7);
185 
186 	auto ihs2 = immutable ImmutableHashSet!(int, a => a)([]);
187 	assert (ihs2.length == 0);
188 	assert (ihs2.empty);
189 	assert (ihs2[].length == 0);
190 	assert (!ihs2.contains(42));
191 }