1 /** 2 * Open-Addressed Hash Set 3 * Copyright: © 2014 Economic Modeling Specialists, Intl. 4 * Authors: Brian Schott 5 * License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0) 6 */ 7 module containers.openhashset; 8 9 import containers.internal.hash : generateHash; 10 import containers.internal.node : shouldAddGCRange; 11 12 /** 13 * Simple open-addressed hash set. Use this instead of HashSet when the size and 14 * quantity of the data to be inserted is small. 15 * 16 * Params: 17 * T = the element type of the hash set 18 * hashFunction = the hash function to use 19 * supportGC = if true, calls to GC.addRange and GC.removeRange will be used 20 * to ensure that the GC does not accidentally free memory owned by this 21 * container. 22 */ 23 struct OpenHashSet(T, alias hashFunction = generateHash!T, bool supportGC = shouldAddGCRange!T) 24 { 25 /** 26 * Disallow copy construction 27 */ 28 this(this) @disable; 29 30 /** 31 * Initializes the hash set with the given initial capacity. 32 * 33 * Params: 34 * initialCapacity = the initial capacity for the hash set 35 */ 36 this(size_t initialCapacity) 37 in 38 { 39 assert ((initialCapacity & initialCapacity - 1) == 0, "initialCapacity must be a power of 2"); 40 } 41 body 42 { 43 initialize(initialCapacity); 44 } 45 46 ~this() 47 { 48 foreach (ref node; nodes) 49 typeid(typeof(node)).destroy(&node); 50 static if (supportGC) 51 GC.removeRange(nodes.ptr); 52 Mallocator.instance.deallocate(nodes); 53 } 54 55 /** 56 * Removes all items from the hash set. 57 */ 58 void clear() 59 { 60 if (empty) 61 return; 62 foreach (ref node; nodes) 63 { 64 typeid(typeof(node)).destroy(&node); 65 node.used = false; 66 } 67 _length = 0; 68 } 69 70 /// 71 bool empty() const pure nothrow @nogc @safe @property 72 { 73 return _length == 0; 74 } 75 76 /// 77 size_t length() const pure nothrow @nogc @safe @property 78 { 79 return _length; 80 } 81 82 /** 83 * Returns: 84 * $(B true) if the hash set contains the given item, false otherwise. 85 */ 86 bool contains(T item) const nothrow @safe 87 { 88 if (empty) 89 return false; 90 immutable size_t hash = hashFunction(item); 91 size_t index = toIndex(nodes, item, hash); 92 if (index == size_t.max) 93 return false; 94 return nodes[index].hash == hash && nodes[index].data == item; 95 } 96 97 /// ditto 98 bool opBinaryRight(string op)(T item) inout nothrow if (op == "in") 99 { 100 return contains(item); 101 } 102 103 /** 104 * Inserts the gien item into the set. 105 * 106 * Returns: 107 * $(B true) if the item was inserted, false if it was already present. 108 */ 109 bool insert(T item) 110 { 111 if (nodes.length == 0) 112 initialize(DEFAULT_INITIAL_CAPACITY); 113 immutable size_t hash = hashFunction(item); 114 size_t index = toIndex(nodes, item, hash); 115 if (index == size_t.max) 116 { 117 grow(); 118 index = toIndex(nodes, item, hash); 119 } 120 else if (nodes[index].used && nodes[index].hash == hash && nodes[index].data == item) 121 return false; 122 nodes[index].used = true; 123 nodes[index].hash = hash; 124 nodes[index].data = item; 125 _length++; 126 return true; 127 } 128 129 /// ditto 130 bool opOpAssign(string op)(T item) if (op == "~") 131 { 132 return insert(item); 133 } 134 135 /** 136 * Params: 137 * item = the item to remove 138 * Returns: 139 * $(B true) if the item was removed, $(B false) if it was not present 140 */ 141 bool remove(T item) 142 { 143 if (empty) 144 return false; 145 immutable size_t hash = hashFunction(item); 146 size_t index = toIndex(nodes, item, hash); 147 if (index == size_t.max) 148 return false; 149 nodes[index].used = false; 150 destroy(nodes[index].data); 151 _length--; 152 return true; 153 } 154 155 /** 156 * Returns: 157 * A range over the set. 158 */ 159 auto range(this This)() nothrow pure @nogc @safe 160 { 161 return Range!(This)(nodes); 162 } 163 164 /// ditto 165 alias opSlice = range; 166 167 private: 168 169 import containers.internal.storage_type : ContainerStorageType; 170 import containers.internal.element_type : ContainerElementType; 171 import std.experimental.allocator.mallocator : Mallocator; 172 import core.memory : GC; 173 174 enum DEFAULT_INITIAL_CAPACITY = 8; 175 176 static struct Range(ThisT) 177 { 178 ET front() 179 { 180 return cast(typeof(return)) nodes[index].data; 181 } 182 183 bool empty() const pure nothrow @safe @nogc @property 184 { 185 return index >= nodes.length; 186 } 187 188 void popFront() pure nothrow @safe @nogc 189 { 190 index++; 191 while (index < nodes.length && !nodes[index].used) 192 index++; 193 } 194 195 private: 196 197 alias ET = ContainerElementType!(ThisT, T); 198 199 this(const Node[] nodes) 200 { 201 this.nodes = nodes; 202 while (true) 203 { 204 if (index >= nodes.length || nodes[index].used) 205 break; 206 index++; 207 } 208 } 209 210 size_t index; 211 const Node[] nodes; 212 } 213 214 void grow() 215 { 216 immutable size_t newCapacity = nodes.length << 1; 217 Node[] newNodes = (cast (Node*) Mallocator.instance.allocate(newCapacity * Node.sizeof)) 218 [0 .. newCapacity]; 219 newNodes[] = Node.init; 220 static if (supportGC) 221 GC.addRange(newNodes.ptr, newNodes.length, typeid(typeof(nodes))); 222 foreach (ref node; nodes) 223 { 224 immutable size_t newIndex = toIndex(newNodes, node.data, node.hash); 225 newNodes[newIndex] = node; 226 } 227 static if (supportGC) 228 GC.removeRange(nodes.ptr); 229 Mallocator.instance.deallocate(nodes); 230 nodes = newNodes; 231 } 232 233 void initialize(size_t nodeCount) 234 { 235 nodes = (cast (Node*) Mallocator.instance.allocate(nodeCount * Node.sizeof)) 236 [0 .. nodeCount]; 237 nodes[] = Node.init; 238 _length = 0; 239 } 240 241 /** 242 * Returns: 243 * size_t.max if the 244 */ 245 static size_t toIndex(const Node[] n, T item, size_t hash) nothrow @safe 246 { 247 immutable size_t bucketMask = (n.length - 1); 248 immutable size_t index = hash & bucketMask; 249 size_t i = index; 250 while (n[i].used && n[i].data != item) 251 { 252 i = (i + 1) & bucketMask; 253 if (i == index) 254 return size_t.max; 255 } 256 return i; 257 } 258 259 Node[] nodes; 260 size_t _length; 261 262 struct Node 263 { 264 ContainerStorageType!T data; 265 bool used; 266 size_t hash; 267 } 268 } 269 270 unittest 271 { 272 import std.string : format; 273 import std.algorithm : equal, sort; 274 import std.range : iota; 275 import std.array : array; 276 OpenHashSet!int ints; 277 assert (ints.empty); 278 assert (equal(ints[], cast(int[]) [])); 279 ints.clear(); 280 ints.insert(10); 281 assert (!ints.empty); 282 assert (ints.length == 1); 283 assert (equal(ints[], [10])); 284 assert (ints.contains(10)); 285 ints.clear(); 286 assert (ints.length == 0); 287 assert (ints.empty); 288 ints ~= 0; 289 assert (!ints.empty); 290 assert (ints.length == 1); 291 assert (equal(ints[], [0])); 292 ints.clear(); 293 assert (ints.length == 0); 294 assert (ints.empty); 295 foreach (i; 0 .. 100) 296 ints ~= i; 297 assert (ints.length == 100, "%d".format(ints.length)); 298 assert (!ints.empty); 299 foreach (i; 0 .. 100) 300 assert (i in ints); 301 assert (equal(ints[].array().sort(), iota(0, 100))); 302 assert (ints.insert(10) == false); 303 auto ohs = OpenHashSet!int(8); 304 assert (!ohs.remove(1000)); 305 assert (ohs.contains(99) == false); 306 assert (ohs.insert(10) == true); 307 assert (ohs.insert(10) == false); 308 foreach (i; 0 .. 7) 309 ohs.insert(i); 310 assert (ohs.contains(6)); 311 assert (!ohs.contains(100)); 312 assert (!ohs.remove(9999)); 313 assert (ohs.remove(0)); 314 assert (ohs.remove(1)); 315 }