1 /** 2 * Open-Addressed Hash Set 3 * Copyright: © 2014 Economic Modeling Specialists, Intl. 4 * Authors: Brian Schott 5 * License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0) 6 */ 7 8 module containers.openhashset; 9 10 import containers.internal.hash : generateHash; 11 import containers.internal.node : shouldAddGCRange; 12 13 /** 14 * Simple open-addressed hash set. Use this instead of HashSet when the size and 15 * quantity of the data to be inserted is small. 16 * 17 * Params: 18 * T = the element type of the hash set 19 * hashFunction = the hash function to use 20 * supportGC = if true, calls to GC.addRange and GC.removeRange will be used 21 * to ensure that the GC does not accidentally free memory owned by this 22 * container. 23 */ 24 struct OpenHashSet(T, alias hashFunction = generateHash!T, bool supportGC = shouldAddGCRange!T) 25 { 26 /** 27 * Disallow copy construction 28 */ 29 this(this) @disable; 30 31 /** 32 * Initializes the hash set with the given initial capacity. 33 * 34 * Params: 35 * initialCapacity = the initial capacity for the hash set 36 */ 37 this(size_t initialCapacity) 38 in 39 { 40 assert ((initialCapacity & initialCapacity - 1) == 0, "initialCapacity must be a power of 2"); 41 } 42 body 43 { 44 initialize(initialCapacity); 45 } 46 47 ~this() 48 { 49 foreach (ref node; nodes) 50 typeid(typeof(node)).destroy(&node); 51 static if (supportGC) 52 GC.removeRange(nodes.ptr); 53 Mallocator.it.deallocate(nodes); 54 } 55 56 /** 57 * Removes all items from the hash set. 58 */ 59 void clear() 60 { 61 if (empty) 62 return; 63 foreach (ref node; nodes) 64 { 65 typeid(typeof(node)).destroy(&node); 66 node.used = false; 67 } 68 _length = 0; 69 } 70 71 /// 72 bool empty() const pure nothrow @nogc @safe @property 73 { 74 return _length == 0; 75 } 76 77 /// 78 size_t length() const pure nothrow @nogc @safe @property 79 { 80 return _length; 81 } 82 83 /** 84 * Returns: 85 * $(B true) if the hash set contains the given item, false otherwise. 86 */ 87 bool contains(T item) const nothrow @safe 88 { 89 if (empty) 90 return false; 91 immutable size_t hash = hashFunction(item); 92 size_t index = toIndex(nodes, item, hash); 93 if (index == size_t.max) 94 return false; 95 return nodes[index].hash == hash && nodes[index].data == item; 96 } 97 98 /// ditto 99 bool opBinaryRight(string op)(T item) inout nothrow if (op == "in") 100 { 101 return contains(item); 102 } 103 104 /** 105 * Inserts the gien item into the set. 106 * 107 * Returns: 108 * $(B true) if the item was inserted, false if it was already present. 109 */ 110 bool insert(T item) 111 { 112 if (nodes.length == 0) 113 initialize(DEFAULT_INITIAL_CAPACITY); 114 immutable size_t hash = hashFunction(item); 115 size_t index = toIndex(nodes, item, hash); 116 if (index == size_t.max) 117 { 118 grow(); 119 index = toIndex(nodes, item, hash); 120 } 121 else if (nodes[index].used && nodes[index].hash == hash && nodes[index].data == item) 122 return false; 123 nodes[index].used = true; 124 nodes[index].hash = hash; 125 nodes[index].data = item; 126 _length++; 127 return true; 128 } 129 130 /// ditto 131 bool opOpAssign(string op)(T item) if (op == "~") 132 { 133 return insert(item); 134 } 135 136 /** 137 * Params: 138 * item = the item to remove 139 * Returns: 140 * $(B true) if the item was removed, $(B false) if it was not present 141 */ 142 bool remove(T item) 143 { 144 if (empty) 145 return false; 146 immutable size_t hash = hashFunction(item); 147 size_t index = toIndex(nodes, item, hash); 148 if (index == size_t.max) 149 return false; 150 nodes[index].used = false; 151 destroy(nodes[index].data); 152 _length--; 153 return true; 154 } 155 156 /** 157 * Returns: 158 * A range over the set. 159 */ 160 auto range(this This)() nothrow pure @nogc @safe 161 { 162 return Range!(This)(nodes); 163 } 164 165 /// ditto 166 alias opSlice = range; 167 168 private: 169 170 import containers.internal.storage_type : ContainerStorageType; 171 import containers.internal.element_type : ContainerElementType; 172 import std.allocator:Mallocator; 173 import core.memory : GC; 174 175 enum DEFAULT_INITIAL_CAPACITY = 8; 176 177 static struct Range(ThisT) 178 { 179 ET front() 180 { 181 return cast(typeof(return)) nodes[index].data; 182 } 183 184 bool empty() const pure nothrow @safe @nogc @property 185 { 186 return index >= nodes.length; 187 } 188 189 void popFront() pure nothrow @safe @nogc 190 { 191 index++; 192 while (index < nodes.length && !nodes[index].used) 193 index++; 194 } 195 196 private: 197 198 alias ET = ContainerElementType!(ThisT, T); 199 200 this(const Node[] nodes) 201 { 202 this.nodes = nodes; 203 while (true) 204 { 205 if (index >= nodes.length || nodes[index].used) 206 break; 207 index++; 208 } 209 } 210 211 size_t index; 212 const Node[] nodes; 213 } 214 215 void grow() 216 { 217 immutable size_t newCapacity = nodes.length << 1; 218 Node[] newNodes = (cast (Node*) Mallocator.it.allocate(newCapacity * Node.sizeof)) 219 [0 .. newCapacity]; 220 newNodes[] = Node.init; 221 static if (supportGC) 222 GC.addRange(newNodes.ptr, newNodes.length, typeid(typeof(nodes))); 223 foreach (ref node; nodes) 224 { 225 immutable size_t newIndex = toIndex(newNodes, node.data, node.hash); 226 newNodes[newIndex] = node; 227 } 228 static if (supportGC) 229 GC.removeRange(nodes.ptr); 230 Mallocator.it.deallocate(nodes); 231 nodes = newNodes; 232 } 233 234 void initialize(size_t nodeCount) 235 { 236 nodes = (cast (Node*) Mallocator.it.allocate(nodeCount * Node.sizeof)) 237 [0 .. nodeCount]; 238 nodes[] = Node.init; 239 _length = 0; 240 } 241 242 /** 243 * Returns: 244 * size_t.max if the 245 */ 246 static size_t toIndex(const Node[] n, T item, size_t hash) nothrow @safe 247 { 248 immutable size_t bucketMask = (n.length - 1); 249 immutable size_t index = hash & bucketMask; 250 size_t i = index; 251 while (n[i].used && n[i].data != item) 252 { 253 i = (i + 1) & bucketMask; 254 if (i == index) 255 return size_t.max; 256 } 257 return i; 258 } 259 260 Node[] nodes; 261 size_t _length; 262 263 struct Node 264 { 265 ContainerStorageType!T data; 266 bool used; 267 size_t hash; 268 } 269 } 270 271 unittest 272 { 273 import std.string : format; 274 import std.algorithm : equal, sort; 275 import std.range : iota; 276 import std.array : array; 277 OpenHashSet!int ints; 278 assert (ints.empty); 279 assert (equal(ints[], cast(int[]) [])); 280 ints.clear(); 281 ints.insert(10); 282 assert (!ints.empty); 283 assert (ints.length == 1); 284 assert (equal(ints[], [10])); 285 assert (ints.contains(10)); 286 ints.clear(); 287 assert (ints.length == 0); 288 assert (ints.empty); 289 ints ~= 0; 290 assert (!ints.empty); 291 assert (ints.length == 1); 292 assert (equal(ints[], [0])); 293 ints.clear(); 294 assert (ints.length == 0); 295 assert (ints.empty); 296 foreach (i; 0 .. 100) 297 ints ~= i; 298 assert (ints.length == 100, "%d".format(ints.length)); 299 assert (!ints.empty); 300 foreach (i; 0 .. 100) 301 assert (i in ints); 302 assert (equal(ints[].array().sort(), iota(0, 100))); 303 assert (ints.insert(10) == false); 304 auto ohs = OpenHashSet!int(8); 305 assert (!ohs.remove(1000)); 306 assert (ohs.contains(99) == false); 307 assert (ohs.insert(10) == true); 308 assert (ohs.insert(10) == false); 309 foreach (i; 0 .. 7) 310 ohs.insert(i); 311 assert (ohs.contains(6)); 312 assert (!ohs.contains(100)); 313 assert (!ohs.remove(9999)); 314 assert (ohs.remove(0)); 315 assert (ohs.remove(1)); 316 }