1 /** 2 * Hash Set 3 * Copyright: © 2014 Economic Modeling Specialists, Intl. 4 * Authors: Brian Schott 5 * License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0) 6 */ 7 8 module containers.hashset; 9 10 import containers.internal.hash : generateHash; 11 import containers.internal.node : shouldAddGCRange; 12 13 /** 14 * Hash Set. 15 * Params: 16 * T = the element type 17 * hashFunction = the hash function to use on the elements 18 */ 19 struct HashSet(T, alias hashFunction = generateHash!T, bool supportGC = shouldAddGCRange!T) 20 { 21 this(this) @disable; 22 23 /** 24 * Constructs a HashSet with an initial bucket count of bucketCount. 25 * bucketCount must be a power of two. 26 */ 27 this(size_t bucketCount) 28 in 29 { 30 assert ((bucketCount & (bucketCount - 1)) == 0, "bucketCount must be a power of two"); 31 } 32 body 33 { 34 initialize(bucketCount); 35 } 36 37 ~this() 38 { 39 import std.allocator : Mallocator, deallocate; 40 foreach (ref bucket; buckets) 41 typeid(typeof(bucket)).destroy(&bucket); 42 static if (supportGC && shouldAddGCRange!T) 43 GC.removeRange(buckets.ptr); 44 Mallocator.it.deallocate(buckets); 45 } 46 47 /** 48 * Removes all items from the set 49 */ 50 void clear() 51 { 52 foreach (ref bucket; buckets) 53 destroy(bucket); 54 _length = 0; 55 } 56 57 /** 58 * Removes the given item from the set. 59 * Returns: false if the value was not present 60 */ 61 bool remove(T value) 62 { 63 hash_t hash = generateHash(value); 64 size_t index = hashToIndex(hash); 65 static if (storeHash) 66 immutable removed = buckets[index].remove(Node(hash, value)); 67 else 68 immutable removed = buckets[index].remove(Node(value)); 69 if (removed) 70 --_length; 71 return removed; 72 } 73 74 /** 75 * Returns: true if value is contained in the set. 76 */ 77 bool contains(T value) inout nothrow 78 { 79 return (value in this) !is null; 80 } 81 82 /** 83 * Supports $(B a in b) syntax 84 */ 85 inout(T)* opBinaryRight(string op)(T value) inout nothrow if (op == "in") 86 { 87 if (buckets.length == 0 || _length == 0) 88 return null; 89 hash_t hash = generateHash(value); 90 size_t index = hashToIndex(hash); 91 return buckets[index].get(value, hash); 92 } 93 94 /** 95 * Inserts the given item into the set. 96 * Params: value = the value to insert 97 * Returns: true if the value was actually inserted, or false if it was 98 * already present. 99 */ 100 bool insert(T value) 101 { 102 if (buckets.length == 0) 103 initialize(4); 104 hash_t hash = generateHash(value); 105 size_t index = hashToIndex(hash); 106 static if (storeHash) 107 auto r = buckets[index].insert(Node(hash, value)); 108 else 109 auto r = buckets[index].insert(Node(value)); 110 if (r) 111 ++_length; 112 return r; 113 } 114 115 /// ditto 116 alias put = insert; 117 118 /** 119 * Returns: true if the set has no items 120 */ 121 bool empty() const nothrow pure @nogc @safe @property 122 { 123 return _length == 0; 124 } 125 126 /** 127 * Returns: the number of items in the set 128 */ 129 size_t length() const nothrow pure @nogc @safe @property 130 { 131 return _length; 132 } 133 134 /** 135 * Forward range interface 136 */ 137 auto range(this This)() nothrow @nogc @trusted @property 138 { 139 return Range!(This)(&this); 140 } 141 142 /// ditto 143 alias opSlice = range; 144 145 private: 146 147 import containers.internal.node : shouldAddGCRange; 148 import containers.internal.storage_type : ContainerStorageType; 149 import containers.internal.element_type : ContainerElementType; 150 import containers.unrolledlist : UnrolledList; 151 import core.memory : GC; 152 import std.allocator : allocate, deallocate, Mallocator; 153 import std.conv : emplace; 154 import std.traits : isBasicType; 155 156 enum bool storeHash = !isBasicType!T; 157 158 void initialize(size_t bucketCount) 159 { 160 buckets = cast(Bucket[]) Mallocator.it.allocate(bucketCount * Bucket.sizeof); 161 assert (buckets.length == bucketCount); 162 foreach (ref bucket; buckets) 163 emplace(&bucket); 164 static if (supportGC && shouldAddGCRange!T) 165 GC.addRange(buckets.ptr, buckets.length * Bucket.sizeof); 166 } 167 168 static struct Range(ThisT) 169 { 170 this(ThisT* t) 171 { 172 foreach (i, ref bucket; t.buckets) 173 { 174 bucketIndex = i; 175 if (bucket.root !is null) 176 { 177 currentNode = cast(Bucket.BucketNode*) bucket.root; 178 break; 179 } 180 } 181 this.t = t; 182 } 183 184 bool empty() const nothrow @safe @nogc @property 185 { 186 return currentNode is null; 187 } 188 189 ET front() nothrow @safe @nogc @property 190 { 191 return cast(ET) currentNode.items[nodeIndex].value; 192 } 193 194 void popFront() nothrow @trusted @nogc 195 { 196 if (nodeIndex + 1 < currentNode.l) 197 { 198 ++nodeIndex; 199 return; 200 } 201 else 202 { 203 if (currentNode.next is null) 204 { 205 ++bucketIndex; 206 while (bucketIndex < t.buckets.length && t.buckets[bucketIndex].root is null) 207 ++bucketIndex; 208 nodeIndex = 0; 209 if (bucketIndex < t.buckets.length) 210 currentNode = cast(Bucket.BucketNode*) t.buckets[bucketIndex].root; 211 else 212 currentNode = null; 213 } 214 else 215 currentNode = currentNode.next; 216 } 217 } 218 219 private: 220 alias ET = ContainerElementType!(ThisT, T); 221 ThisT* t; 222 Bucket.BucketNode* currentNode; 223 size_t bucketIndex; 224 size_t nodeIndex; 225 } 226 227 bool shouldRehash() const pure nothrow @safe 228 { 229 return (cast(float) _length / cast(float) buckets.length) > 0.75; 230 } 231 232 void rehash() @trusted 233 { 234 immutable size_t newLength = buckets.length << 1; 235 immutable size_t newSize = newLength * Bucket.sizeof; 236 Bucket[] oldBuckets = buckets; 237 buckets = cast(Bucket[]) Mallocator.it.allocate(newSize); 238 assert (buckets); 239 assert (buckets.length == newLength); 240 foreach (ref bucket; buckets) 241 emplace(&bucket); 242 static if (supportGC && shouldAddGCRange!T) 243 GC.addRange(buckets.ptr, buckets.length * Bucket.sizeof); 244 foreach (ref const bucket; oldBuckets) 245 { 246 for (Bucket.BucketNode* node = cast(Bucket.BucketNode*) bucket.root; node !is null; node = node.next) 247 { 248 for (size_t i = 0; i < node.l; ++i) 249 { 250 static if (storeHash) 251 { 252 immutable size_t index = hashToIndex(node.items[i].hash); 253 buckets[index].insert(Node(node.items[i].hash, node.items[i].value)); 254 } 255 else 256 { 257 immutable size_t hash = generateHash(node.items[i].value); 258 immutable size_t index = hashToIndex(hash); 259 buckets[index].insert(Node(node.items[i].value)); 260 } 261 } 262 } 263 } 264 foreach (ref bucket; oldBuckets) 265 typeid(Bucket).destroy(&bucket); 266 static if (supportGC && shouldAddGCRange!T) 267 GC.removeRange(oldBuckets.ptr); 268 Mallocator.it.deallocate(oldBuckets); 269 } 270 271 size_t hashToIndex(hash_t hash) const pure nothrow @safe 272 in 273 { 274 assert (buckets.length > 0); 275 } 276 out (result) 277 { 278 import std.string : format; 279 assert (result < buckets.length, "%d, %d".format(result, buckets.length)); 280 } 281 body 282 { 283 return hash & (buckets.length - 1); 284 } 285 286 static struct Bucket 287 { 288 ~this() 289 { 290 BucketNode* current = root; 291 BucketNode* previous; 292 while (true) 293 { 294 if (previous !is null) 295 { 296 typeid(BucketNode).destroy(&previous); 297 deallocate(Mallocator.it, previous); 298 } 299 previous = current; 300 if (current is null) 301 break; 302 current = current.next; 303 } 304 } 305 306 static struct BucketNode 307 { 308 ContainerStorageType!(T)* get(Node n) 309 { 310 for (size_t i = 0; i < l; ++i) 311 { 312 static if (storeHash) 313 { 314 if (items[i].hash == n.hash && items[i].value == n.value) 315 return &items[i].value; 316 } 317 else 318 { 319 if (items[i].value == n.value) 320 return &items[i].value; 321 } 322 } 323 return null; 324 } 325 326 void insert(Node n) 327 { 328 items[l] = n; 329 ++l; 330 } 331 332 bool remove(Node n) 333 { 334 import std.algorithm : SwapStrategy, remove; 335 336 foreach (size_t i, ref node; items) 337 { 338 static if (storeHash) 339 immutable bool matches = node.hash == n.hash && node.value == n.value; 340 else 341 immutable bool matches = node.value == n.value; 342 if (matches) 343 { 344 items[].remove!(SwapStrategy.unstable)(i); 345 l--; 346 return true; 347 } 348 } 349 return false; 350 } 351 352 import containers.internal.node : fatNodeCapacity; 353 354 BucketNode* next; 355 size_t l; 356 Node[fatNodeCapacity!(Node.sizeof, 1, size_t, 128)] items; 357 } 358 359 bool insert(Node n) 360 { 361 BucketNode* prev; 362 BucketNode* current; 363 for (current = root; current !is null; prev = current) 364 { 365 if (current.l >= current.items.length) 366 { 367 current = current.next; 368 continue; 369 } 370 if (current.get(n)) 371 return false; 372 current.insert(n); 373 return true; 374 } 375 BucketNode* newNode = cast(BucketNode*) Mallocator.it.allocate(BucketNode.sizeof); 376 *newNode = BucketNode.init; 377 newNode.insert(n); 378 if (prev is null) 379 root = newNode; 380 else 381 prev.next = newNode; 382 return true; 383 } 384 385 bool remove(Node n) 386 { 387 import std.allocator : deallocate; 388 389 BucketNode* current = root; 390 BucketNode* previous; 391 while (current !is null) 392 { 393 immutable removed = current.remove(n); 394 if (removed) 395 { 396 if (current.l == 0) 397 { 398 if (previous !is null) 399 previous.next = current.next; 400 else 401 root = null; 402 typeid(BucketNode).destroy(¤t); 403 deallocate(Mallocator.it, current); 404 } 405 return true; 406 } 407 previous = current; 408 current = current.next; 409 } 410 return false; 411 } 412 413 inout(T)* get(T value, size_t hash) inout 414 { 415 for (BucketNode* current = cast(BucketNode*) root; current !is null; current = current.next) 416 { 417 static if (storeHash) 418 auto v = current.get(Node(hash, value)); 419 else 420 auto v = current.get(Node(value)); 421 if (v !is null) 422 return cast(typeof(return)) v; 423 } 424 return null; 425 } 426 427 BucketNode* root; 428 } 429 430 struct Node 431 { 432 bool opEquals(ref const T v) const 433 { 434 return v == value; 435 } 436 437 bool opEquals(ref const Node other) const 438 { 439 static if (storeHash) 440 if (other.hash != hash) 441 return false; 442 return other.value == value; 443 } 444 445 static if (storeHash) 446 hash_t hash; 447 ContainerStorageType!T value; 448 } 449 450 Bucket[] buckets; 451 size_t _length; 452 } 453 454 /// 455 unittest 456 { 457 import std.array : array; 458 import std.algorithm : canFind; 459 import std.uuid : randomUUID; 460 461 auto s = HashSet!string(16); 462 assert(!s.contains("nonsense")); 463 assert(s.put("test")); 464 assert(s.contains("test")); 465 assert(!s.put("test")); 466 assert(s.contains("test")); 467 assert(s.length == 1); 468 assert(!s.contains("nothere")); 469 s.put("a"); 470 s.put("b"); 471 s.put("c"); 472 s.put("d"); 473 string[] strings = s.range.array; 474 assert(strings.canFind("a")); 475 assert(strings.canFind("b")); 476 assert(strings.canFind("c")); 477 assert(strings.canFind("d")); 478 assert(strings.canFind("test")); 479 assert(*("a" in s) == "a"); 480 assert(*("b" in s) == "b"); 481 assert(*("c" in s) == "c"); 482 assert(*("d" in s) == "d"); 483 assert(*("test" in s) == "test"); 484 assert(strings.length == 5); 485 assert(s.remove("test")); 486 assert(s.length == 4); 487 s.clear(); 488 assert(s.length == 0); 489 assert(s.empty); 490 s.put("abcde"); 491 assert(s.length == 1); 492 foreach (i; 0 .. 10_000) 493 { 494 s.put(randomUUID().toString); 495 } 496 assert(s.length == 10_001); 497 498 // Make sure that there's no range violation slicing an empty set 499 HashSet!int e; 500 foreach (i; e[]) 501 assert(i > 0); 502 503 HashSet!int f; 504 foreach (i; 0 .. 100) 505 assert(f.insert(i)); 506 foreach (i; 0 .. 100) 507 assert(f.remove(i)); 508 foreach (i; 0 .. 100) 509 assert(!f.remove(i)); 510 511 HashSet!int g; 512 foreach (i; 0 .. 100) 513 assert(g.insert(i)); 514 }