1 /** 2 * Hash Set 3 * Copyright: © 2015 Economic Modeling Specialists, Intl. 4 * Authors: Brian Schott 5 * License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0) 6 */ 7 8 module containers.hashset; 9 10 private import containers.internal.hash : generateHash; 11 private import containers.internal.node : shouldAddGCRange; 12 private import std.experimental.allocator.mallocator : Mallocator; 13 14 /** 15 * Hash Set. 16 * Params: 17 * T = the element type 18 * Allocator = the allocator to use. Defaults to `Mallocator`. 19 * hashFunction = the hash function to use on the elements 20 * supportGC = true if the container should support holding references to 21 * GC-allocated memory. 22 */ 23 struct HashSet(T, Allocator = Mallocator, alias hashFunction = generateHash!T, 24 bool supportGC = shouldAddGCRange!T) 25 { 26 this(this) @disable; 27 28 private import std.experimental.allocator.common : stateSize; 29 30 static if (stateSize!Allocator != 0) 31 { 32 this() @disable; 33 34 /** 35 * Use the given `allocator` for allocations. 36 */ 37 this(Allocator allocator) 38 in 39 { 40 assert(allocator !is null, "Allocator must not be null"); 41 } 42 body 43 { 44 this.allocator = allocator; 45 } 46 47 /** 48 * Constructs a HashSet with an initial bucket count of bucketCount. 49 * bucketCount must be a power of two. 50 */ 51 this(size_t bucketCount, Allocator allocator) 52 in 53 { 54 assert(allocator !is null, "Allocator must not be null"); 55 assert ((bucketCount & (bucketCount - 1)) == 0, "bucketCount must be a power of two"); 56 } 57 body 58 { 59 initialize(bucketCount); 60 } 61 } 62 else 63 { 64 /** 65 * Constructs a HashSet with an initial bucket count of bucketCount. 66 * bucketCount must be a power of two. 67 */ 68 this(size_t bucketCount) 69 in 70 { 71 assert ((bucketCount & (bucketCount - 1)) == 0, "bucketCount must be a power of two"); 72 } 73 body 74 { 75 initialize(bucketCount); 76 } 77 78 } 79 80 ~this() 81 { 82 import std.experimental.allocator : dispose; 83 import core.memory : GC; 84 static if (useGC) 85 GC.removeRange(buckets.ptr); 86 allocator.dispose(buckets); 87 } 88 89 /** 90 * Removes all items from the set 91 */ 92 void clear() 93 { 94 foreach (ref bucket; buckets) 95 { 96 destroy(bucket); 97 bucket = Bucket.init; 98 } 99 _length = 0; 100 } 101 102 /** 103 * Removes the given item from the set. 104 * Returns: false if the value was not present 105 */ 106 bool remove(T value) 107 { 108 hash_t hash = hashFunction(value); 109 size_t index = hashToIndex(hash); 110 static if (storeHash) 111 immutable bool removed = buckets[index].remove(ItemNode(hash, value)); 112 else 113 immutable bool removed = buckets[index].remove(ItemNode(value)); 114 if (removed) 115 --_length; 116 return removed; 117 } 118 119 /** 120 * Returns: true if value is contained in the set. 121 */ 122 bool contains(T value) inout nothrow 123 { 124 return (value in this) !is null; 125 } 126 127 /** 128 * Supports $(B a in b) syntax 129 */ 130 inout(T)* opBinaryRight(string op)(T value) inout nothrow if (op == "in") 131 { 132 if (buckets.length == 0 || _length == 0) 133 return null; 134 hash_t hash = hashFunction(value); 135 size_t index = hashToIndex(hash); 136 return buckets[index].get(value, hash); 137 } 138 139 /** 140 * Inserts the given item into the set. 141 * Params: value = the value to insert 142 * Returns: true if the value was actually inserted, or false if it was 143 * already present. 144 */ 145 bool insert(T value) 146 { 147 if (buckets.length == 0) 148 initialize(4); 149 hash_t hash = hashFunction(value); 150 size_t index = hashToIndex(hash); 151 static if (storeHash) 152 auto r = buckets[index].insert(ItemNode(hash, value)); 153 else 154 auto r = buckets[index].insert(ItemNode(value)); 155 if (r) 156 ++_length; 157 if (shouldRehash) 158 rehash(); 159 return r; 160 } 161 162 /// ditto 163 alias put = insert; 164 165 /** 166 * Returns: true if the set has no items 167 */ 168 bool empty() const nothrow pure @nogc @safe @property 169 { 170 return _length == 0; 171 } 172 173 /** 174 * Returns: the number of items in the set 175 */ 176 size_t length() const nothrow pure @nogc @safe @property 177 { 178 return _length; 179 } 180 181 /** 182 * Forward range interface 183 */ 184 auto range(this This)() nothrow @nogc @trusted @property 185 { 186 return Range!(This)(&this); 187 } 188 189 /// ditto 190 alias opSlice = range; 191 192 private: 193 194 import containers.internal.node : shouldAddGCRange, fatNodeCapacity; 195 import containers.internal.storage_type : ContainerStorageType; 196 import containers.internal.element_type : ContainerElementType; 197 import containers.internal.mixins : AllocatorState; 198 import containers.unrolledlist : UnrolledList; 199 import std.traits : isBasicType, isPointer; 200 201 enum ITEMS_PER_NODE = fatNodeCapacity!(ItemNode.sizeof, 1, size_t, 128); 202 203 enum bool storeHash = !isBasicType!T; 204 enum bool useGC = supportGC && shouldAddGCRange!T; 205 206 void initialize(size_t bucketCount) 207 { 208 import std.experimental.allocator : makeArray; 209 import core.memory : GC; 210 211 makeBuckets(bucketCount); 212 static if (useGC) 213 GC.addRange(buckets.ptr, buckets.length * Bucket.sizeof); 214 } 215 216 static struct Range(ThisT) 217 { 218 this(ThisT* t) 219 { 220 foreach (i, ref bucket; t.buckets) 221 { 222 bucketIndex = i; 223 if (bucket.root !is null) 224 { 225 currentNode = cast(Bucket.BucketNode*) bucket.root; 226 break; 227 } 228 } 229 this.t = t; 230 } 231 232 bool empty() const nothrow @safe @nogc @property 233 { 234 return currentNode is null; 235 } 236 237 ET front() nothrow @safe @nogc @property 238 { 239 return cast(ET) currentNode.items[nodeIndex].value; 240 } 241 242 void popFront() nothrow @trusted @nogc 243 { 244 if (nodeIndex + 1 < currentNode.l) 245 { 246 ++nodeIndex; 247 return; 248 } 249 else 250 { 251 nodeIndex = 0; 252 if (currentNode.next is null) 253 { 254 ++bucketIndex; 255 while (bucketIndex < t.buckets.length && t.buckets[bucketIndex].root is null) 256 ++bucketIndex; 257 if (bucketIndex < t.buckets.length) 258 currentNode = cast(Bucket.BucketNode*) t.buckets[bucketIndex].root; 259 else 260 currentNode = null; 261 } 262 else 263 { 264 currentNode = currentNode.next; 265 assert(currentNode.l > 0); 266 } 267 } 268 } 269 270 private: 271 alias ET = ContainerElementType!(ThisT, T); 272 ThisT* t; 273 Bucket.BucketNode* currentNode; 274 size_t bucketIndex; 275 size_t nodeIndex; 276 } 277 278 void makeBuckets(size_t bucketCount) 279 { 280 import std.experimental.allocator : makeArray; 281 282 static if (stateSize!Allocator == 0) 283 buckets = allocator.makeArray!Bucket(bucketCount); 284 else 285 { 286 import std.conv:emplace; 287 288 buckets = cast(Bucket[]) allocator.allocate(Bucket.sizeof * bucketCount); 289 foreach (ref bucket; buckets) 290 emplace!Bucket(&bucket, allocator); 291 } 292 } 293 294 bool shouldRehash() const pure nothrow @safe @nogc 295 { 296 immutable float numberOfNodes = cast(float) _length / cast(float) ITEMS_PER_NODE; 297 return (numberOfNodes / cast(float) buckets.length) > 0.75f; 298 } 299 300 void rehash() @trusted 301 { 302 import std.experimental.allocator : makeArray, dispose; 303 import core.memory : GC; 304 305 immutable size_t newLength = buckets.length << 1; 306 Bucket[] oldBuckets = buckets; 307 makeBuckets(newLength); 308 assert (buckets); 309 assert (buckets.length == newLength); 310 static if (useGC) 311 GC.addRange(buckets.ptr, buckets.length * Bucket.sizeof); 312 foreach (ref const bucket; oldBuckets) 313 { 314 for (Bucket.BucketNode* node = cast(Bucket.BucketNode*) bucket.root; node !is null; node = node.next) 315 { 316 for (size_t i = 0; i < node.l; ++i) 317 { 318 static if (storeHash) 319 { 320 immutable size_t hash = node.items[i].hash; 321 immutable size_t index = hashToIndex(hash); 322 buckets[index].insert(ItemNode(hash, node.items[i].value)); 323 } 324 else 325 { 326 immutable size_t hash = hashFunction(node.items[i].value); 327 immutable size_t index = hashToIndex(hash); 328 buckets[index].insert(ItemNode(node.items[i].value)); 329 } 330 } 331 } 332 } 333 static if (useGC) 334 GC.removeRange(oldBuckets.ptr); 335 allocator.dispose(oldBuckets); 336 } 337 338 size_t hashToIndex(hash_t hash) const pure nothrow @safe 339 in 340 { 341 assert (buckets.length > 0); 342 } 343 out (result) 344 { 345 import std..string : format; 346 assert (result < buckets.length, "%d, %d".format(result, buckets.length)); 347 } 348 body 349 { 350 return hash & (buckets.length - 1); 351 } 352 353 static struct Bucket 354 { 355 this(this) @disable; 356 357 static if (stateSize!Allocator != 0) 358 { 359 this(Allocator allocator) 360 { 361 this.allocator = allocator; 362 } 363 this() @disable; 364 } 365 366 ~this() 367 { 368 import std.experimental.allocator : dispose; 369 370 BucketNode* current = root; 371 BucketNode* previous; 372 while (true) 373 { 374 if (previous !is null) 375 allocator.dispose(previous); 376 previous = current; 377 if (current is null) 378 break; 379 current = current.next; 380 } 381 } 382 383 static struct BucketNode 384 { 385 ContainerStorageType!(T)* get(ItemNode n) 386 { 387 foreach (ref item; items[0 .. l]) 388 { 389 static if (storeHash) 390 { 391 static if (isPointer!T) 392 { 393 if (item.hash == n.hash && *item.value == *n.value) 394 return &item.value; 395 } 396 else 397 { 398 if (item.hash == n.hash && item.value == n.value) 399 return &item.value; 400 } 401 } 402 else 403 { 404 static if (isPointer!T) 405 { 406 if (*item.value == *n.value) 407 return &item.value; 408 } 409 else 410 { 411 if (item.value == n.value) 412 return &item.value; 413 } 414 } 415 } 416 return null; 417 } 418 419 void insert(ItemNode n) 420 { 421 items[l] = n; 422 ++l; 423 } 424 425 bool remove(ItemNode n) 426 { 427 import std.algorithm : SwapStrategy, remove; 428 429 foreach (size_t i, ref node; items[0 .. l]) 430 { 431 static if (storeHash) 432 { 433 static if (isPointer!T) 434 immutable bool matches = node.hash == n.hash && *node.value == *n.value; 435 else 436 immutable bool matches = node.hash == n.hash && node.value == n.value; 437 } 438 else 439 { 440 static if (isPointer!T) 441 immutable bool matches = *node.value == *n.value; 442 else 443 immutable bool matches = node.value == n.value; 444 } 445 if (matches) 446 { 447 items[0 .. l].remove!(SwapStrategy.unstable)(i); 448 l--; 449 return true; 450 } 451 } 452 return false; 453 } 454 455 BucketNode* next; 456 size_t l; 457 ItemNode[ITEMS_PER_NODE] items; 458 } 459 460 bool insert(ItemNode n) 461 { 462 import std.experimental.allocator : make; 463 464 for (BucketNode* current = root; current !is null; current = current.next) 465 { 466 if (current.get(n) !is null) 467 return false; 468 if (current.l < current.items.length) 469 { 470 current.insert(n); 471 return true; 472 } 473 } 474 BucketNode* newNode = allocator.make!BucketNode(); 475 newNode.insert(n); 476 newNode.next = root; 477 root = newNode; 478 return true; 479 } 480 481 bool remove(ItemNode n) 482 { 483 import std.experimental.allocator : dispose; 484 485 BucketNode* current = root; 486 BucketNode* previous; 487 while (current !is null) 488 { 489 immutable removed = current.remove(n); 490 if (removed) 491 { 492 if (current.l == 0) 493 { 494 if (previous !is null) 495 previous.next = current.next; 496 else 497 root = current.next; 498 allocator.dispose(current); 499 } 500 return true; 501 } 502 previous = current; 503 current = current.next; 504 } 505 return false; 506 } 507 508 inout(T)* get(T value, size_t hash) inout 509 { 510 for (BucketNode* current = cast(BucketNode*) root; current !is null; current = current.next) 511 { 512 static if (storeHash) 513 auto v = current.get(ItemNode(hash, value)); 514 else 515 auto v = current.get(ItemNode(value)); 516 if (v !is null) 517 return cast(typeof(return)) v; 518 } 519 return null; 520 } 521 522 BucketNode* root; 523 mixin AllocatorState!Allocator; 524 } 525 526 static struct ItemNode 527 { 528 bool opEquals(ref const T v) const 529 { 530 static if (isPointer!T) 531 return *v == *value; 532 else 533 return v == value; 534 } 535 536 bool opEquals(ref const ItemNode other) const 537 { 538 static if (storeHash) 539 if (other.hash != hash) 540 return false; 541 static if (isPointer!T) 542 return *other.value == *value; 543 else 544 return other.value == value; 545 } 546 547 static if (storeHash) 548 hash_t hash; 549 ContainerStorageType!T value; 550 } 551 552 mixin AllocatorState!Allocator; 553 Bucket[] buckets; 554 size_t _length; 555 } 556 557 /// 558 unittest 559 { 560 import std.array : array; 561 import std.algorithm : canFind; 562 import std.uuid : randomUUID; 563 564 auto s = HashSet!string(16); 565 assert(!s.contains("nonsense")); 566 assert(s.put("test")); 567 assert(s.contains("test")); 568 assert(!s.put("test")); 569 assert(s.contains("test")); 570 assert(s.length == 1); 571 assert(!s.contains("nothere")); 572 s.put("a"); 573 s.put("b"); 574 s.put("c"); 575 s.put("d"); 576 string[] strings = s.range.array; 577 assert(strings.canFind("a")); 578 assert(strings.canFind("b")); 579 assert(strings.canFind("c")); 580 assert(strings.canFind("d")); 581 assert(strings.canFind("test")); 582 assert(*("a" in s) == "a"); 583 assert(*("b" in s) == "b"); 584 assert(*("c" in s) == "c"); 585 assert(*("d" in s) == "d"); 586 assert(*("test" in s) == "test"); 587 assert(strings.length == 5); 588 assert(s.remove("test")); 589 assert(s.length == 4); 590 s.clear(); 591 assert(s.length == 0); 592 assert(s.empty); 593 s.put("abcde"); 594 assert(s.length == 1); 595 foreach (i; 0 .. 10_000) 596 { 597 s.put(randomUUID().toString); 598 } 599 assert(s.length == 10_001); 600 601 // Make sure that there's no range violation slicing an empty set 602 HashSet!int e; 603 foreach (i; e[]) 604 assert(i > 0); 605 606 enum MAGICAL_NUMBER = 600_000; 607 608 HashSet!int f; 609 foreach (i; 0 .. MAGICAL_NUMBER) 610 assert(f.insert(i)); 611 import std.range:walkLength; 612 assert(f.length == f[].walkLength); 613 foreach (i; 0 .. MAGICAL_NUMBER) 614 assert(i in f); 615 foreach (i; 0 .. MAGICAL_NUMBER) 616 assert(f.remove(i)); 617 foreach (i; 0 .. MAGICAL_NUMBER) 618 assert(!f.remove(i)); 619 620 HashSet!int g; 621 foreach (i; 0 .. MAGICAL_NUMBER) 622 assert(g.insert(i)); 623 624 static struct AStruct 625 { 626 int a; 627 int b; 628 } 629 630 HashSet!(AStruct*, Mallocator, a => a.a) fred; 631 fred.insert(new AStruct(10, 10)); 632 auto h = new AStruct(10, 10); 633 assert(h in fred); 634 }