1 /** 2 * Hash Set 3 * Copyright: © 2014 Economic Modeling Specialists, Intl. 4 * Authors: Brian Schott 5 * License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0) 6 */ 7 8 module containers.hashset; 9 10 import containers.internal.hash : generateHash; 11 import containers.internal.node : shouldAddGCRange; 12 13 /** 14 * Hash Set. 15 * Params: 16 * T = the element type 17 * hashFunction = the hash function to use on the elements 18 */ 19 struct HashSet(T, alias hashFunction = generateHash!T, bool supportGC = shouldAddGCRange!T) 20 { 21 this(this) @disable; 22 23 /** 24 * Constructs a HashSet with an initial bucket count of bucketCount. 25 * bucketCount must be a power of two. 26 */ 27 this(size_t bucketCount) 28 in 29 { 30 assert ((bucketCount & (bucketCount - 1)) == 0, "bucketCount must be a power of two"); 31 } 32 body 33 { 34 initialize(bucketCount); 35 } 36 37 ~this() 38 { 39 import std.experimental.allocator.mallocator : Mallocator; 40 import std.experimental.allocator : dispose; 41 import core.memory : GC; 42 static if (supportGC && shouldAddGCRange!T) 43 GC.removeRange(buckets.ptr); 44 Mallocator.instance.dispose(buckets); 45 } 46 47 /** 48 * Removes all items from the set 49 */ 50 void clear() 51 { 52 foreach (ref bucket; buckets) 53 { 54 destroy(bucket); 55 bucket = Bucket.init; 56 } 57 _length = 0; 58 } 59 60 /** 61 * Removes the given item from the set. 62 * Returns: false if the value was not present 63 */ 64 bool remove(T value) 65 { 66 hash_t hash = hashFunction(value); 67 size_t index = hashToIndex(hash); 68 static if (storeHash) 69 immutable bool removed = buckets[index].remove(ItemNode(hash, value)); 70 else 71 immutable bool removed = buckets[index].remove(ItemNode(value)); 72 if (removed) 73 --_length; 74 return removed; 75 } 76 77 /** 78 * Returns: true if value is contained in the set. 79 */ 80 bool contains(T value) inout nothrow 81 { 82 return (value in this) !is null; 83 } 84 85 /** 86 * Supports $(B a in b) syntax 87 */ 88 inout(T)* opBinaryRight(string op)(T value) inout nothrow if (op == "in") 89 { 90 if (buckets.length == 0 || _length == 0) 91 return null; 92 hash_t hash = hashFunction(value); 93 size_t index = hashToIndex(hash); 94 return buckets[index].get(value, hash); 95 } 96 97 /** 98 * Inserts the given item into the set. 99 * Params: value = the value to insert 100 * Returns: true if the value was actually inserted, or false if it was 101 * already present. 102 */ 103 bool insert(T value) 104 { 105 if (buckets.length == 0) 106 initialize(4); 107 hash_t hash = hashFunction(value); 108 size_t index = hashToIndex(hash); 109 static if (storeHash) 110 auto r = buckets[index].insert(ItemNode(hash, value)); 111 else 112 auto r = buckets[index].insert(ItemNode(value)); 113 if (r) 114 ++_length; 115 if (shouldRehash) 116 rehash(); 117 return r; 118 } 119 120 /// ditto 121 alias put = insert; 122 123 /** 124 * Returns: true if the set has no items 125 */ 126 bool empty() const nothrow pure @nogc @safe @property 127 { 128 return _length == 0; 129 } 130 131 /** 132 * Returns: the number of items in the set 133 */ 134 size_t length() const nothrow pure @nogc @safe @property 135 { 136 return _length; 137 } 138 139 /** 140 * Forward range interface 141 */ 142 auto range(this This)() nothrow @nogc @trusted @property 143 { 144 return Range!(This)(&this); 145 } 146 147 /// ditto 148 alias opSlice = range; 149 150 private: 151 152 import containers.internal.node : shouldAddGCRange, fatNodeCapacity; 153 import containers.internal.storage_type : ContainerStorageType; 154 import containers.internal.element_type : ContainerElementType; 155 import containers.unrolledlist : UnrolledList; 156 import std.traits : isBasicType, isPointer; 157 158 enum ITEMS_PER_NODE = fatNodeCapacity!(ItemNode.sizeof, 1, size_t, 128); 159 160 enum bool storeHash = !isBasicType!T; 161 162 void initialize(size_t bucketCount) 163 { 164 import std.experimental.allocator : makeArray; 165 import std.experimental.allocator.mallocator : Mallocator; 166 import core.memory : GC; 167 168 buckets = Mallocator.instance.makeArray!Bucket(bucketCount); 169 static if (supportGC && shouldAddGCRange!T) 170 GC.addRange(buckets.ptr, buckets.length * Bucket.sizeof); 171 } 172 173 static struct Range(ThisT) 174 { 175 this(ThisT* t) 176 { 177 foreach (i, ref bucket; t.buckets) 178 { 179 bucketIndex = i; 180 if (bucket.root !is null) 181 { 182 currentNode = cast(Bucket.BucketNode*) bucket.root; 183 break; 184 } 185 } 186 this.t = t; 187 } 188 189 bool empty() const nothrow @safe @nogc @property 190 { 191 return currentNode is null; 192 } 193 194 ET front() nothrow @safe @nogc @property 195 { 196 return cast(ET) currentNode.items[nodeIndex].value; 197 } 198 199 void popFront() nothrow @trusted @nogc 200 { 201 if (nodeIndex + 1 < currentNode.l) 202 { 203 ++nodeIndex; 204 return; 205 } 206 else 207 { 208 nodeIndex = 0; 209 if (currentNode.next is null) 210 { 211 ++bucketIndex; 212 while (bucketIndex < t.buckets.length && t.buckets[bucketIndex].root is null) 213 ++bucketIndex; 214 if (bucketIndex < t.buckets.length) 215 currentNode = cast(Bucket.BucketNode*) t.buckets[bucketIndex].root; 216 else 217 currentNode = null; 218 } 219 else 220 { 221 currentNode = currentNode.next; 222 assert(currentNode.l > 0); 223 } 224 } 225 } 226 227 private: 228 alias ET = ContainerElementType!(ThisT, T); 229 ThisT* t; 230 Bucket.BucketNode* currentNode; 231 size_t bucketIndex; 232 size_t nodeIndex; 233 } 234 235 bool shouldRehash() const pure nothrow @safe @nogc 236 { 237 immutable float numberOfNodes = cast(float) _length / cast(float) ITEMS_PER_NODE; 238 return (numberOfNodes / cast(float) buckets.length) > 0.75f; 239 } 240 241 void rehash() @trusted 242 { 243 import std.experimental.allocator : makeArray, dispose; 244 import std.experimental.allocator.mallocator : Mallocator; 245 import core.memory : GC; 246 247 immutable size_t newLength = buckets.length << 1; 248 Bucket[] oldBuckets = buckets; 249 buckets = Mallocator.instance.makeArray!Bucket(newLength); 250 assert (buckets); 251 assert (buckets.length == newLength); 252 static if (supportGC && shouldAddGCRange!T) 253 GC.addRange(buckets.ptr, buckets.length * Bucket.sizeof); 254 foreach (ref const bucket; oldBuckets) 255 { 256 for (Bucket.BucketNode* node = cast(Bucket.BucketNode*) bucket.root; node !is null; node = node.next) 257 { 258 for (size_t i = 0; i < node.l; ++i) 259 { 260 static if (storeHash) 261 { 262 immutable size_t hash = node.items[i].hash; 263 immutable size_t index = hashToIndex(hash); 264 buckets[index].insert(ItemNode(hash, node.items[i].value)); 265 } 266 else 267 { 268 immutable size_t hash = hashFunction(node.items[i].value); 269 immutable size_t index = hashToIndex(hash); 270 buckets[index].insert(ItemNode(node.items[i].value)); 271 } 272 } 273 } 274 } 275 static if (supportGC && shouldAddGCRange!T) 276 GC.removeRange(oldBuckets.ptr); 277 Mallocator.instance.dispose(oldBuckets); 278 } 279 280 size_t hashToIndex(hash_t hash) const pure nothrow @safe 281 in 282 { 283 assert (buckets.length > 0); 284 } 285 out (result) 286 { 287 import std.string : format; 288 assert (result < buckets.length, "%d, %d".format(result, buckets.length)); 289 } 290 body 291 { 292 return hash & (buckets.length - 1); 293 } 294 295 static struct Bucket 296 { 297 this(this) @disable; 298 299 ~this() 300 { 301 import std.experimental.allocator : dispose; 302 import std.experimental.allocator.mallocator : Mallocator; 303 304 BucketNode* current = root; 305 BucketNode* previous; 306 while (true) 307 { 308 if (previous !is null) 309 Mallocator.instance.dispose(previous); 310 previous = current; 311 if (current is null) 312 break; 313 current = current.next; 314 } 315 } 316 317 static struct BucketNode 318 { 319 ContainerStorageType!(T)* get(ItemNode n) 320 { 321 foreach (ref item; items[0 .. l]) 322 { 323 static if (storeHash) 324 { 325 static if (isPointer!T) 326 { 327 if (item.hash == n.hash && *item.value == *n.value) 328 return &item.value; 329 } 330 else 331 { 332 if (item.hash == n.hash && item.value == n.value) 333 return &item.value; 334 } 335 } 336 else 337 { 338 static if (isPointer!T) 339 { 340 if (*item.value == *n.value) 341 return &item.value; 342 } 343 else 344 { 345 if (item.value == n.value) 346 return &item.value; 347 } 348 } 349 } 350 return null; 351 } 352 353 void insert(ItemNode n) 354 { 355 items[l] = n; 356 ++l; 357 } 358 359 bool remove(ItemNode n) 360 { 361 import std.algorithm : SwapStrategy, remove; 362 363 foreach (size_t i, ref node; items[0 .. l]) 364 { 365 static if (storeHash) 366 { 367 static if (isPointer!T) 368 immutable bool matches = node.hash == n.hash && *node.value == *n.value; 369 else 370 immutable bool matches = node.hash == n.hash && node.value == n.value; 371 } 372 else 373 { 374 static if (isPointer!T) 375 immutable bool matches = *node.value == *n.value; 376 else 377 immutable bool matches = node.value == n.value; 378 } 379 if (matches) 380 { 381 items[0 .. l].remove!(SwapStrategy.unstable)(i); 382 l--; 383 return true; 384 } 385 } 386 return false; 387 } 388 389 BucketNode* next; 390 size_t l; 391 ItemNode[ITEMS_PER_NODE] items; 392 } 393 394 bool insert(ItemNode n) 395 { 396 import std.experimental.allocator : make; 397 import std.experimental.allocator.mallocator : Mallocator; 398 399 for (BucketNode* current = root; current !is null; current = current.next) 400 { 401 if (current.get(n) !is null) 402 return false; 403 if (current.l < current.items.length) 404 { 405 current.insert(n); 406 return true; 407 } 408 } 409 BucketNode* newNode = Mallocator.instance.make!BucketNode(); 410 newNode.insert(n); 411 newNode.next = root; 412 root = newNode; 413 return true; 414 } 415 416 bool remove(ItemNode n) 417 { 418 import std.experimental.allocator : dispose; 419 import std.experimental.allocator.mallocator : Mallocator; 420 421 BucketNode* current = root; 422 BucketNode* previous; 423 while (current !is null) 424 { 425 immutable removed = current.remove(n); 426 if (removed) 427 { 428 if (current.l == 0) 429 { 430 if (previous !is null) 431 previous.next = current.next; 432 else 433 root = current.next; 434 Mallocator.instance.dispose(current); 435 } 436 return true; 437 } 438 previous = current; 439 current = current.next; 440 } 441 return false; 442 } 443 444 inout(T)* get(T value, size_t hash) inout 445 { 446 for (BucketNode* current = cast(BucketNode*) root; current !is null; current = current.next) 447 { 448 static if (storeHash) 449 auto v = current.get(ItemNode(hash, value)); 450 else 451 auto v = current.get(ItemNode(value)); 452 if (v !is null) 453 return cast(typeof(return)) v; 454 } 455 return null; 456 } 457 458 BucketNode* root; 459 } 460 461 static struct ItemNode 462 { 463 bool opEquals(ref const T v) const 464 { 465 static if (isPointer!T) 466 return *v == *value; 467 else 468 return v == value; 469 } 470 471 bool opEquals(ref const ItemNode other) const 472 { 473 static if (storeHash) 474 if (other.hash != hash) 475 return false; 476 static if (isPointer!T) 477 return *other.value == *value; 478 else 479 return other.value == value; 480 } 481 482 static if (storeHash) 483 hash_t hash; 484 ContainerStorageType!T value; 485 } 486 487 Bucket[] buckets; 488 size_t _length; 489 } 490 491 /// 492 unittest 493 { 494 import std.array : array; 495 import std.algorithm : canFind; 496 import std.uuid : randomUUID; 497 498 auto s = HashSet!string(16); 499 assert(!s.contains("nonsense")); 500 assert(s.put("test")); 501 assert(s.contains("test")); 502 assert(!s.put("test")); 503 assert(s.contains("test")); 504 assert(s.length == 1); 505 assert(!s.contains("nothere")); 506 s.put("a"); 507 s.put("b"); 508 s.put("c"); 509 s.put("d"); 510 string[] strings = s.range.array; 511 assert(strings.canFind("a")); 512 assert(strings.canFind("b")); 513 assert(strings.canFind("c")); 514 assert(strings.canFind("d")); 515 assert(strings.canFind("test")); 516 assert(*("a" in s) == "a"); 517 assert(*("b" in s) == "b"); 518 assert(*("c" in s) == "c"); 519 assert(*("d" in s) == "d"); 520 assert(*("test" in s) == "test"); 521 assert(strings.length == 5); 522 assert(s.remove("test")); 523 assert(s.length == 4); 524 s.clear(); 525 assert(s.length == 0); 526 assert(s.empty); 527 s.put("abcde"); 528 assert(s.length == 1); 529 foreach (i; 0 .. 10_000) 530 { 531 s.put(randomUUID().toString); 532 } 533 assert(s.length == 10_001); 534 535 // Make sure that there's no range violation slicing an empty set 536 HashSet!int e; 537 foreach (i; e[]) 538 assert(i > 0); 539 540 enum MAGICAL_NUMBER = 600_000; 541 542 HashSet!int f; 543 foreach (i; 0 .. MAGICAL_NUMBER) 544 assert(f.insert(i)); 545 import std.range:walkLength; 546 assert(f.length == f[].walkLength); 547 foreach (i; 0 .. MAGICAL_NUMBER) 548 assert(i in f); 549 foreach (i; 0 .. MAGICAL_NUMBER) 550 assert(f.remove(i)); 551 foreach (i; 0 .. MAGICAL_NUMBER) 552 assert(!f.remove(i)); 553 554 HashSet!int g; 555 foreach (i; 0 .. MAGICAL_NUMBER) 556 assert(g.insert(i)); 557 558 static struct AStruct 559 { 560 int a; 561 int b; 562 } 563 564 HashSet!(AStruct*, a => a.a) fred; 565 fred.insert(new AStruct(10, 10)); 566 auto h = new AStruct(10, 10); 567 assert(h in fred); 568 }