1 /** 2 * Hash Set 3 * Copyright: © 2014 Economic Modeling Specialists, Intl. 4 * Authors: Brian Schott 5 * License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0) 6 */ 7 8 module containers.hashset; 9 10 import containers.internal.hash : generateHash; 11 import containers.internal.node : shouldAddGCRange; 12 13 /** 14 * Hash Set. 15 * Params: 16 * T = the element type 17 * hashFunction = the hash function to use on the elements 18 */ 19 struct HashSet(T, alias hashFunction = generateHash!T, bool supportGC = shouldAddGCRange!T) 20 { 21 this(this) @disable; 22 23 /** 24 * Constructs a HashSet with an initial bucket count of bucketCount. 25 * bucketCount must be a power of two. 26 */ 27 this(size_t bucketCount) 28 in 29 { 30 assert ((bucketCount & (bucketCount - 1)) == 0, "bucketCount must be a power of two"); 31 } 32 body 33 { 34 initialize(bucketCount); 35 } 36 37 ~this() 38 { 39 import std.allocator : Mallocator, deallocate; 40 foreach (ref bucket; buckets) 41 typeid(typeof(bucket)).destroy(&bucket); 42 static if (supportGC && shouldAddGCRange!T) 43 GC.removeRange(buckets.ptr); 44 Mallocator.it.deallocate(buckets); 45 } 46 47 /** 48 * Removes all items from the set 49 */ 50 void clear() 51 { 52 foreach (ref bucket; buckets) 53 destroy(bucket); 54 _length = 0; 55 } 56 57 /** 58 * Removes the given item from the set. 59 * Returns: false if the value was not present 60 */ 61 bool remove(T value) 62 { 63 hash_t hash = generateHash(value); 64 size_t index = hashToIndex(hash); 65 if (buckets[index].empty) 66 return false; 67 static if (storeHash) 68 bool removed = buckets[index].remove(Node(hash, value)); 69 else 70 bool removed = buckets[index].remove(Node(value)); 71 if (removed) 72 --_length; 73 return removed; 74 } 75 76 /** 77 * Returns: true if value is contained in the set. 78 */ 79 bool contains(T value) inout nothrow 80 { 81 if (buckets.length == 0) 82 return false; 83 hash_t hash = generateHash(value); 84 size_t index = hashToIndex(hash); 85 if (buckets[index].empty) 86 return false; 87 foreach (ref item; buckets[index].range) 88 { 89 static if (storeHash) 90 { 91 if (item.hash == hash && item.value == value) 92 return true; 93 } 94 else 95 { 96 if (item.value == value) 97 return true; 98 } 99 } 100 return false; 101 } 102 103 /** 104 * Supports $(B a in b) syntax 105 */ 106 bool opBinaryRight(string op)(T value) inout nothrow if (op == "in") 107 { 108 return contains(value); 109 } 110 111 /** 112 * Inserts the given item into the set. 113 * Params: value = the value to insert 114 * Returns: true if the value was actually inserted, or false if it was 115 * already present. 116 */ 117 bool insert(T value) 118 { 119 if (buckets.length == 0) 120 initialize(4); 121 hash_t hash = generateHash(value); 122 size_t index = hashToIndex(hash); 123 if (buckets[index].empty) 124 { 125 insert: 126 static if (storeHash) 127 buckets[index].insert(Node(hash, value)); 128 else 129 buckets[index].insert(Node(value)); 130 ++_length; 131 if (shouldRehash()) 132 rehash(); 133 return true; 134 } 135 foreach (ref item; buckets[index].range) 136 { 137 static if (storeHash) 138 { 139 if (item.hash == hash && item.value == value) 140 return false; 141 } 142 else 143 { 144 if (item.value == value) 145 return false; 146 } 147 } 148 goto insert; 149 } 150 151 /// ditto 152 alias put = insert; 153 154 /** 155 * Returns: true if the set has no items 156 */ 157 bool empty() inout nothrow pure @nogc @safe @property 158 { 159 return _length == 0; 160 } 161 162 /** 163 * Returns: the number of items in the set 164 */ 165 size_t length() inout nothrow pure @nogc @safe @property 166 { 167 return _length; 168 } 169 170 /** 171 * Forward range interface 172 */ 173 Range range() inout nothrow @nogc @safe @property 174 { 175 return Range(buckets); 176 } 177 178 /// ditto 179 alias opSlice = range; 180 181 private: 182 183 import containers.internal.node : shouldAddGCRange; 184 import containers.unrolledlist : UnrolledList; 185 import std.allocator : Mallocator, allocate; 186 import std.traits : isBasicType; 187 import core.memory : GC; 188 189 enum bool storeHash = !isBasicType!T; 190 191 void initialize(size_t bucketCount) 192 { 193 import std.conv : emplace; 194 buckets = cast(Bucket[]) Mallocator.it.allocate( 195 bucketCount * Bucket.sizeof); 196 assert (buckets.length == bucketCount); 197 foreach (ref bucket; buckets) 198 emplace(&bucket); 199 static if (supportGC && shouldAddGCRange!T) 200 GC.addRange(buckets.ptr, buckets.length * Bucket.sizeof); 201 } 202 203 static struct Range 204 { 205 this(const(Bucket)[] buckets) 206 { 207 this.buckets = buckets; 208 if (buckets.length) 209 { 210 r = buckets[i].range; 211 while (i < buckets.length && r.empty) 212 { 213 i++; 214 r = buckets[i].range; 215 } 216 } 217 else 218 r = typeof(buckets[i].range()).init; 219 } 220 221 bool empty() const nothrow @safe @nogc @property 222 { 223 return i >= buckets.length; 224 } 225 226 T front() const nothrow @safe @nogc @property 227 { 228 return r.front.value; 229 } 230 231 void popFront() 232 { 233 r.popFront(); 234 while (r.empty) 235 { 236 i++; 237 if (i >= buckets.length) 238 return; 239 r = buckets[i].range; 240 } 241 } 242 243 const(Bucket)[] buckets; 244 typeof(Bucket.range()) r; 245 size_t i; 246 } 247 248 alias Bucket = UnrolledList!(Node, supportGC); 249 250 bool shouldRehash() const pure nothrow @safe 251 { 252 return (cast(float) _length / cast(float) buckets.length) > 0.75; 253 } 254 255 void rehash() @trusted 256 { 257 import std.allocator : allocate, deallocate; 258 import std.conv : emplace; 259 immutable size_t newLength = buckets.length << 1; 260 immutable size_t newSize = newLength * Bucket.sizeof; 261 Bucket[] oldBuckets = buckets; 262 buckets = cast(Bucket[]) Mallocator.it.allocate(newSize); 263 assert (buckets); 264 assert (buckets.length == newLength); 265 auto newAllocator = allocate!(HashSetAllocatorType!T)(Mallocator.it); 266 assert (newAllocator); 267 foreach (ref bucket; buckets) 268 emplace(&bucket); 269 static if (supportGC && shouldAddGCRange!T) 270 GC.addRange(buckets.ptr, buckets.length * Bucket.sizeof); 271 foreach (ref const bucket; oldBuckets) 272 { 273 foreach (node; bucket.range) 274 { 275 static if (storeHash) 276 { 277 size_t index = hashToIndex(node.hash); 278 buckets[index].put(Node(node.hash, node.value)); 279 } 280 else 281 { 282 size_t hash = generateHash(node.value); 283 size_t index = hashToIndex(hash); 284 buckets[index].put(Node(node.value)); 285 } 286 } 287 } 288 foreach (ref bucket; oldBuckets) 289 typeid(Bucket).destroy(&bucket); 290 static if (supportGC && shouldAddGCRange!T) 291 GC.removeRange(oldBuckets.ptr); 292 Mallocator.it.deallocate(oldBuckets); 293 } 294 295 size_t hashToIndex(hash_t hash) const pure nothrow @safe 296 in 297 { 298 assert (buckets.length > 0); 299 } 300 out (result) 301 { 302 import std.string : format; 303 assert (result < buckets.length, "%d, %d".format(result, buckets.length)); 304 } 305 body 306 { 307 return hash & (buckets.length - 1); 308 } 309 310 struct Node 311 { 312 bool opEquals(ref const T v) const 313 { 314 return v == value; 315 } 316 317 bool opEquals(ref const Node other) const 318 { 319 static if (storeHash) 320 if (other.hash != hash) 321 return false; 322 return other.value == value; 323 } 324 325 static if (storeHash) 326 hash_t hash; 327 T value; 328 } 329 330 Bucket[] buckets; 331 size_t _length; 332 } 333 334 /// 335 unittest 336 { 337 import std.array : array; 338 import std.algorithm : canFind; 339 import std.uuid : randomUUID; 340 auto s = HashSet!string(16); 341 assert (!s.contains("nonsense")); 342 s.put("test"); 343 s.put("test"); 344 assert (s.contains("test")); 345 assert (s.length == 1); 346 assert (!s.contains("nothere")); 347 s.put("a"); 348 s.put("b"); 349 s.put("c"); 350 s.put("d"); 351 string[] strings = s.range.array; 352 assert (strings.canFind("a")); 353 assert (strings.canFind("b")); 354 assert (strings.canFind("c")); 355 assert (strings.canFind("d")); 356 assert (strings.canFind("test")); 357 assert (strings.length == 5); 358 assert (s.remove("test")); 359 assert (s.length == 4); 360 s.clear(); 361 assert (s.length == 0); 362 assert (s.empty); 363 s.put("abcde"); 364 assert (s.length == 1); 365 foreach (i; 0 .. 10_000) 366 { 367 s.put(randomUUID().toString); 368 } 369 assert (s.length == 10_001); 370 371 // Make sure that there's no range violation slicing an empty set 372 HashSet!int e; 373 foreach (i; e[]) 374 assert (i > 0); 375 } 376 377 private: 378 379 template HashSetAllocatorType(T) 380 { 381 import memory.allocators; 382 enum size_t hashSetNodeSize = (void*).sizeof + T.sizeof + hash_t.sizeof; 383 enum size_t hashSetBlockSize = 512; 384 alias HashSetAllocatorType = NodeAllocator!(hashSetNodeSize, hashSetBlockSize); 385 }