1 /** 2 * SIMD-accelerated Set 3 * Copyright: © 2015 Economic Modeling Specialists, Intl. 4 * Authors: Brian Schott 5 * License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt Boost License 1.0) 6 */ 7 module containers.simdset; 8 9 private import std.experimental.allocator.mallocator : Mallocator; 10 11 /** 12 * Set implementation that is well suited for small sets and simple items. 13 * 14 * Uses SSE instructions to compare multiple elements simultaneously, but has 15 * linear time complexity. 16 * 17 * Note: Only works on x86_64. Does NOT add GC ranges. Do not store pointers in 18 * this container unless they are also stored somewhere else. 19 * 20 * Params: 21 * T = the element type 22 * Allocator = the allocator to use. Defaults to `Mallocator`. 23 */ 24 version (D_InlineAsm_X86_64) struct SimdSet(T, Allocator = Mallocator) 25 if (T.sizeof == 1 || T.sizeof == 2 || T.sizeof == 4 || T.sizeof == 8) 26 { 27 this(this) @disable; 28 29 private import std.experimental.allocator.common : stateSize; 30 31 static if (stateSize!Allocator != 0) 32 { 33 /// No default construction if an allocator must be provided. 34 this() @disable; 35 36 /** 37 * Use the given `allocator` for allocations. 38 */ 39 this(Allocator allocator) 40 in 41 { 42 assert(allocator !is null, "Allocator must not be null"); 43 } 44 body 45 { 46 this.allocator = allocator; 47 } 48 } 49 50 ~this() 51 { 52 allocator.deallocate(cast(void[]) storage); 53 } 54 55 /** 56 * Params: 57 * item = the item to check 58 * Returns: 59 * true if the set contains the given item 60 */ 61 bool contains(T item) const pure nothrow @nogc 62 { 63 if (_length == 0) 64 return false; 65 bool retVal; 66 immutable remainder = _length % (16 / T.sizeof); 67 ushort mask = remainder == 0 ? 0xffff : (1 << (remainder * T.sizeof)) - 1; 68 //ushort resultMask; 69 ulong ptrStart = cast(ulong) storage.ptr; 70 ulong ptrEnd = ptrStart + storage.length * T.sizeof; 71 static if (T.sizeof == 1) 72 ulong needle = (cast(ubyte) item) * 0x01010101_01010101; 73 else static if (T.sizeof == 2) 74 ulong needle = (cast(ushort) item) * 0x00010001_00010001; 75 else static if (T.sizeof == 4) 76 ulong needle = (cast(ulong) item) * 0x00000001_00000001; 77 else static if (T.sizeof == 8) 78 ulong needle = cast(ulong) item; 79 else 80 static assert(false); 81 mixin(asmSearch()); 82 end: 83 return retVal; 84 } 85 86 /** 87 * Inserts the given item into the set. 88 * 89 * Params: 90 * item = the item to insert 91 * Returns: 92 * true if the item was inserted or false if it was already present 93 */ 94 bool insert(T item) 95 { 96 if (contains(item)) 97 return false; 98 if (storage.length > _length) 99 storage[_length] = item; 100 else 101 { 102 immutable size_t cl = (storage.length * T.sizeof); 103 immutable size_t nl = cl + 16; 104 void[] a = cast(void[]) storage; 105 allocator.reallocate(a, nl); 106 storage = cast(typeof(storage)) a; 107 storage[_length] = item; 108 } 109 _length++; 110 return true; 111 } 112 113 /** 114 * Removes the given item from the set. 115 * 116 * Params: 117 * item = the time to remove 118 * Returns: 119 * true if the item was removed, false if it was not present 120 */ 121 bool remove(T item) 122 { 123 import std.algorithm : countUntil; 124 125 // TODO: Make this more efficient 126 127 ptrdiff_t begin = countUntil(storage, item); 128 if (begin == -1) 129 return false; 130 foreach (i; begin .. _length - 1) 131 storage[i] = storage[i + 1]; 132 _length--; 133 return true; 134 } 135 136 /** 137 * Slice operator 138 */ 139 auto opSlice(this This)() 140 { 141 import containers.internal.element_type : ContainerElementType; 142 return cast(ContainerElementType!(This, T)[]) storage[0 .. _length]; 143 } 144 145 /** 146 * Returns: 147 * the number of items in the set 148 */ 149 size_t length() const pure nothrow @nogc @property 150 { 151 return _length; 152 } 153 154 invariant 155 { 156 assert((storage.length * T.sizeof) % 16 == 0); 157 } 158 159 private: 160 161 import containers.internal.storage_type : ContainerStorageType; 162 private import containers.internal.mixins : AllocatorState; 163 164 static string asmSearch() 165 { 166 import std..string : format; 167 168 static if (T.sizeof == 1) 169 enum instruction = `pcmpeqb`; 170 else static if (T.sizeof == 2) 171 enum instruction = `pcmpeqw`; 172 else static if (T.sizeof == 4) 173 enum instruction = `pcmpeqd`; 174 else static if (T.sizeof == 8) 175 enum instruction = `pcmpeqq`; 176 else 177 static assert(false); 178 179 static if (__VERSION__ >= 2067) 180 string s = `asm pure nothrow @nogc`; 181 else 182 string s = `asm`; 183 184 return s ~ ` 185 { 186 mov R8, ptrStart; 187 mov R9, ptrEnd; 188 sub R8, 16; 189 sub R9, 16; 190 movq XMM0, needle; 191 shufpd XMM0, XMM0, 0; 192 loop: 193 add R8, 16; 194 movdqu XMM1, [R8]; 195 %s XMM1, XMM0; 196 pmovmskb RAX, XMM1; 197 //mov resultMask, AX; 198 mov BX, AX; 199 and BX, mask; 200 cmp R8, R9; 201 cmove AX, BX; 202 popcnt AX, AX; 203 test AX, AX; 204 jnz found; 205 cmp R8, R9; 206 jl loop; 207 mov retVal, 0; 208 jmp end; 209 found: 210 mov retVal, 1; 211 jmp end; 212 }`.format(instruction); 213 } 214 215 mixin AllocatorState!Allocator; 216 ContainerStorageType!(T)[] storage; 217 size_t _length; 218 } 219 220 /// 221 version (D_InlineAsm_X86_64) unittest 222 { 223 import std..string : format; 224 225 void testSimdSet(T)() 226 { 227 SimdSet!T set; 228 assert(set.insert(1)); 229 assert(set.length == 1); 230 assert(set.contains(1)); 231 assert(!set.insert(1)); 232 set.insert(0); 233 set.insert(20); 234 assert(set.contains(1)); 235 assert(set.contains(0)); 236 assert(!set.contains(10)); 237 assert(!set.contains(50)); 238 assert(set.contains(20)); 239 foreach (T i; 28 .. 127) 240 set.insert(i); 241 foreach (T i; 28 .. 127) 242 assert(set.contains(i), "%d".format(i)); 243 foreach (T i; 28 .. 127) 244 assert(set.remove(i)); 245 assert(set.length == 3, "%d".format(set.length)); 246 assert(set.contains(0)); 247 assert(set.contains(1)); 248 assert(set.contains(20)); 249 assert(!set.contains(28)); 250 } 251 252 testSimdSet!ubyte(); 253 testSimdSet!ushort(); 254 testSimdSet!uint(); 255 testSimdSet!ulong(); 256 testSimdSet!byte(); 257 testSimdSet!short(); 258 testSimdSet!int(); 259 testSimdSet!long(); 260 } 261 262 version (D_InlineAsm_X86_64) struct SimdSet(T) if (!(T.sizeof == 1 263 || T.sizeof == 2 || T.sizeof == 4 || T.sizeof == 8)) 264 { 265 import std..string : format; 266 static assert (false, ("Cannot instantiate SimdSet of type %s because its size " 267 ~ "(%d) does not fit evenly into XMM registers.").format(T.stringof, T.sizeof)); 268 }