1 /** 2 * SIMD-accelerated Set 3 * Copyright: © 2015 Economic Modeling Specialists, Intl. 4 * Authors: Brian Schott 5 * License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt Boost License 1.0) 6 */ 7 module containers.simdset; 8 9 /** 10 * Set implementation that is well suited for small sets and simple items. 11 * 12 * Uses SSE instructions to compare multiple elements simultaneously, but has 13 * linear time complexity. Use this container when you need to 14 * 15 * Note: Only works on x86_64. Does NOT add GC ranges. Do not store pointers in 16 * this container unless they are also stored somewhere else. 17 */ 18 version (D_InlineAsm_X86_64) struct SimdSet(T) if (T.sizeof == 1 19 || T.sizeof == 2 || T.sizeof == 4 || T.sizeof == 8) 20 { 21 this(this) @disable; 22 23 ~this() 24 { 25 free(cast(void*) storage.ptr); 26 } 27 28 /** 29 * Params: 30 * item = the item to check 31 * Returns: 32 * true if the set contains the given item 33 */ 34 bool contains(T item) const pure nothrow @nogc 35 { 36 if (_length == 0) 37 return false; 38 bool retVal; 39 immutable remainder = _length % (16 / T.sizeof); 40 ushort mask = remainder == 0 ? 0xffff : (1 << (remainder * T.sizeof)) - 1; 41 //ushort resultMask; 42 ulong ptrStart = cast(ulong) storage.ptr; 43 ulong ptrEnd = ptrStart + storage.length * T.sizeof; 44 static if (T.sizeof == 1) 45 ulong needle = (cast(ubyte) item) * 0x01010101_01010101; 46 else static if (T.sizeof == 2) 47 ulong needle = (cast(ushort) item) * 0x00010001_00010001; 48 else static if (T.sizeof == 4) 49 ulong needle = (cast(ulong) item) * 0x00000001_00000001; 50 else static if (T.sizeof == 8) 51 ulong needle = cast(ulong) item; 52 else 53 static assert(false); 54 mixin(asmSearch()); 55 end: 56 return retVal; 57 } 58 59 /** 60 * Inserts the given item into the set. 61 * 62 * Params: 63 * item = the item to insert 64 * Returns: 65 * true if the item was inserted or false if it was already present 66 */ 67 bool insert(T item) 68 { 69 if (contains(item)) 70 return false; 71 if (storage.length > _length) 72 storage[_length] = item; 73 else 74 { 75 immutable size_t cl = (storage.length * T.sizeof); 76 immutable size_t nl = cl + 16; 77 storage = (cast(T*) realloc(storage.ptr, nl))[0 .. nl / T.sizeof]; 78 storage[_length] = item; 79 } 80 _length++; 81 return true; 82 } 83 84 /** 85 * Removes the given item from the set. 86 * 87 * Params: 88 * item = the time to remove 89 * Returns: 90 * true if the item was removed, false if it was not present 91 */ 92 bool remove(T item) 93 { 94 import std.algorithm : countUntil; 95 96 // TODO: Make this more efficient 97 98 ptrdiff_t begin = countUntil(storage, item); 99 if (begin == -1) 100 return false; 101 foreach (i; begin .. _length - 1) 102 storage[i] = storage[i + 1]; 103 _length--; 104 return true; 105 } 106 107 /** 108 * Returns: 109 * the number of items in the set 110 */ 111 size_t length() const pure nothrow @nogc @property 112 { 113 return _length; 114 } 115 116 invariant 117 { 118 assert((storage.length * T.sizeof) % 16 == 0); 119 } 120 121 private: 122 123 static string asmSearch() 124 { 125 import std.string : format; 126 127 static if (T.sizeof == 1) 128 enum instruction = `pcmpeqb`; 129 else static if (T.sizeof == 2) 130 enum instruction = `pcmpeqw`; 131 else static if (T.sizeof == 4) 132 enum instruction = `pcmpeqd`; 133 else static if (T.sizeof == 8) 134 enum instruction = `pcmpeqq`; 135 else 136 static assert(false); 137 138 static if (__VERSION__ >= 2067) 139 string s = `asm pure nothrow @nogc`; 140 else 141 string s = `asm`; 142 143 return s ~ ` 144 { 145 mov R8, ptrStart; 146 mov R9, ptrEnd; 147 sub R8, 16; 148 sub R9, 16; 149 movq XMM0, needle; 150 shufpd XMM0, XMM0, 0; 151 loop: 152 add R8, 16; 153 movdqu XMM1, [R8]; 154 %s XMM1, XMM0; 155 pmovmskb RAX, XMM1; 156 //mov resultMask, AX; 157 mov BX, AX; 158 and BX, mask; 159 cmp R8, R9; 160 cmove AX, BX; 161 popcnt AX, AX; 162 test AX, AX; 163 jnz found; 164 cmp R8, R9; 165 jl loop; 166 mov retVal, 0; 167 jmp end; 168 found: 169 mov retVal, 1; 170 jmp end; 171 }`.format(instruction); 172 } 173 174 T[] storage; 175 size_t _length; 176 } 177 178 /// 179 version (D_InlineAsm_X86_64) unittest 180 { 181 import std.string : format; 182 183 void testSimdSet(T)() 184 { 185 SimdSet!T set; 186 assert(set.insert(1)); 187 assert(set.length == 1); 188 assert(set.contains(1)); 189 assert(!set.insert(1)); 190 set.insert(0); 191 set.insert(20); 192 assert(set.contains(1)); 193 assert(set.contains(0)); 194 assert(!set.contains(10)); 195 assert(!set.contains(50)); 196 assert(set.contains(20)); 197 foreach (T i; 28 .. 127) 198 set.insert(i); 199 foreach (T i; 28 .. 127) 200 assert(set.contains(i), "%d".format(i)); 201 foreach (T i; 28 .. 127) 202 assert(set.remove(i)); 203 assert(set.length == 3, "%d".format(set.length)); 204 assert(set.contains(0)); 205 assert(set.contains(1)); 206 assert(set.contains(20)); 207 assert(!set.contains(28)); 208 } 209 210 testSimdSet!ubyte(); 211 testSimdSet!ushort(); 212 testSimdSet!uint(); 213 testSimdSet!ulong(); 214 testSimdSet!byte(); 215 testSimdSet!short(); 216 testSimdSet!int(); 217 testSimdSet!long(); 218 } 219 220 version (D_InlineAsm_X86_64) struct SimdSet(T) if (!(T.sizeof == 1 221 || T.sizeof == 2 || T.sizeof == 4 || T.sizeof == 8)) 222 { 223 import std.string : format; 224 static assert (false, ("Cannot instantiate SimdSet of type %s because its size " 225 ~ "(%d) does not fit evenly into XMM registers.").format(T.stringof, T.sizeof)); 226 } 227 228 private extern (C) void* realloc(void*, size_t); 229 private extern (C) void free(void*);