1 /** 2 * SIMD-accelerated Set 3 * Copyright: © 2015 Economic Modeling Specialists, Intl. 4 * Authors: Brian Schott 5 * License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt Boost License 1.0) 6 */ 7 module containers.simdset; 8 9 /** 10 * Set implementation that is well suited for small sets and simple items. 11 * 12 * Uses SSE instructions to compare multiple elements simultaneously, but has 13 * linear time complexity. Use this container when you need to 14 * 15 * Note: Only works on x86_64. Does NOT add GC ranges. Do not store pointers in 16 * this container unless they are also stored somewhere else. 17 */ 18 version (D_InlineAsm_X86_64) struct SimdSet(T) if (T.sizeof == 1 19 || T.sizeof == 2 || T.sizeof == 4 || T.sizeof == 8) 20 { 21 this(this) @disable; 22 23 ~this() 24 { 25 free(storage.ptr); 26 } 27 28 /** 29 * Params: 30 * item = the item to check 31 * Returns: 32 * true if the set contains the given item 33 */ 34 bool contains(T item) const pure nothrow @nogc 35 { 36 if (_length == 0) 37 return false; 38 bool retVal; 39 immutable remainder = _length % (16 / T.sizeof); 40 ushort mask = remainder == 0 ? 0xffff : (1 << (remainder * T.sizeof)) - 1; 41 //ushort resultMask; 42 ulong ptrStart = cast(ulong) storage.ptr; 43 ulong ptrEnd = ptrStart + storage.length * T.sizeof; 44 static if (T.sizeof == 1) 45 ulong needle = (cast(ubyte) item) * 0x01010101_01010101; 46 else static if (T.sizeof == 2) 47 ulong needle = (cast(ushort) item) * 0x00010001_00010001; 48 else static if (T.sizeof == 4) 49 ulong needle = (cast(ulong) item) * 0x00000001_00000001; 50 else static if (T.sizeof == 8) 51 ulong needle = cast(ulong) item; 52 else 53 static assert(false); 54 mixin(asmSearch()); 55 end: 56 //stderr.writeln("storage: ", storage); 57 //stderr.writeln("item: ", item); 58 //stderr.writefln("T.sizeof: %d", T.sizeof); 59 //stderr.writefln("mask: %016b", mask); 60 //stderr.writefln("resultMask: %016b", resultMask); 61 return retVal; 62 } 63 64 /** 65 * Inserts the given item into the set. 66 * 67 * Params: 68 * item = the item to insert 69 * Returns: 70 * true if the item was inserted or false if it was already present 71 */ 72 bool insert(T item) 73 { 74 if (contains(item)) 75 return false; 76 if (storage.length > _length) 77 storage[_length] = item; 78 else 79 { 80 immutable size_t cl = (storage.length * T.sizeof); 81 immutable size_t nl = cl + 16; 82 storage = (cast(T*) realloc(storage.ptr, nl))[0 .. nl / T.sizeof]; 83 storage[_length] = item; 84 } 85 _length++; 86 return true; 87 } 88 89 /** 90 * Removes the given item from the set. 91 * 92 * Params: 93 * item = the time to remove 94 * Returns: 95 * true if the item was removed, false if it was not present 96 */ 97 bool remove(T item) 98 { 99 import std.algorithm : countUntil; 100 101 // TODO: Make this more efficient 102 103 ptrdiff_t begin = countUntil(storage, item); 104 if (begin == -1) 105 return false; 106 foreach (i; begin .. _length - 1) 107 storage[i] = storage[i + 1]; 108 _length--; 109 return true; 110 } 111 112 /** 113 * Returns: 114 * the number of items in the set 115 */ 116 size_t length() const pure nothrow @nogc @property 117 { 118 return _length; 119 } 120 121 invariant 122 { 123 assert((storage.length * T.sizeof) % 16 == 0); 124 } 125 126 private: 127 128 static string asmSearch() 129 { 130 import std.string : format; 131 132 static if (T.sizeof == 1) 133 enum instruction = `pcmpeqb`; 134 else static if (T.sizeof == 2) 135 enum instruction = `pcmpeqw`; 136 else static if (T.sizeof == 4) 137 enum instruction = `pcmpeqd`; 138 else static if (T.sizeof == 8) 139 enum instruction = `pcmpeqq`; 140 else 141 static assert(false); 142 143 static if (__VERSION__ >= 2067) 144 string s = `asm pure nothrow @nogc`; 145 else 146 string s = `asm`; 147 148 return s ~ ` 149 { 150 mov R8, ptrStart; 151 mov R9, ptrEnd; 152 sub R8, 16; 153 sub R9, 16; 154 movq XMM0, needle; 155 shufpd XMM0, XMM0, 0; 156 loop: 157 add R8, 16; 158 movdqu XMM1, [R8]; 159 %s XMM1, XMM0; 160 pmovmskb RAX, XMM1; 161 //mov resultMask, AX; 162 mov BX, AX; 163 and BX, mask; 164 cmp R8, R9; 165 cmove AX, BX; 166 popcnt AX, AX; 167 test AX, AX; 168 jnz found; 169 cmp R8, R9; 170 jl loop; 171 mov retVal, 0; 172 jmp end; 173 found: 174 mov retVal, 1; 175 jmp end; 176 }`.format(instruction); 177 } 178 179 T[] storage; 180 size_t _length; 181 } 182 183 /// 184 version (D_InlineAsm_X86_64) unittest 185 { 186 import std.string : format; 187 188 void testSimdSet(T)() 189 { 190 SimdSet!T set; 191 assert(set.insert(1)); 192 assert(set.length == 1); 193 assert(set.contains(1)); 194 assert(!set.insert(1)); 195 set.insert(0); 196 set.insert(20); 197 assert(set.contains(1)); 198 assert(set.contains(0)); 199 assert(!set.contains(10)); 200 assert(!set.contains(50)); 201 assert(set.contains(20)); 202 foreach (T i; 28 .. 127) 203 set.insert(i); 204 foreach (T i; 28 .. 127) 205 assert(set.contains(i), "%d".format(i)); 206 foreach (T i; 28 .. 127) 207 assert(set.remove(i)); 208 assert(set.length == 3, "%d".format(set.length)); 209 assert(set.contains(0)); 210 assert(set.contains(1)); 211 assert(set.contains(20)); 212 assert(!set.contains(28)); 213 } 214 215 testSimdSet!ubyte(); 216 testSimdSet!ushort(); 217 testSimdSet!uint(); 218 testSimdSet!ulong(); 219 testSimdSet!byte(); 220 testSimdSet!short(); 221 testSimdSet!int(); 222 testSimdSet!long(); 223 } 224 225 version (D_InlineAsm_X86_64) struct SimdSet(T) if (!(T.sizeof == 1 226 || T.sizeof == 2 || T.sizeof == 4 || T.sizeof == 8)) 227 { 228 import std.string : format; 229 static assert (false, ("Cannot instantiate SimdSet of type %s because its size " 230 ~ "(%d) does not fit evenly into XMM registers.").format(T.stringof, T.sizeof)); 231 } 232 233 //unittest 234 //{ 235 // static struct ThreeBytes 236 // { 237 // ubyte one; 238 // ubyte two; 239 // ubyte three; 240 // } 241 // 242 // SimdSet!ThreeBytes shouldNotCompile; 243 //} 244 245 private extern (C) void* realloc(void*, size_t); 246 private extern (C) void free(void*);