1 /**
2  * SIMD-accelerated Set
3  * Copyright: © 2015 Economic Modeling Specialists, Intl.
4  * Authors: Brian Schott
5  * License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt Boost License 1.0)
6  */
7 module containers.simdset;
8 
9 /**
10  * Set implementation that is well suited for small sets and simple items.
11  *
12  * Uses SSE instructions to compare multiple elements simultaneously, but has
13  * linear time complexity. Use this container when you need to
14  *
15  * Note: Only works on x86_64. Does NOT add GC ranges. Do not store pointers in
16  * this container unless they are also stored somewhere else.
17  */
18 version (D_InlineAsm_X86_64) struct SimdSet(T) if (T.sizeof == 1
19 	|| T.sizeof == 2 || T.sizeof == 4 || T.sizeof == 8)
20 {
21 	this(this) @disable;
22 
23 	~this()
24 	{
25 		free(storage.ptr);
26 	}
27 
28 	/**
29 	 * Params:
30 	 *     item = the item to check
31 	 * Returns:
32 	 *     true if the set contains the given item
33 	 */
34 	bool contains(T item) const pure nothrow @nogc
35 	{
36 		if (_length == 0)
37 			return false;
38 		bool retVal;
39 		immutable remainder = _length % (16 / T.sizeof);
40 		ushort mask = remainder == 0 ? 0xffff : (1 << (remainder * T.sizeof)) - 1;
41 		//ushort resultMask;
42 		ulong ptrStart = cast(ulong) storage.ptr;
43 		ulong ptrEnd = ptrStart + storage.length * T.sizeof;
44 		static if (T.sizeof == 1)
45 			ulong needle = (cast(ubyte) item) * 0x01010101_01010101;
46 		else static if (T.sizeof == 2)
47 			ulong needle = (cast(ushort) item) * 0x00010001_00010001;
48 		else static if (T.sizeof == 4)
49 			ulong needle = (cast(ulong) item) * 0x00000001_00000001;
50 		else static if (T.sizeof == 8)
51 			ulong needle = cast(ulong) item;
52 		else
53 			static assert(false);
54 		mixin(asmSearch());
55 	end:
56 		//stderr.writeln("storage:   ", storage);
57 		//stderr.writeln("item:      ", item);
58 		//stderr.writefln("T.sizeof:   %d", T.sizeof);
59 		//stderr.writefln("mask:       %016b", mask);
60 		//stderr.writefln("resultMask: %016b", resultMask);
61 		return retVal;
62 	}
63 
64 	/**
65 	 * Inserts the given item into the set.
66 	 *
67 	 * Params:
68 	 *     item = the item to insert
69 	 * Returns:
70 	 *     true if the item was inserted or false if it was already present
71 	 */
72 	bool insert(T item)
73 	{
74 		if (contains(item))
75 			return false;
76 		if (storage.length > _length)
77 			storage[_length] = item;
78 		else
79 		{
80 			immutable size_t cl = (storage.length * T.sizeof);
81 			immutable size_t nl = cl + 16;
82 			storage = (cast(T*) realloc(storage.ptr, nl))[0 .. nl / T.sizeof];
83 			storage[_length] = item;
84 		}
85 		_length++;
86 		return true;
87 	}
88 
89 	/**
90 	 * Removes the given item from the set.
91 	 *
92 	 * Params:
93 	 *     item = the time to remove
94 	 * Returns:
95 	 *     true if the item was removed, false if it was not present
96 	 */
97 	bool remove(T item)
98 	{
99 		import std.algorithm : countUntil;
100 
101 		// TODO: Make this more efficient
102 
103 		ptrdiff_t begin = countUntil(storage, item);
104 		if (begin == -1)
105 			return false;
106 		foreach (i; begin .. _length - 1)
107 			storage[i] = storage[i + 1];
108 		_length--;
109 		return true;
110 	}
111 
112 	/**
113 	 * Returns:
114 	 *     the number of items in the set
115 	 */
116 	size_t length() const pure nothrow @nogc @property
117 	{
118 		return _length;
119 	}
120 
121 	invariant
122 	{
123 		assert((storage.length * T.sizeof) % 16 == 0);
124 	}
125 
126 private:
127 
128 	static string asmSearch()
129 	{
130 		import std.string : format;
131 
132 		static if (T.sizeof == 1)
133 			enum instruction = `pcmpeqb`;
134 		else static if (T.sizeof == 2)
135 			enum instruction = `pcmpeqw`;
136 		else static if (T.sizeof == 4)
137 			enum instruction = `pcmpeqd`;
138 		else static if (T.sizeof == 8)
139 			enum instruction = `pcmpeqq`;
140 		else
141 			static assert(false);
142 
143 		static if (__VERSION__ >= 2067)
144 			string s = `asm pure nothrow @nogc`;
145 		else
146 			string s = `asm`;
147 
148 		return s ~ `
149 		{
150 			mov R8, ptrStart;
151 			mov R9, ptrEnd;
152 			sub R8, 16;
153 			sub R9, 16;
154 			movq XMM0, needle;
155 			shufpd XMM0, XMM0, 0;
156 		loop:
157 			add R8, 16;
158 			movdqu XMM1, [R8];
159 			%s XMM1, XMM0;
160 			pmovmskb RAX, XMM1;
161 			//mov resultMask, AX;
162 			mov BX, AX;
163 			and BX, mask;
164 			cmp R8, R9;
165 			cmove AX, BX;
166 			popcnt AX, AX;
167 			test AX, AX;
168 			jnz found;
169 			cmp R8, R9;
170 			jl loop;
171 			mov retVal, 0;
172 			jmp end;
173 		found:
174 			mov retVal, 1;
175 			jmp end;
176 		}`.format(instruction);
177 	}
178 
179 	T[] storage;
180 	size_t _length;
181 }
182 
183 ///
184 version (D_InlineAsm_X86_64) unittest
185 {
186 	import std.string : format;
187 
188 	void testSimdSet(T)()
189 	{
190 		SimdSet!T set;
191 		assert(set.insert(1));
192 		assert(set.length == 1);
193 		assert(set.contains(1));
194 		assert(!set.insert(1));
195 		set.insert(0);
196 		set.insert(20);
197 		assert(set.contains(1));
198 		assert(set.contains(0));
199 		assert(!set.contains(10));
200 		assert(!set.contains(50));
201 		assert(set.contains(20));
202 		foreach (T i; 28 .. 127)
203 			set.insert(i);
204 		foreach (T i; 28 .. 127)
205 			assert(set.contains(i), "%d".format(i));
206 		foreach (T i; 28 .. 127)
207 			assert(set.remove(i));
208 		assert(set.length == 3, "%d".format(set.length));
209 		assert(set.contains(0));
210 		assert(set.contains(1));
211 		assert(set.contains(20));
212 		assert(!set.contains(28));
213 	}
214 
215 	testSimdSet!ubyte();
216 	testSimdSet!ushort();
217 	testSimdSet!uint();
218 	testSimdSet!ulong();
219 	testSimdSet!byte();
220 	testSimdSet!short();
221 	testSimdSet!int();
222 	testSimdSet!long();
223 }
224 
225 version (D_InlineAsm_X86_64) struct SimdSet(T) if (!(T.sizeof == 1
226 	|| T.sizeof == 2 || T.sizeof == 4 || T.sizeof == 8))
227 {
228 	import std.string : format;
229 	static assert (false, ("Cannot instantiate SimdSet of type %s because its size "
230 		~ "(%d) does not fit evenly into XMM registers.").format(T.stringof, T.sizeof));
231 }
232 
233 //unittest
234 //{
235 //	static struct ThreeBytes
236 //	{
237 //		ubyte one;
238 //		ubyte two;
239 //		ubyte three;
240 //	}
241 //
242 //	SimdSet!ThreeBytes shouldNotCompile;
243 //}
244 
245 private extern (C) void* realloc(void*, size_t);
246 private extern (C) void free(void*);