1 /**
2  * SIMD-accelerated Set
3  * Copyright: © 2015 Economic Modeling Specialists, Intl.
4  * Authors: Brian Schott
5  * License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt Boost License 1.0)
6  */
7 module containers.simdset;
8 
9 private import std.experimental.allocator.mallocator : Mallocator;
10 
11 /**
12  * Set implementation that is well suited for small sets and simple items.
13  *
14  * Uses SSE instructions to compare multiple elements simultaneously, but has
15  * linear time complexity.
16  *
17  * Note: Only works on x86_64. Does NOT add GC ranges. Do not store pointers in
18  * this container unless they are also stored somewhere else.
19  *
20  * Params:
21  *     T = the element type
22  *     Allocator = the allocator to use. Defaults to `Mallocator`.
23  */
24 version (D_InlineAsm_X86_64) struct SimdSet(T, Allocator = Mallocator)
25 	if (T.sizeof == 1 || T.sizeof == 2 || T.sizeof == 4 || T.sizeof == 8)
26 {
27 	this(this) @disable;
28 
29 	private import std.experimental.allocator.common : stateSize;
30 
31 	static if (stateSize!Allocator != 0)
32 	{
33 		/// No default construction if an allocator must be provided.
34 		this() @disable;
35 
36 		/**
37 		 * Use the given `allocator` for allocations.
38 		 */
39 		this(Allocator allocator)
40 		in
41 		{
42 			assert(allocator !is null, "Allocator must not be null");
43 		}
44 		body
45 		{
46 			this.allocator = allocator;
47 		}
48 	}
49 
50 	~this()
51 	{
52 		allocator.deallocate(cast(void[]) storage);
53 	}
54 
55 	/**
56 	 * Params:
57 	 *     item = the item to check
58 	 * Returns:
59 	 *     true if the set contains the given item
60 	 */
61 	bool contains(T item) const pure nothrow @nogc
62 	{
63 		if (_length == 0)
64 			return false;
65 		bool retVal;
66 		immutable remainder = _length % (16 / T.sizeof);
67 		ushort mask = remainder == 0 ? 0xffff : (1 << (remainder * T.sizeof)) - 1;
68 		//ushort resultMask;
69 		ulong ptrStart = cast(ulong) storage.ptr;
70 		ulong ptrEnd = ptrStart + storage.length * T.sizeof;
71 		static if (T.sizeof == 1)
72 			ulong needle = (cast(ubyte) item) * 0x01010101_01010101;
73 		else static if (T.sizeof == 2)
74 			ulong needle = (cast(ushort) item) * 0x00010001_00010001;
75 		else static if (T.sizeof == 4)
76 			ulong needle = (cast(ulong) item) * 0x00000001_00000001;
77 		else static if (T.sizeof == 8)
78 			ulong needle = cast(ulong) item;
79 		else
80 			static assert(false);
81 		mixin(asmSearch());
82 	end:
83 		return retVal;
84 	}
85 
86 	/**
87 	 * Inserts the given item into the set.
88 	 *
89 	 * Params:
90 	 *     item = the item to insert
91 	 * Returns:
92 	 *     true if the item was inserted or false if it was already present
93 	 */
94 	bool insert(T item)
95 	{
96 		if (contains(item))
97 			return false;
98 		if (storage.length > _length)
99 			storage[_length] = item;
100 		else
101 		{
102 			immutable size_t cl = (storage.length * T.sizeof);
103 			immutable size_t nl = cl + 16;
104 			void[] a = cast(void[]) storage;
105 			allocator.reallocate(a, nl);
106 			storage = cast(typeof(storage)) a;
107 			storage[_length] = item;
108 		}
109 		_length++;
110 		return true;
111 	}
112 
113 	/**
114 	 * Removes the given item from the set.
115 	 *
116 	 * Params:
117 	 *     item = the time to remove
118 	 * Returns:
119 	 *     true if the item was removed, false if it was not present
120 	 */
121 	bool remove(T item)
122 	{
123 		import std.algorithm : countUntil;
124 
125 		// TODO: Make this more efficient
126 
127 		ptrdiff_t begin = countUntil(storage, item);
128 		if (begin == -1)
129 			return false;
130 		foreach (i; begin .. _length - 1)
131 			storage[i] = storage[i + 1];
132 		_length--;
133 		return true;
134 	}
135 
136 	/**
137 	 * Slice operator
138 	 */
139 	auto opSlice(this This)()
140 	{
141 		import containers.internal.element_type : ContainerElementType;
142 		return cast(ContainerElementType!(This, T)[]) storage[0 .. _length];
143 	}
144 
145 	/**
146 	 * Returns:
147 	 *     the number of items in the set
148 	 */
149 	size_t length() const pure nothrow @nogc @property
150 	{
151 		return _length;
152 	}
153 
154 	invariant
155 	{
156 		assert((storage.length * T.sizeof) % 16 == 0);
157 	}
158 
159 private:
160 
161 	import containers.internal.storage_type : ContainerStorageType;
162 	private import containers.internal.mixins : AllocatorState;
163 
164 	static string asmSearch()
165 	{
166 		import std..string : format;
167 
168 		static if (T.sizeof == 1)
169 			enum instruction = `pcmpeqb`;
170 		else static if (T.sizeof == 2)
171 			enum instruction = `pcmpeqw`;
172 		else static if (T.sizeof == 4)
173 			enum instruction = `pcmpeqd`;
174 		else static if (T.sizeof == 8)
175 			enum instruction = `pcmpeqq`;
176 		else
177 			static assert(false);
178 
179 		static if (__VERSION__ >= 2067)
180 			string s = `asm pure nothrow @nogc`;
181 		else
182 			string s = `asm`;
183 
184 		return s ~ `
185 		{
186 			mov R8, ptrStart;
187 			mov R9, ptrEnd;
188 			sub R8, 16;
189 			sub R9, 16;
190 			movq XMM0, needle;
191 			shufpd XMM0, XMM0, 0;
192 		loop:
193 			add R8, 16;
194 			movdqu XMM1, [R8];
195 			%s XMM1, XMM0;
196 			pmovmskb RAX, XMM1;
197 			//mov resultMask, AX;
198 			mov BX, AX;
199 			and BX, mask;
200 			cmp R8, R9;
201 			cmove AX, BX;
202 			popcnt AX, AX;
203 			test AX, AX;
204 			jnz found;
205 			cmp R8, R9;
206 			jl loop;
207 			mov retVal, 0;
208 			jmp end;
209 		found:
210 			mov retVal, 1;
211 			jmp end;
212 		}`.format(instruction);
213 	}
214 
215 	mixin AllocatorState!Allocator;
216 	ContainerStorageType!(T)[] storage;
217 	size_t _length;
218 }
219 
220 ///
221 version (D_InlineAsm_X86_64) unittest
222 {
223 	import std..string : format;
224 
225 	void testSimdSet(T)()
226 	{
227 		SimdSet!T set;
228 		assert(set.insert(1));
229 		assert(set.length == 1);
230 		assert(set.contains(1));
231 		assert(!set.insert(1));
232 		set.insert(0);
233 		set.insert(20);
234 		assert(set.contains(1));
235 		assert(set.contains(0));
236 		assert(!set.contains(10));
237 		assert(!set.contains(50));
238 		assert(set.contains(20));
239 		foreach (T i; 28 .. 127)
240 			set.insert(i);
241 		foreach (T i; 28 .. 127)
242 			assert(set.contains(i), "%d".format(i));
243 		foreach (T i; 28 .. 127)
244 			assert(set.remove(i));
245 		assert(set.length == 3, "%d".format(set.length));
246 		assert(set.contains(0));
247 		assert(set.contains(1));
248 		assert(set.contains(20));
249 		assert(!set.contains(28));
250 	}
251 
252 	testSimdSet!ubyte();
253 	testSimdSet!ushort();
254 	testSimdSet!uint();
255 	testSimdSet!ulong();
256 	testSimdSet!byte();
257 	testSimdSet!short();
258 	testSimdSet!int();
259 	testSimdSet!long();
260 }
261 
262 version (D_InlineAsm_X86_64) struct SimdSet(T) if (!(T.sizeof == 1
263 	|| T.sizeof == 2 || T.sizeof == 4 || T.sizeof == 8))
264 {
265 	import std..string : format;
266 	static assert (false, ("Cannot instantiate SimdSet of type %s because its size "
267 		~ "(%d) does not fit evenly into XMM registers.").format(T.stringof, T.sizeof));
268 }