1 /**
2  * SIMD-accelerated Set
3  * Copyright: © 2015 Economic Modeling Specialists, Intl.
4  * Authors: Brian Schott
5  * License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt Boost License 1.0)
6  */
7 module containers.simdset;
8 
9 /**
10  * Set implementation that is well suited for small sets and simple items.
11  *
12  * Uses SSE instructions to compare multiple elements simultaneously, but has
13  * linear time complexity. Use this container when you need to
14  *
15  * Note: Only works on x86_64. Does NOT add GC ranges. Do not store pointers in
16  * this container unless they are also stored somewhere else.
17  */
18 version (D_InlineAsm_X86_64) struct SimdSet(T) if (T.sizeof == 1
19 	|| T.sizeof == 2 || T.sizeof == 4 || T.sizeof == 8)
20 {
21 	this(this) @disable;
22 
23 	~this()
24 	{
25 		free(cast(void*) storage.ptr);
26 	}
27 
28 	/**
29 	 * Params:
30 	 *     item = the item to check
31 	 * Returns:
32 	 *     true if the set contains the given item
33 	 */
34 	bool contains(T item) const pure nothrow @nogc
35 	{
36 		if (_length == 0)
37 			return false;
38 		bool retVal;
39 		immutable remainder = _length % (16 / T.sizeof);
40 		ushort mask = remainder == 0 ? 0xffff : (1 << (remainder * T.sizeof)) - 1;
41 		//ushort resultMask;
42 		ulong ptrStart = cast(ulong) storage.ptr;
43 		ulong ptrEnd = ptrStart + storage.length * T.sizeof;
44 		static if (T.sizeof == 1)
45 			ulong needle = (cast(ubyte) item) * 0x01010101_01010101;
46 		else static if (T.sizeof == 2)
47 			ulong needle = (cast(ushort) item) * 0x00010001_00010001;
48 		else static if (T.sizeof == 4)
49 			ulong needle = (cast(ulong) item) * 0x00000001_00000001;
50 		else static if (T.sizeof == 8)
51 			ulong needle = cast(ulong) item;
52 		else
53 			static assert(false);
54 		mixin(asmSearch());
55 	end:
56 		return retVal;
57 	}
58 
59 	/**
60 	 * Inserts the given item into the set.
61 	 *
62 	 * Params:
63 	 *     item = the item to insert
64 	 * Returns:
65 	 *     true if the item was inserted or false if it was already present
66 	 */
67 	bool insert(T item)
68 	{
69 		if (contains(item))
70 			return false;
71 		if (storage.length > _length)
72 			storage[_length] = item;
73 		else
74 		{
75 			immutable size_t cl = (storage.length * T.sizeof);
76 			immutable size_t nl = cl + 16;
77 			storage = (cast(T*) realloc(storage.ptr, nl))[0 .. nl / T.sizeof];
78 			storage[_length] = item;
79 		}
80 		_length++;
81 		return true;
82 	}
83 
84 	/**
85 	 * Removes the given item from the set.
86 	 *
87 	 * Params:
88 	 *     item = the time to remove
89 	 * Returns:
90 	 *     true if the item was removed, false if it was not present
91 	 */
92 	bool remove(T item)
93 	{
94 		import std.algorithm : countUntil;
95 
96 		// TODO: Make this more efficient
97 
98 		ptrdiff_t begin = countUntil(storage, item);
99 		if (begin == -1)
100 			return false;
101 		foreach (i; begin .. _length - 1)
102 			storage[i] = storage[i + 1];
103 		_length--;
104 		return true;
105 	}
106 
107 	/**
108 	 * Returns:
109 	 *     the number of items in the set
110 	 */
111 	size_t length() const pure nothrow @nogc @property
112 	{
113 		return _length;
114 	}
115 
116 	invariant
117 	{
118 		assert((storage.length * T.sizeof) % 16 == 0);
119 	}
120 
121 private:
122 
123 	static string asmSearch()
124 	{
125 		import std.string : format;
126 
127 		static if (T.sizeof == 1)
128 			enum instruction = `pcmpeqb`;
129 		else static if (T.sizeof == 2)
130 			enum instruction = `pcmpeqw`;
131 		else static if (T.sizeof == 4)
132 			enum instruction = `pcmpeqd`;
133 		else static if (T.sizeof == 8)
134 			enum instruction = `pcmpeqq`;
135 		else
136 			static assert(false);
137 
138 		static if (__VERSION__ >= 2067)
139 			string s = `asm pure nothrow @nogc`;
140 		else
141 			string s = `asm`;
142 
143 		return s ~ `
144 		{
145 			mov R8, ptrStart;
146 			mov R9, ptrEnd;
147 			sub R8, 16;
148 			sub R9, 16;
149 			movq XMM0, needle;
150 			shufpd XMM0, XMM0, 0;
151 		loop:
152 			add R8, 16;
153 			movdqu XMM1, [R8];
154 			%s XMM1, XMM0;
155 			pmovmskb RAX, XMM1;
156 			//mov resultMask, AX;
157 			mov BX, AX;
158 			and BX, mask;
159 			cmp R8, R9;
160 			cmove AX, BX;
161 			popcnt AX, AX;
162 			test AX, AX;
163 			jnz found;
164 			cmp R8, R9;
165 			jl loop;
166 			mov retVal, 0;
167 			jmp end;
168 		found:
169 			mov retVal, 1;
170 			jmp end;
171 		}`.format(instruction);
172 	}
173 
174 	T[] storage;
175 	size_t _length;
176 }
177 
178 ///
179 version (D_InlineAsm_X86_64) unittest
180 {
181 	import std.string : format;
182 
183 	void testSimdSet(T)()
184 	{
185 		SimdSet!T set;
186 		assert(set.insert(1));
187 		assert(set.length == 1);
188 		assert(set.contains(1));
189 		assert(!set.insert(1));
190 		set.insert(0);
191 		set.insert(20);
192 		assert(set.contains(1));
193 		assert(set.contains(0));
194 		assert(!set.contains(10));
195 		assert(!set.contains(50));
196 		assert(set.contains(20));
197 		foreach (T i; 28 .. 127)
198 			set.insert(i);
199 		foreach (T i; 28 .. 127)
200 			assert(set.contains(i), "%d".format(i));
201 		foreach (T i; 28 .. 127)
202 			assert(set.remove(i));
203 		assert(set.length == 3, "%d".format(set.length));
204 		assert(set.contains(0));
205 		assert(set.contains(1));
206 		assert(set.contains(20));
207 		assert(!set.contains(28));
208 	}
209 
210 	testSimdSet!ubyte();
211 	testSimdSet!ushort();
212 	testSimdSet!uint();
213 	testSimdSet!ulong();
214 	testSimdSet!byte();
215 	testSimdSet!short();
216 	testSimdSet!int();
217 	testSimdSet!long();
218 }
219 
220 version (D_InlineAsm_X86_64) struct SimdSet(T) if (!(T.sizeof == 1
221 	|| T.sizeof == 2 || T.sizeof == 4 || T.sizeof == 8))
222 {
223 	import std.string : format;
224 	static assert (false, ("Cannot instantiate SimdSet of type %s because its size "
225 		~ "(%d) does not fit evenly into XMM registers.").format(T.stringof, T.sizeof));
226 }
227 
228 private extern (C) void* realloc(void*, size_t);
229 private extern (C) void free(void*);