xref: /aosp_15_r20/external/swiftshader/src/Reactor/SIMD.cpp (revision 03ce13f70fcc45d86ee91b7ee4cab1936a95046e)
1 // Copyright 2022 The SwiftShader Authors. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //    http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "SIMD.hpp"
16 
17 #include "Assert.hpp"
18 #include "Debug.hpp"
19 #include "Print.hpp"
20 
21 #include <cmath>
22 
23 namespace rr {
24 
Int()25 SIMD::Int::Int()
26     : XYZW(this)
27 {
28 }
29 
Int(RValue<SIMD::Float> cast)30 SIMD::Int::Int(RValue<SIMD::Float> cast)
31     : XYZW(this)
32 {
33 	Value *xyzw = Nucleus::createFPToSI(cast.value(), SIMD::Int::type());
34 
35 	storeValue(xyzw);
36 }
37 
Int(int broadcast)38 SIMD::Int::Int(int broadcast)
39     : XYZW(this)
40 {
41 	std::vector<int64_t> constantVector = { broadcast };
42 	storeValue(Nucleus::createConstantVector(constantVector, type()));
43 }
44 
Int(int x,int y,int z,int w)45 SIMD::Int::Int(int x, int y, int z, int w)
46     : XYZW(this)
47 {
48 	std::vector<int64_t> constantVector = { x, y, z, w };
49 	storeValue(Nucleus::createConstantVector(constantVector, type()));
50 }
51 
Int(std::vector<int> v)52 SIMD::Int::Int(std::vector<int> v)
53     : XYZW(this)
54 {
55 	std::vector<int64_t> constantVector;
56 	for(int i : v) { constantVector.push_back(i); }
57 	storeValue(Nucleus::createConstantVector(constantVector, type()));
58 }
59 
Int(std::function<int (int)> LaneValueProducer)60 SIMD::Int::Int(std::function<int(int)> LaneValueProducer)
61     : XYZW(this)
62 {
63 	std::vector<int64_t> constantVector;
64 	for(int i = 0; i < SIMD::Width; i++) { constantVector.push_back(LaneValueProducer(i)); }
65 	storeValue(Nucleus::createConstantVector(constantVector, type()));
66 }
67 
Int(RValue<SIMD::Int> rhs)68 SIMD::Int::Int(RValue<SIMD::Int> rhs)
69     : XYZW(this)
70 {
71 	store(rhs);
72 }
73 
// Copy constructor: loads the current value of rhs and stores it here.
SIMD::Int::Int(const SIMD::Int &rhs)
    : XYZW(this)
{
	this->store(rhs.load());
}
79 
Int(const Reference<SIMD::Int> & rhs)80 SIMD::Int::Int(const Reference<SIMD::Int> &rhs)
81     : XYZW(this)
82 {
83 	store(rhs.load());
84 }
85 
Int(RValue<SIMD::UInt> rhs)86 SIMD::Int::Int(RValue<SIMD::UInt> rhs)
87     : XYZW(this)
88 {
89 	storeValue(rhs.value());
90 }
91 
// Initializes from an unsigned vector variable by loading its underlying
// value and storing it directly.
SIMD::Int::Int(const SIMD::UInt &rhs)
    : XYZW(this)
{
	Value *bits = rhs.loadValue();
	storeValue(bits);
}
97 
Int(const Reference<SIMD::UInt> & rhs)98 SIMD::Int::Int(const Reference<SIMD::UInt> &rhs)
99     : XYZW(this)
100 {
101 	storeValue(rhs.loadValue());
102 }
103 
Int(const scalar::Int & rhs)104 SIMD::Int::Int(const scalar::Int &rhs)
105     : XYZW(this)
106 {
107 	*this = RValue<scalar::Int>(rhs.loadValue());
108 }
109 
Int(const Reference<scalar::Int> & rhs)110 SIMD::Int::Int(const Reference<scalar::Int> &rhs)
111     : XYZW(this)
112 {
113 	*this = RValue<scalar::Int>(rhs.loadValue());
114 }
115 
operator =(int x)116 RValue<SIMD::Int> SIMD::Int::operator=(int x)
117 {
118 	return *this = SIMD::Int(x);
119 }
120 
operator =(RValue<SIMD::Int> rhs)121 RValue<SIMD::Int> SIMD::Int::operator=(RValue<SIMD::Int> rhs)
122 {
123 	return store(rhs);
124 }
125 
operator =(const SIMD::Int & rhs)126 RValue<SIMD::Int> SIMD::Int::operator=(const SIMD::Int &rhs)
127 {
128 	return store(rhs.load());
129 }
130 
operator =(const Reference<SIMD::Int> & rhs)131 RValue<SIMD::Int> SIMD::Int::operator=(const Reference<SIMD::Int> &rhs)
132 {
133 	return store(rhs.load());
134 }
135 
operator +(RValue<SIMD::Int> lhs,RValue<SIMD::Int> rhs)136 RValue<SIMD::Int> operator+(RValue<SIMD::Int> lhs, RValue<SIMD::Int> rhs)
137 {
138 	return RValue<SIMD::Int>(Nucleus::createAdd(lhs.value(), rhs.value()));
139 }
140 
operator -(RValue<SIMD::Int> lhs,RValue<SIMD::Int> rhs)141 RValue<SIMD::Int> operator-(RValue<SIMD::Int> lhs, RValue<SIMD::Int> rhs)
142 {
143 	return RValue<SIMD::Int>(Nucleus::createSub(lhs.value(), rhs.value()));
144 }
145 
operator *(RValue<SIMD::Int> lhs,RValue<SIMD::Int> rhs)146 RValue<SIMD::Int> operator*(RValue<SIMD::Int> lhs, RValue<SIMD::Int> rhs)
147 {
148 	return RValue<SIMD::Int>(Nucleus::createMul(lhs.value(), rhs.value()));
149 }
150 
operator /(RValue<SIMD::Int> lhs,RValue<SIMD::Int> rhs)151 RValue<SIMD::Int> operator/(RValue<SIMD::Int> lhs, RValue<SIMD::Int> rhs)
152 {
153 	return RValue<SIMD::Int>(Nucleus::createSDiv(lhs.value(), rhs.value()));
154 }
155 
operator %(RValue<SIMD::Int> lhs,RValue<SIMD::Int> rhs)156 RValue<SIMD::Int> operator%(RValue<SIMD::Int> lhs, RValue<SIMD::Int> rhs)
157 {
158 	return RValue<SIMD::Int>(Nucleus::createSRem(lhs.value(), rhs.value()));
159 }
160 
operator &(RValue<SIMD::Int> lhs,RValue<SIMD::Int> rhs)161 RValue<SIMD::Int> operator&(RValue<SIMD::Int> lhs, RValue<SIMD::Int> rhs)
162 {
163 	return RValue<SIMD::Int>(Nucleus::createAnd(lhs.value(), rhs.value()));
164 }
165 
operator |(RValue<SIMD::Int> lhs,RValue<SIMD::Int> rhs)166 RValue<SIMD::Int> operator|(RValue<SIMD::Int> lhs, RValue<SIMD::Int> rhs)
167 {
168 	return RValue<SIMD::Int>(Nucleus::createOr(lhs.value(), rhs.value()));
169 }
170 
operator ^(RValue<SIMD::Int> lhs,RValue<SIMD::Int> rhs)171 RValue<SIMD::Int> operator^(RValue<SIMD::Int> lhs, RValue<SIMD::Int> rhs)
172 {
173 	return RValue<SIMD::Int>(Nucleus::createXor(lhs.value(), rhs.value()));
174 }
175 
operator <<(RValue<SIMD::Int> lhs,RValue<SIMD::Int> rhs)176 RValue<SIMD::Int> operator<<(RValue<SIMD::Int> lhs, RValue<SIMD::Int> rhs)
177 {
178 	return RValue<SIMD::Int>(Nucleus::createShl(lhs.value(), rhs.value()));
179 }
180 
operator >>(RValue<SIMD::Int> lhs,RValue<SIMD::Int> rhs)181 RValue<SIMD::Int> operator>>(RValue<SIMD::Int> lhs, RValue<SIMD::Int> rhs)
182 {
183 	return RValue<SIMD::Int>(Nucleus::createAShr(lhs.value(), rhs.value()));
184 }
185 
operator +=(SIMD::Int & lhs,RValue<SIMD::Int> rhs)186 RValue<SIMD::Int> operator+=(SIMD::Int &lhs, RValue<SIMD::Int> rhs)
187 {
188 	return lhs = lhs + rhs;
189 }
190 
operator -=(SIMD::Int & lhs,RValue<SIMD::Int> rhs)191 RValue<SIMD::Int> operator-=(SIMD::Int &lhs, RValue<SIMD::Int> rhs)
192 {
193 	return lhs = lhs - rhs;
194 }
195 
operator *=(SIMD::Int & lhs,RValue<SIMD::Int> rhs)196 RValue<SIMD::Int> operator*=(SIMD::Int &lhs, RValue<SIMD::Int> rhs)
197 {
198 	return lhs = lhs * rhs;
199 }
200 
201 //	RValue<SIMD::Int> operator/=(SIMD::Int &lhs, RValue<SIMD::Int> rhs)
202 //	{
203 //		return lhs = lhs / rhs;
204 //	}
205 
206 //	RValue<SIMD::Int> operator%=(SIMD::Int &lhs, RValue<SIMD::Int> rhs)
207 //	{
208 //		return lhs = lhs % rhs;
209 //	}
210 
operator &=(SIMD::Int & lhs,RValue<SIMD::Int> rhs)211 RValue<SIMD::Int> operator&=(SIMD::Int &lhs, RValue<SIMD::Int> rhs)
212 {
213 	return lhs = lhs & rhs;
214 }
215 
operator |=(SIMD::Int & lhs,RValue<SIMD::Int> rhs)216 RValue<SIMD::Int> operator|=(SIMD::Int &lhs, RValue<SIMD::Int> rhs)
217 {
218 	return lhs = lhs | rhs;
219 }
220 
operator ^=(SIMD::Int & lhs,RValue<SIMD::Int> rhs)221 RValue<SIMD::Int> operator^=(SIMD::Int &lhs, RValue<SIMD::Int> rhs)
222 {
223 	return lhs = lhs ^ rhs;
224 }
225 
operator <<=(SIMD::Int & lhs,unsigned char rhs)226 RValue<SIMD::Int> operator<<=(SIMD::Int &lhs, unsigned char rhs)
227 {
228 	return lhs = lhs << rhs;
229 }
230 
operator >>=(SIMD::Int & lhs,unsigned char rhs)231 RValue<SIMD::Int> operator>>=(SIMD::Int &lhs, unsigned char rhs)
232 {
233 	return lhs = lhs >> rhs;
234 }
235 
operator +(RValue<SIMD::Int> val)236 RValue<SIMD::Int> operator+(RValue<SIMD::Int> val)
237 {
238 	return val;
239 }
240 
operator -(RValue<SIMD::Int> val)241 RValue<SIMD::Int> operator-(RValue<SIMD::Int> val)
242 {
243 	return RValue<SIMD::Int>(Nucleus::createNeg(val.value()));
244 }
245 
operator ~(RValue<SIMD::Int> val)246 RValue<SIMD::Int> operator~(RValue<SIMD::Int> val)
247 {
248 	return RValue<SIMD::Int>(Nucleus::createNot(val.value()));
249 }
250 
Extract(RValue<SIMD::Int> x,int i)251 RValue<scalar::Int> Extract(RValue<SIMD::Int> x, int i)
252 {
253 	return RValue<scalar::Int>(Nucleus::createExtractElement(x.value(), scalar::Int::type(), i));
254 }
255 
Insert(RValue<SIMD::Int> x,RValue<scalar::Int> element,int i)256 RValue<SIMD::Int> Insert(RValue<SIMD::Int> x, RValue<scalar::Int> element, int i)
257 {
258 	return RValue<SIMD::Int>(Nucleus::createInsertElement(x.value(), element.value(), i));
259 }
260 
UInt()261 SIMD::UInt::UInt()
262     : XYZW(this)
263 {
264 }
265 
UInt(int broadcast)266 SIMD::UInt::UInt(int broadcast)
267     : XYZW(this)
268 {
269 	std::vector<int64_t> constantVector = { broadcast };
270 	storeValue(Nucleus::createConstantVector(constantVector, type()));
271 }
272 
UInt(int x,int y,int z,int w)273 SIMD::UInt::UInt(int x, int y, int z, int w)
274     : XYZW(this)
275 {
276 	std::vector<int64_t> constantVector = { x, y, z, w };
277 	storeValue(Nucleus::createConstantVector(constantVector, type()));
278 }
279 
UInt(std::vector<int> v)280 SIMD::UInt::UInt(std::vector<int> v)
281     : XYZW(this)
282 {
283 	std::vector<int64_t> constantVector;
284 	for(int i : v) { constantVector.push_back(i); }
285 	storeValue(Nucleus::createConstantVector(constantVector, type()));
286 }
287 
UInt(std::function<int (int)> LaneValueProducer)288 SIMD::UInt::UInt(std::function<int(int)> LaneValueProducer)
289     : XYZW(this)
290 {
291 	std::vector<int64_t> constantVector;
292 	for(int i = 0; i < SIMD::Width; i++) { constantVector.push_back(LaneValueProducer(i)); }
293 	storeValue(Nucleus::createConstantVector(constantVector, type()));
294 }
295 
UInt(RValue<SIMD::UInt> rhs)296 SIMD::UInt::UInt(RValue<SIMD::UInt> rhs)
297     : XYZW(this)
298 {
299 	store(rhs);
300 }
301 
// Copy constructor: loads the current value of rhs and stores it here.
SIMD::UInt::UInt(const SIMD::UInt &rhs)
    : XYZW(this)
{
	this->store(rhs.load());
}
307 
UInt(const Reference<SIMD::UInt> & rhs)308 SIMD::UInt::UInt(const Reference<SIMD::UInt> &rhs)
309     : XYZW(this)
310 {
311 	store(rhs.load());
312 }
313 
UInt(RValue<SIMD::Int> rhs)314 SIMD::UInt::UInt(RValue<SIMD::Int> rhs)
315     : XYZW(this)
316 {
317 	storeValue(rhs.value());
318 }
319 
// Initializes from a signed vector variable by loading its underlying value
// and storing it directly.
SIMD::UInt::UInt(const SIMD::Int &rhs)
    : XYZW(this)
{
	Value *bits = rhs.loadValue();
	storeValue(bits);
}
325 
UInt(const Reference<SIMD::Int> & rhs)326 SIMD::UInt::UInt(const Reference<SIMD::Int> &rhs)
327     : XYZW(this)
328 {
329 	storeValue(rhs.loadValue());
330 }
331 
UInt(const scalar::UInt & rhs)332 SIMD::UInt::UInt(const scalar::UInt &rhs)
333     : XYZW(this)
334 {
335 	*this = RValue<scalar::UInt>(rhs.loadValue());
336 }
337 
UInt(const Reference<scalar::UInt> & rhs)338 SIMD::UInt::UInt(const Reference<scalar::UInt> &rhs)
339     : XYZW(this)
340 {
341 	*this = RValue<scalar::UInt>(rhs.loadValue());
342 }
343 
operator =(RValue<SIMD::UInt> rhs)344 RValue<SIMD::UInt> SIMD::UInt::operator=(RValue<SIMD::UInt> rhs)
345 {
346 	return store(rhs);
347 }
348 
operator =(const SIMD::UInt & rhs)349 RValue<SIMD::UInt> SIMD::UInt::operator=(const SIMD::UInt &rhs)
350 {
351 	return store(rhs.load());
352 }
353 
operator =(const Reference<SIMD::UInt> & rhs)354 RValue<SIMD::UInt> SIMD::UInt::operator=(const Reference<SIMD::UInt> &rhs)
355 {
356 	return store(rhs.load());
357 }
358 
operator +(RValue<SIMD::UInt> lhs,RValue<SIMD::UInt> rhs)359 RValue<SIMD::UInt> operator+(RValue<SIMD::UInt> lhs, RValue<SIMD::UInt> rhs)
360 {
361 	return RValue<SIMD::UInt>(Nucleus::createAdd(lhs.value(), rhs.value()));
362 }
363 
operator -(RValue<SIMD::UInt> lhs,RValue<SIMD::UInt> rhs)364 RValue<SIMD::UInt> operator-(RValue<SIMD::UInt> lhs, RValue<SIMD::UInt> rhs)
365 {
366 	return RValue<SIMD::UInt>(Nucleus::createSub(lhs.value(), rhs.value()));
367 }
368 
operator *(RValue<SIMD::UInt> lhs,RValue<SIMD::UInt> rhs)369 RValue<SIMD::UInt> operator*(RValue<SIMD::UInt> lhs, RValue<SIMD::UInt> rhs)
370 {
371 	return RValue<SIMD::UInt>(Nucleus::createMul(lhs.value(), rhs.value()));
372 }
373 
operator /(RValue<SIMD::UInt> lhs,RValue<SIMD::UInt> rhs)374 RValue<SIMD::UInt> operator/(RValue<SIMD::UInt> lhs, RValue<SIMD::UInt> rhs)
375 {
376 	return RValue<SIMD::UInt>(Nucleus::createUDiv(lhs.value(), rhs.value()));
377 }
378 
operator %(RValue<SIMD::UInt> lhs,RValue<SIMD::UInt> rhs)379 RValue<SIMD::UInt> operator%(RValue<SIMD::UInt> lhs, RValue<SIMD::UInt> rhs)
380 {
381 	return RValue<SIMD::UInt>(Nucleus::createURem(lhs.value(), rhs.value()));
382 }
383 
operator &(RValue<SIMD::UInt> lhs,RValue<SIMD::UInt> rhs)384 RValue<SIMD::UInt> operator&(RValue<SIMD::UInt> lhs, RValue<SIMD::UInt> rhs)
385 {
386 	return RValue<SIMD::UInt>(Nucleus::createAnd(lhs.value(), rhs.value()));
387 }
388 
operator |(RValue<SIMD::UInt> lhs,RValue<SIMD::UInt> rhs)389 RValue<SIMD::UInt> operator|(RValue<SIMD::UInt> lhs, RValue<SIMD::UInt> rhs)
390 {
391 	return RValue<SIMD::UInt>(Nucleus::createOr(lhs.value(), rhs.value()));
392 }
393 
operator ^(RValue<SIMD::UInt> lhs,RValue<SIMD::UInt> rhs)394 RValue<SIMD::UInt> operator^(RValue<SIMD::UInt> lhs, RValue<SIMD::UInt> rhs)
395 {
396 	return RValue<SIMD::UInt>(Nucleus::createXor(lhs.value(), rhs.value()));
397 }
398 
operator <<(RValue<SIMD::UInt> lhs,RValue<SIMD::UInt> rhs)399 RValue<SIMD::UInt> operator<<(RValue<SIMD::UInt> lhs, RValue<SIMD::UInt> rhs)
400 {
401 	return RValue<SIMD::UInt>(Nucleus::createShl(lhs.value(), rhs.value()));
402 }
403 
operator >>(RValue<SIMD::UInt> lhs,RValue<SIMD::UInt> rhs)404 RValue<SIMD::UInt> operator>>(RValue<SIMD::UInt> lhs, RValue<SIMD::UInt> rhs)
405 {
406 	return RValue<SIMD::UInt>(Nucleus::createLShr(lhs.value(), rhs.value()));
407 }
408 
operator +=(SIMD::UInt & lhs,RValue<SIMD::UInt> rhs)409 RValue<SIMD::UInt> operator+=(SIMD::UInt &lhs, RValue<SIMD::UInt> rhs)
410 {
411 	return lhs = lhs + rhs;
412 }
413 
operator -=(SIMD::UInt & lhs,RValue<SIMD::UInt> rhs)414 RValue<SIMD::UInt> operator-=(SIMD::UInt &lhs, RValue<SIMD::UInt> rhs)
415 {
416 	return lhs = lhs - rhs;
417 }
418 
operator *=(SIMD::UInt & lhs,RValue<SIMD::UInt> rhs)419 RValue<SIMD::UInt> operator*=(SIMD::UInt &lhs, RValue<SIMD::UInt> rhs)
420 {
421 	return lhs = lhs * rhs;
422 }
423 
424 //	RValue<SIMD::UInt> operator/=(SIMD::UInt &lhs, RValue<SIMD::UInt> rhs)
425 //	{
426 //		return lhs = lhs / rhs;
427 //	}
428 
429 //	RValue<SIMD::UInt> operator%=(SIMD::UInt &lhs, RValue<SIMD::UInt> rhs)
430 //	{
431 //		return lhs = lhs % rhs;
432 //	}
433 
operator &=(SIMD::UInt & lhs,RValue<SIMD::UInt> rhs)434 RValue<SIMD::UInt> operator&=(SIMD::UInt &lhs, RValue<SIMD::UInt> rhs)
435 {
436 	return lhs = lhs & rhs;
437 }
438 
operator |=(SIMD::UInt & lhs,RValue<SIMD::UInt> rhs)439 RValue<SIMD::UInt> operator|=(SIMD::UInt &lhs, RValue<SIMD::UInt> rhs)
440 {
441 	return lhs = lhs | rhs;
442 }
443 
operator ^=(SIMD::UInt & lhs,RValue<SIMD::UInt> rhs)444 RValue<SIMD::UInt> operator^=(SIMD::UInt &lhs, RValue<SIMD::UInt> rhs)
445 {
446 	return lhs = lhs ^ rhs;
447 }
448 
operator <<=(SIMD::UInt & lhs,unsigned char rhs)449 RValue<SIMD::UInt> operator<<=(SIMD::UInt &lhs, unsigned char rhs)
450 {
451 	return lhs = lhs << rhs;
452 }
453 
operator >>=(SIMD::UInt & lhs,unsigned char rhs)454 RValue<SIMD::UInt> operator>>=(SIMD::UInt &lhs, unsigned char rhs)
455 {
456 	return lhs = lhs >> rhs;
457 }
458 
operator +(RValue<SIMD::UInt> val)459 RValue<SIMD::UInt> operator+(RValue<SIMD::UInt> val)
460 {
461 	return val;
462 }
463 
operator -(RValue<SIMD::UInt> val)464 RValue<SIMD::UInt> operator-(RValue<SIMD::UInt> val)
465 {
466 	return RValue<SIMD::UInt>(Nucleus::createNeg(val.value()));
467 }
468 
operator ~(RValue<SIMD::UInt> val)469 RValue<SIMD::UInt> operator~(RValue<SIMD::UInt> val)
470 {
471 	return RValue<SIMD::UInt>(Nucleus::createNot(val.value()));
472 }
473 
Extract(RValue<SIMD::UInt> x,int i)474 RValue<scalar::UInt> Extract(RValue<SIMD::UInt> x, int i)
475 {
476 	return RValue<scalar::UInt>(Nucleus::createExtractElement(x.value(), scalar::Int::type(), i));
477 }
478 
Insert(RValue<SIMD::UInt> x,RValue<scalar::UInt> element,int i)479 RValue<SIMD::UInt> Insert(RValue<SIMD::UInt> x, RValue<scalar::UInt> element, int i)
480 {
481 	return RValue<SIMD::UInt>(Nucleus::createInsertElement(x.value(), element.value(), i));
482 }
483 
Float(RValue<SIMD::Int> cast)484 SIMD::Float::Float(RValue<SIMD::Int> cast)
485     : XYZW(this)
486 {
487 	Value *xyzw = Nucleus::createSIToFP(cast.value(), SIMD::Float::type());
488 
489 	storeValue(xyzw);
490 }
491 
Float(RValue<SIMD::UInt> cast)492 SIMD::Float::Float(RValue<SIMD::UInt> cast)
493     : XYZW(this)
494 {
495 	RValue<SIMD::Float> result = SIMD::Float(SIMD::Int(cast & SIMD::UInt(0x7FFFFFFF))) +
496 	                             As<SIMD::Float>((As<SIMD::Int>(cast) >> 31) & As<SIMD::Int>(SIMD::Float(0x80000000u)));
497 
498 	storeValue(result.value());
499 }
500 
Float()501 SIMD::Float::Float()
502     : XYZW(this)
503 {
504 }
505 
Float(float broadcast)506 SIMD::Float::Float(float broadcast)
507     : XYZW(this)
508 {
509 	// See rr::Float(float) constructor for the rationale behind this assert.
510 	ASSERT(std::isfinite(broadcast));
511 
512 	std::vector<double> constantVector = { broadcast };
513 	storeValue(Nucleus::createConstantVector(constantVector, type()));
514 }
515 
Float(float x,float y,float z,float w)516 SIMD::Float::Float(float x, float y, float z, float w)
517     : XYZW(this)
518 {
519 	std::vector<double> constantVector = { x, y, z, w };
520 	storeValue(Nucleus::createConstantVector(constantVector, type()));
521 }
522 
Float(std::vector<float> v)523 SIMD::Float::Float(std::vector<float> v)
524     : XYZW(this)
525 {
526 	std::vector<double> constantVector;
527 	for(int f : v) { constantVector.push_back(f); }
528 	storeValue(Nucleus::createConstantVector(constantVector, type()));
529 }
530 
Float(std::function<float (int)> LaneValueProducer)531 SIMD::Float::Float(std::function<float(int)> LaneValueProducer)
532     : XYZW(this)
533 {
534 	std::vector<double> constantVector;
535 	for(int i = 0; i < SIMD::Width; i++) { constantVector.push_back(LaneValueProducer(i)); }
536 	storeValue(Nucleus::createConstantVector(constantVector, type()));
537 }
538 
infinity()539 SIMD::Float SIMD::Float::infinity()
540 {
541 	SIMD::Float result;
542 
543 	constexpr double inf = std::numeric_limits<double>::infinity();
544 	std::vector<double> constantVector = { inf };
545 	result.storeValue(Nucleus::createConstantVector(constantVector, type()));
546 
547 	return result;
548 }
549 
Float(RValue<SIMD::Float> rhs)550 SIMD::Float::Float(RValue<SIMD::Float> rhs)
551     : XYZW(this)
552 {
553 	store(rhs);
554 }
555 
// Copy constructor: loads the current value of rhs and stores it here.
SIMD::Float::Float(const SIMD::Float &rhs)
    : XYZW(this)
{
	this->store(rhs.load());
}
561 
Float(const Reference<SIMD::Float> & rhs)562 SIMD::Float::Float(const Reference<SIMD::Float> &rhs)
563     : XYZW(this)
564 {
565 	store(rhs.load());
566 }
567 
Float(const scalar::Float & rhs)568 SIMD::Float::Float(const scalar::Float &rhs)
569     : XYZW(this)
570 {
571 	*this = RValue<scalar::Float>(rhs.loadValue());
572 }
573 
Float(const Reference<scalar::Float> & rhs)574 SIMD::Float::Float(const Reference<scalar::Float> &rhs)
575     : XYZW(this)
576 {
577 	*this = RValue<scalar::Float>(rhs.loadValue());
578 }
579 
Float(RValue<packed::Float4> rhs)580 SIMD::Float::Float(RValue<packed::Float4> rhs)
581     : XYZW(this)
582 {
583 	ASSERT(SIMD::Width == 4);
584 	*this = Insert128(*this, rhs, 0);
585 }
586 
operator =(RValue<packed::Float4> rhs)587 RValue<SIMD::Float> SIMD::Float::operator=(RValue<packed::Float4> rhs)
588 {
589 	return *this = SIMD::Float(rhs);
590 }
591 
operator =(float x)592 RValue<SIMD::Float> SIMD::Float::operator=(float x)
593 {
594 	return *this = SIMD::Float(x);
595 }
596 
operator =(RValue<SIMD::Float> rhs)597 RValue<SIMD::Float> SIMD::Float::operator=(RValue<SIMD::Float> rhs)
598 {
599 	return store(rhs);
600 }
601 
operator =(const SIMD::Float & rhs)602 RValue<SIMD::Float> SIMD::Float::operator=(const SIMD::Float &rhs)
603 {
604 	return store(rhs.load());
605 }
606 
operator =(const Reference<SIMD::Float> & rhs)607 RValue<SIMD::Float> SIMD::Float::operator=(const Reference<SIMD::Float> &rhs)
608 {
609 	return store(rhs.load());
610 }
611 
operator =(RValue<scalar::Float> rhs)612 RValue<SIMD::Float> SIMD::Float::operator=(RValue<scalar::Float> rhs)
613 {
614 	return *this = SIMD::Float(rhs);
615 }
616 
operator =(const scalar::Float & rhs)617 RValue<SIMD::Float> SIMD::Float::operator=(const scalar::Float &rhs)
618 {
619 	return *this = SIMD::Float(rhs);
620 }
621 
operator =(const Reference<scalar::Float> & rhs)622 RValue<SIMD::Float> SIMD::Float::operator=(const Reference<scalar::Float> &rhs)
623 {
624 	return *this = SIMD::Float(rhs);
625 }
626 
operator +(RValue<SIMD::Float> lhs,RValue<SIMD::Float> rhs)627 RValue<SIMD::Float> operator+(RValue<SIMD::Float> lhs, RValue<SIMD::Float> rhs)
628 {
629 	return RValue<SIMD::Float>(Nucleus::createFAdd(lhs.value(), rhs.value()));
630 }
631 
operator -(RValue<SIMD::Float> lhs,RValue<SIMD::Float> rhs)632 RValue<SIMD::Float> operator-(RValue<SIMD::Float> lhs, RValue<SIMD::Float> rhs)
633 {
634 	return RValue<SIMD::Float>(Nucleus::createFSub(lhs.value(), rhs.value()));
635 }
636 
operator *(RValue<SIMD::Float> lhs,RValue<SIMD::Float> rhs)637 RValue<SIMD::Float> operator*(RValue<SIMD::Float> lhs, RValue<SIMD::Float> rhs)
638 {
639 	return RValue<SIMD::Float>(Nucleus::createFMul(lhs.value(), rhs.value()));
640 }
641 
operator /(RValue<SIMD::Float> lhs,RValue<SIMD::Float> rhs)642 RValue<SIMD::Float> operator/(RValue<SIMD::Float> lhs, RValue<SIMD::Float> rhs)
643 {
644 	return RValue<SIMD::Float>(Nucleus::createFDiv(lhs.value(), rhs.value()));
645 }
646 
operator +=(SIMD::Float & lhs,RValue<SIMD::Float> rhs)647 RValue<SIMD::Float> operator+=(SIMD::Float &lhs, RValue<SIMD::Float> rhs)
648 {
649 	return lhs = lhs + rhs;
650 }
651 
operator -=(SIMD::Float & lhs,RValue<SIMD::Float> rhs)652 RValue<SIMD::Float> operator-=(SIMD::Float &lhs, RValue<SIMD::Float> rhs)
653 {
654 	return lhs = lhs - rhs;
655 }
656 
operator *=(SIMD::Float & lhs,RValue<SIMD::Float> rhs)657 RValue<SIMD::Float> operator*=(SIMD::Float &lhs, RValue<SIMD::Float> rhs)
658 {
659 	return lhs = lhs * rhs;
660 }
661 
operator /=(SIMD::Float & lhs,RValue<SIMD::Float> rhs)662 RValue<SIMD::Float> operator/=(SIMD::Float &lhs, RValue<SIMD::Float> rhs)
663 {
664 	return lhs = lhs / rhs;
665 }
666 
operator %=(SIMD::Float & lhs,RValue<SIMD::Float> rhs)667 RValue<SIMD::Float> operator%=(SIMD::Float &lhs, RValue<SIMD::Float> rhs)
668 {
669 	return lhs = lhs % rhs;
670 }
671 
operator +(RValue<SIMD::Float> val)672 RValue<SIMD::Float> operator+(RValue<SIMD::Float> val)
673 {
674 	return val;
675 }
676 
operator -(RValue<SIMD::Float> val)677 RValue<SIMD::Float> operator-(RValue<SIMD::Float> val)
678 {
679 	return RValue<SIMD::Float>(Nucleus::createFNeg(val.value()));
680 }
681 
Rcp(RValue<SIMD::Float> x,bool relaxedPrecision,bool exactAtPow2)682 RValue<SIMD::Float> Rcp(RValue<SIMD::Float> x, bool relaxedPrecision, bool exactAtPow2)
683 {
684 	ASSERT(SIMD::Width == 4);
685 	return SIMD::Float(Rcp(Extract128(x, 0), relaxedPrecision, exactAtPow2));
686 }
687 
RcpSqrt(RValue<SIMD::Float> x,bool relaxedPrecision)688 RValue<SIMD::Float> RcpSqrt(RValue<SIMD::Float> x, bool relaxedPrecision)
689 {
690 	ASSERT(SIMD::Width == 4);
691 	return SIMD::Float(RcpSqrt(Extract128(x, 0), relaxedPrecision));
692 }
693 
Insert(RValue<SIMD::Float> x,RValue<scalar::Float> element,int i)694 RValue<SIMD::Float> Insert(RValue<SIMD::Float> x, RValue<scalar::Float> element, int i)
695 {
696 	return RValue<SIMD::Float>(Nucleus::createInsertElement(x.value(), element.value(), i));
697 }
698 
Extract(RValue<SIMD::Float> x,int i)699 RValue<scalar::Float> Extract(RValue<SIMD::Float> x, int i)
700 {
701 	return RValue<scalar::Float>(Nucleus::createExtractElement(x.value(), scalar::Float::type(), i));
702 }
703 
IsInf(RValue<SIMD::Float> x)704 RValue<SIMD::Int> IsInf(RValue<SIMD::Float> x)
705 {
706 	return CmpEQ(As<SIMD::Int>(x) & SIMD::Int(0x7FFFFFFF), SIMD::Int(0x7F800000));
707 }
708 
IsNan(RValue<SIMD::Float> x)709 RValue<SIMD::Int> IsNan(RValue<SIMD::Float> x)
710 {
711 	return ~CmpEQ(x, x);
712 }
713 
Sin(RValue<SIMD::Float> x)714 RValue<SIMD::Float> Sin(RValue<SIMD::Float> x)
715 {
716 	return ScalarizeCall(sinf, x);
717 }
718 
Cos(RValue<SIMD::Float> x)719 RValue<SIMD::Float> Cos(RValue<SIMD::Float> x)
720 {
721 	return ScalarizeCall(cosf, x);
722 }
723 
Tan(RValue<SIMD::Float> x)724 RValue<SIMD::Float> Tan(RValue<SIMD::Float> x)
725 {
726 	return ScalarizeCall(tanf, x);
727 }
728 
Asin(RValue<SIMD::Float> x)729 RValue<SIMD::Float> Asin(RValue<SIMD::Float> x)
730 {
731 	return ScalarizeCall(asinf, x);
732 }
733 
Acos(RValue<SIMD::Float> x)734 RValue<SIMD::Float> Acos(RValue<SIMD::Float> x)
735 {
736 	return ScalarizeCall(acosf, x);
737 }
738 
Atan(RValue<SIMD::Float> x)739 RValue<SIMD::Float> Atan(RValue<SIMD::Float> x)
740 {
741 	return ScalarizeCall(atanf, x);
742 }
743 
Sinh(RValue<SIMD::Float> x)744 RValue<SIMD::Float> Sinh(RValue<SIMD::Float> x)
745 {
746 	return ScalarizeCall(sinhf, x);
747 }
748 
Cosh(RValue<SIMD::Float> x)749 RValue<SIMD::Float> Cosh(RValue<SIMD::Float> x)
750 {
751 	return ScalarizeCall(coshf, x);
752 }
753 
Tanh(RValue<SIMD::Float> x)754 RValue<SIMD::Float> Tanh(RValue<SIMD::Float> x)
755 {
756 	return ScalarizeCall(tanhf, x);
757 }
758 
Asinh(RValue<SIMD::Float> x)759 RValue<SIMD::Float> Asinh(RValue<SIMD::Float> x)
760 {
761 	return ScalarizeCall(asinhf, x);
762 }
763 
Acosh(RValue<SIMD::Float> x)764 RValue<SIMD::Float> Acosh(RValue<SIMD::Float> x)
765 {
766 	return ScalarizeCall(acoshf, x);
767 }
768 
Atanh(RValue<SIMD::Float> x)769 RValue<SIMD::Float> Atanh(RValue<SIMD::Float> x)
770 {
771 	return ScalarizeCall(atanhf, x);
772 }
773 
Atan2(RValue<SIMD::Float> x,RValue<SIMD::Float> y)774 RValue<SIMD::Float> Atan2(RValue<SIMD::Float> x, RValue<SIMD::Float> y)
775 {
776 	return ScalarizeCall(atan2f, x, y);
777 }
778 
Pow(RValue<SIMD::Float> x,RValue<SIMD::Float> y)779 RValue<SIMD::Float> Pow(RValue<SIMD::Float> x, RValue<SIMD::Float> y)
780 {
781 	return ScalarizeCall(powf, x, y);
782 }
783 
Exp(RValue<SIMD::Float> x)784 RValue<SIMD::Float> Exp(RValue<SIMD::Float> x)
785 {
786 	return ScalarizeCall(expf, x);
787 }
788 
Log(RValue<SIMD::Float> x)789 RValue<SIMD::Float> Log(RValue<SIMD::Float> x)
790 {
791 	return ScalarizeCall(logf, x);
792 }
793 
Exp2(RValue<SIMD::Float> x)794 RValue<SIMD::Float> Exp2(RValue<SIMD::Float> x)
795 {
796 	return ScalarizeCall(exp2f, x);
797 }
798 
Log2(RValue<SIMD::Float> x)799 RValue<SIMD::Float> Log2(RValue<SIMD::Float> x)
800 {
801 	return ScalarizeCall(log2f, x);
802 }
803 
SignMask(RValue<SIMD::Int> x)804 RValue<Int> SignMask(RValue<SIMD::Int> x)
805 {
806 	ASSERT(SIMD::Width == 4);
807 	return SignMask(Extract128(x, 0));
808 }
809 
Ctlz(RValue<SIMD::UInt> x,bool isZeroUndef)810 RValue<SIMD::UInt> Ctlz(RValue<SIMD::UInt> x, bool isZeroUndef)
811 {
812 	ASSERT(SIMD::Width == 4);
813 	SIMD::UInt result;
814 	return Insert128(result, Ctlz(Extract128(x, 0), isZeroUndef), 0);
815 }
816 
Cttz(RValue<SIMD::UInt> x,bool isZeroUndef)817 RValue<SIMD::UInt> Cttz(RValue<SIMD::UInt> x, bool isZeroUndef)
818 {
819 	ASSERT(SIMD::Width == 4);
820 	SIMD::UInt result;
821 	return Insert128(result, Cttz(Extract128(x, 0), isZeroUndef), 0);
822 }
823 
MulHigh(RValue<SIMD::Int> x,RValue<SIMD::Int> y)824 RValue<SIMD::Int> MulHigh(RValue<SIMD::Int> x, RValue<SIMD::Int> y)
825 {
826 	ASSERT(SIMD::Width == 4);
827 	SIMD::Int result;
828 	return Insert128(result, MulHigh(Extract128(x, 0), Extract128(y, 0)), 0);
829 }
830 
MulHigh(RValue<SIMD::UInt> x,RValue<SIMD::UInt> y)831 RValue<SIMD::UInt> MulHigh(RValue<SIMD::UInt> x, RValue<SIMD::UInt> y)
832 {
833 	ASSERT(SIMD::Width == 4);
834 	SIMD::UInt result;
835 	return Insert128(result, MulHigh(Extract128(x, 0), Extract128(y, 0)), 0);
836 }
837 
// Forwards to the 128-bit AnyTrue overload on slot 0 of bools.
// Only valid when SIMD::Width is 4.
RValue<Bool> AnyTrue(const RValue<SIMD::Int> &bools)
{
	ASSERT(SIMD::Width == 4);
	auto lanes = Extract128(bools, 0);
	return AnyTrue(lanes);
}
843 
// Forwards to the 128-bit AnyFalse overload on slot 0 of bools.
// Only valid when SIMD::Width is 4.
RValue<Bool> AnyFalse(const RValue<SIMD::Int> &bools)
{
	ASSERT(SIMD::Width == 4);
	auto lanes = Extract128(bools, 0);
	return AnyFalse(lanes);
}
849 
// Forwards to the 128-bit Divergent overload on slot 0 of ints.
// Only valid when SIMD::Width is 4.
RValue<Bool> Divergent(const RValue<SIMD::Int> &ints)
{
	ASSERT(SIMD::Width == 4);
	auto lanes = Extract128(ints, 0);
	return Divergent(lanes);
}
855 
Swizzle(RValue<SIMD::Int> x,uint16_t select)856 RValue<SIMD::Int> Swizzle(RValue<SIMD::Int> x, uint16_t select)
857 {
858 	ASSERT(SIMD::Width == 4);
859 	SIMD::Int result;
860 	return Insert128(result, Swizzle(Extract128(x, 0), select), 0);
861 }
862 
Swizzle(RValue<SIMD::UInt> x,uint16_t select)863 RValue<SIMD::UInt> Swizzle(RValue<SIMD::UInt> x, uint16_t select)
864 {
865 	ASSERT(SIMD::Width == 4);
866 	SIMD::UInt result;
867 	return Insert128(result, Swizzle(Extract128(x, 0), select), 0);
868 }
869 
Swizzle(RValue<SIMD::Float> x,uint16_t select)870 RValue<SIMD::Float> Swizzle(RValue<SIMD::Float> x, uint16_t select)
871 {
872 	ASSERT(SIMD::Width == 4);
873 	SIMD::Float result;
874 	return Insert128(result, Swizzle(Extract128(x, 0), select), 0);
875 }
876 
Shuffle(RValue<SIMD::Int> x,RValue<SIMD::Int> y,uint16_t select)877 RValue<SIMD::Int> Shuffle(RValue<SIMD::Int> x, RValue<SIMD::Int> y, uint16_t select)
878 {
879 	ASSERT(SIMD::Width == 4);
880 	SIMD::Int result;
881 	return Insert128(result, Shuffle(Extract128(x, 0), Extract128(y, 0), select), 0);
882 }
883 
Shuffle(RValue<SIMD::UInt> x,RValue<SIMD::UInt> y,uint16_t select)884 RValue<SIMD::UInt> Shuffle(RValue<SIMD::UInt> x, RValue<SIMD::UInt> y, uint16_t select)
885 {
886 	ASSERT(SIMD::Width == 4);
887 	SIMD::UInt result;
888 	return Insert128(result, Shuffle(Extract128(x, 0), Extract128(y, 0), select), 0);
889 }
890 
Shuffle(RValue<SIMD::Float> x,RValue<SIMD::Float> y,uint16_t select)891 RValue<SIMD::Float> Shuffle(RValue<SIMD::Float> x, RValue<SIMD::Float> y, uint16_t select)
892 {
893 	ASSERT(SIMD::Width == 4);
894 	SIMD::Float result;
895 	return Insert128(result, Shuffle(Extract128(x, 0), Extract128(y, 0), select), 0);
896 }
897 
Pointer(scalar::Pointer<Byte> base,rr::Int limit)898 SIMD::Pointer::Pointer(scalar::Pointer<Byte> base, rr::Int limit)
899     : base(base)
900     , dynamicLimit(limit)
901     , staticLimit(0)
902     , dynamicOffsets(0)
903     , staticOffsets(SIMD::Width)
904     , hasDynamicLimit(true)
905     , hasDynamicOffsets(false)
906     , isBasePlusOffset(true)
907 {}
908 
Pointer(scalar::Pointer<Byte> base,unsigned int limit)909 SIMD::Pointer::Pointer(scalar::Pointer<Byte> base, unsigned int limit)
910     : base(base)
911     , dynamicLimit(0)
912     , staticLimit(limit)
913     , dynamicOffsets(0)
914     , staticOffsets(SIMD::Width)
915     , hasDynamicLimit(false)
916     , hasDynamicOffsets(false)
917     , isBasePlusOffset(true)
918 {}
919 
Pointer(scalar::Pointer<Byte> base,rr::Int limit,SIMD::Int offset)920 SIMD::Pointer::Pointer(scalar::Pointer<Byte> base, rr::Int limit, SIMD::Int offset)
921     : base(base)
922     , dynamicLimit(limit)
923     , staticLimit(0)
924     , dynamicOffsets(offset)
925     , staticOffsets(SIMD::Width)
926     , hasDynamicLimit(true)
927     , hasDynamicOffsets(true)
928     , isBasePlusOffset(true)
929 {}
930 
Pointer(scalar::Pointer<Byte> base,unsigned int limit,SIMD::Int offset)931 SIMD::Pointer::Pointer(scalar::Pointer<Byte> base, unsigned int limit, SIMD::Int offset)
932     : base(base)
933     , dynamicLimit(0)
934     , staticLimit(limit)
935     , dynamicOffsets(offset)
936     , staticOffsets(SIMD::Width)
937     , hasDynamicLimit(false)
938     , hasDynamicOffsets(true)
939     , isBasePlusOffset(true)
940 {}
941 
Pointer(std::vector<scalar::Pointer<Byte>> pointers)942 SIMD::Pointer::Pointer(std::vector<scalar::Pointer<Byte>> pointers)
943     : pointers(pointers)
944     , isBasePlusOffset(false)
945 {}
946 
Pointer(SIMD::UInt cast)947 SIMD::Pointer::Pointer(SIMD::UInt cast)
948     : pointers(SIMD::Width)
949     , isBasePlusOffset(false)
950 {
951 	assert(sizeof(void *) == 4);
952 	for(int i = 0; i < SIMD::Width; i++)
953 	{
954 		pointers[i] = As<rr::Pointer<Byte>>(Extract(cast, i));
955 	}
956 }
957 
Pointer(SIMD::UInt castLow,SIMD::UInt castHigh)958 SIMD::Pointer::Pointer(SIMD::UInt castLow, SIMD::UInt castHigh)
959     : pointers(SIMD::Width)
960     , isBasePlusOffset(false)
961 {
962 	assert(sizeof(void *) == 8);
963 	for(int i = 0; i < SIMD::Width; i++)
964 	{
965 		UInt2 address;
966 		address = Insert(address, Extract(castLow, i), 0);
967 		address = Insert(address, Extract(castHigh, i), 1);
968 		pointers[i] = As<rr::Pointer<Byte>>(address);
969 	}
970 }
971 
operator +=(SIMD::Int i)972 SIMD::Pointer &SIMD::Pointer::operator+=(SIMD::Int i)
973 {
974 	if(isBasePlusOffset)
975 	{
976 		dynamicOffsets += i;
977 		hasDynamicOffsets = true;
978 	}
979 	else
980 	{
981 		for(int el = 0; el < SIMD::Width; el++) { pointers[el] += Extract(i, el); }
982 	}
983 	return *this;
984 }
985 
operator +(SIMD::Int i)986 SIMD::Pointer SIMD::Pointer::operator+(SIMD::Int i)
987 {
988 	SIMD::Pointer p = *this;
989 	p += i;
990 	return p;
991 }
992 
operator +=(int i)993 SIMD::Pointer &SIMD::Pointer::operator+=(int i)
994 {
995 	if(isBasePlusOffset)
996 	{
997 		for(int el = 0; el < SIMD::Width; el++) { staticOffsets[el] += i; }
998 	}
999 	else
1000 	{
1001 		for(int el = 0; el < SIMD::Width; el++) { pointers[el] += i; }
1002 	}
1003 	return *this;
1004 }
1005 
operator +(int i)1006 SIMD::Pointer SIMD::Pointer::operator+(int i)
1007 {
1008 	SIMD::Pointer p = *this;
1009 	p += i;
1010 	return p;
1011 }
1012 
offsets() const1013 SIMD::Int SIMD::Pointer::offsets() const
1014 {
1015 	ASSERT_MSG(isBasePlusOffset, "No offsets for this type of pointer");
1016 	return dynamicOffsets + SIMD::Int(staticOffsets);
1017 }
1018 
isInBounds(unsigned int accessSize,OutOfBoundsBehavior robustness) const1019 SIMD::Int SIMD::Pointer::isInBounds(unsigned int accessSize, OutOfBoundsBehavior robustness) const
1020 {
1021 	ASSERT(accessSize > 0);
1022 
1023 	if(isStaticallyInBounds(accessSize, robustness))
1024 	{
1025 		return SIMD::Int(0xFFFFFFFF);
1026 	}
1027 
1028 	if(!hasDynamicOffsets && !hasDynamicLimit)
1029 	{
1030 		ASSERT(SIMD::Width == 4);
1031 		// Common fast paths.
1032 		return SIMD::Int(
1033 		    (staticOffsets[0] + accessSize - 1 < staticLimit) ? 0xFFFFFFFF : 0,
1034 		    (staticOffsets[1] + accessSize - 1 < staticLimit) ? 0xFFFFFFFF : 0,
1035 		    (staticOffsets[2] + accessSize - 1 < staticLimit) ? 0xFFFFFFFF : 0,
1036 		    (staticOffsets[3] + accessSize - 1 < staticLimit) ? 0xFFFFFFFF : 0);
1037 	}
1038 
1039 	return CmpGE(offsets(), 0) & CmpLT(offsets() + SIMD::Int(accessSize - 1), limit());
1040 }
1041 
isStaticallyInBounds(unsigned int accessSize,OutOfBoundsBehavior robustness) const1042 bool SIMD::Pointer::isStaticallyInBounds(unsigned int accessSize, OutOfBoundsBehavior robustness) const
1043 {
1044 	if(hasDynamicOffsets)
1045 	{
1046 		return false;
1047 	}
1048 
1049 	if(hasDynamicLimit)
1050 	{
1051 		if(hasStaticEqualOffsets() || hasStaticSequentialOffsets(accessSize))
1052 		{
1053 			switch(robustness)
1054 			{
1055 			case OutOfBoundsBehavior::UndefinedBehavior:
1056 				// With this robustness setting the application/compiler guarantees in-bounds accesses on active lanes,
1057 				// but since it can't know in advance which branches are taken this must be true even for inactives lanes.
1058 				return true;
1059 			case OutOfBoundsBehavior::Nullify:
1060 			case OutOfBoundsBehavior::RobustBufferAccess:
1061 			case OutOfBoundsBehavior::UndefinedValue:
1062 				return false;
1063 			}
1064 		}
1065 	}
1066 
1067 	for(int i = 0; i < SIMD::Width; i++)
1068 	{
1069 		if(staticOffsets[i] + accessSize - 1 >= staticLimit)
1070 		{
1071 			return false;
1072 		}
1073 	}
1074 
1075 	return true;
1076 }
1077 
limit() const1078 SIMD::Int SIMD::Pointer::limit() const
1079 {
1080 	return dynamicLimit + staticLimit;
1081 }
1082 
1083 // Returns true if all offsets are compile-time static and sequential
1084 // (N+0*step, N+1*step, N+2*step, N+3*step)
hasStaticSequentialOffsets(unsigned int step) const1085 bool SIMD::Pointer::hasStaticSequentialOffsets(unsigned int step) const
1086 {
1087 	ASSERT_MSG(isBasePlusOffset, "No offsets for this type of pointer");
1088 	if(hasDynamicOffsets)
1089 	{
1090 		return false;
1091 	}
1092 
1093 	for(int i = 1; i < SIMD::Width; i++)
1094 	{
1095 		if(staticOffsets[i - 1] + int32_t(step) != staticOffsets[i])
1096 		{
1097 			return false;
1098 		}
1099 	}
1100 
1101 	return true;
1102 }
1103 
1104 // Returns true if all offsets are compile-time static and equal
1105 // (N, N, N, N)
hasStaticEqualOffsets() const1106 bool SIMD::Pointer::hasStaticEqualOffsets() const
1107 {
1108 	ASSERT_MSG(isBasePlusOffset, "No offsets for this type of pointer");
1109 	if(hasDynamicOffsets)
1110 	{
1111 		return false;
1112 	}
1113 
1114 	for(int i = 1; i < SIMD::Width; i++)
1115 	{
1116 		if(staticOffsets[0] != staticOffsets[i])
1117 		{
1118 			return false;
1119 		}
1120 	}
1121 
1122 	return true;
1123 }
1124 
getUniformPointer() const1125 scalar::Pointer<Byte> SIMD::Pointer::getUniformPointer() const
1126 {
1127 #ifndef NDEBUG
1128 	if(isBasePlusOffset)
1129 	{
1130 		SIMD::Int uniform = offsets();
1131 		scalar::Int x = Extract(uniform, 0);
1132 
1133 		for(int i = 1; i < SIMD::Width; i++)
1134 		{
1135 			Assert(x == Extract(uniform, i));
1136 		}
1137 	}
1138 	else
1139 	{
1140 		for(int i = 1; i < SIMD::Width; i++)
1141 		{
1142 			Assert(pointers[0] == pointers[i]);
1143 		}
1144 	}
1145 #endif
1146 
1147 	return getPointerForLane(0);
1148 }
1149 
getPointerForLane(int lane) const1150 scalar::Pointer<Byte> SIMD::Pointer::getPointerForLane(int lane) const
1151 {
1152 	if(isBasePlusOffset)
1153 	{
1154 		return base + Extract(offsets(), lane);
1155 	}
1156 	else
1157 	{
1158 		return pointers[lane];
1159 	}
1160 }
1161 
castTo(SIMD::UInt & bits) const1162 void SIMD::Pointer::castTo(SIMD::UInt &bits) const
1163 {
1164 	assert(sizeof(void *) == 4);
1165 	for(int i = 0; i < SIMD::Width; i++)
1166 	{
1167 		bits = Insert(bits, As<scalar::UInt>(pointers[i]), i);
1168 	}
1169 }
1170 
castTo(SIMD::UInt & lowerBits,SIMD::UInt & upperBits) const1171 void SIMD::Pointer::castTo(SIMD::UInt &lowerBits, SIMD::UInt &upperBits) const
1172 {
1173 	assert(sizeof(void *) == 8);
1174 	for(int i = 0; i < SIMD::Width; i++)
1175 	{
1176 		UInt2 address = As<UInt2>(pointers[i]);
1177 		lowerBits = Insert(lowerBits, Extract(address, 0), i);
1178 		upperBits = Insert(upperBits, Extract(address, 1), i);
1179 	}
1180 }
1181 
IfThenElse(SIMD::Int condition,const SIMD::Pointer & lhs,const SIMD::Pointer & rhs)1182 SIMD::Pointer SIMD::Pointer::IfThenElse(SIMD::Int condition, const SIMD::Pointer &lhs, const SIMD::Pointer &rhs)
1183 {
1184 	std::vector<scalar::Pointer<Byte>> pointers(SIMD::Width);
1185 	for(int i = 0; i < SIMD::Width; i++)
1186 	{
1187 		If(Extract(condition, i) != 0)
1188 		{
1189 			pointers[i] = lhs.getPointerForLane(i);
1190 		}
1191 		Else
1192 		{
1193 			pointers[i] = rhs.getPointerForLane(i);
1194 		}
1195 	}
1196 
1197 	return { pointers };
1198 }
1199 
#ifdef ENABLE_RR_PRINT
// Returns the values used to print this pointer: the base and the offsets
// for a base-plus-offset pointer, or one value per lane otherwise.
std::vector<rr::Value *> SIMD::Pointer::getPrintValues() const
{
	if(!isBasePlusOffset)
	{
		std::vector<Value *> laneValues;
		for(int lane = 0; lane < SIMD::Width; lane++)
		{
			RValue<scalar::Pointer<Byte>> lanePointer(pointers[lane]);
			laneValues.push_back(lanePointer.value());
		}

		return laneValues;
	}

	return PrintValue::vals(base, offsets());
}
#endif
1218 
1219 }  // namespace rr
1220