1 // Copyright 2022 The SwiftShader Authors. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "SIMD.hpp"
16
17 #include "Assert.hpp"
18 #include "Debug.hpp"
19 #include "Print.hpp"
20
21 #include <cmath>
22
23 namespace rr {
24
Int()25 SIMD::Int::Int()
26 : XYZW(this)
27 {
28 }
29
Int(RValue<SIMD::Float> cast)30 SIMD::Int::Int(RValue<SIMD::Float> cast)
31 : XYZW(this)
32 {
33 Value *xyzw = Nucleus::createFPToSI(cast.value(), SIMD::Int::type());
34
35 storeValue(xyzw);
36 }
37
Int(int broadcast)38 SIMD::Int::Int(int broadcast)
39 : XYZW(this)
40 {
41 std::vector<int64_t> constantVector = { broadcast };
42 storeValue(Nucleus::createConstantVector(constantVector, type()));
43 }
44
Int(int x,int y,int z,int w)45 SIMD::Int::Int(int x, int y, int z, int w)
46 : XYZW(this)
47 {
48 std::vector<int64_t> constantVector = { x, y, z, w };
49 storeValue(Nucleus::createConstantVector(constantVector, type()));
50 }
51
Int(std::vector<int> v)52 SIMD::Int::Int(std::vector<int> v)
53 : XYZW(this)
54 {
55 std::vector<int64_t> constantVector;
56 for(int i : v) { constantVector.push_back(i); }
57 storeValue(Nucleus::createConstantVector(constantVector, type()));
58 }
59
Int(std::function<int (int)> LaneValueProducer)60 SIMD::Int::Int(std::function<int(int)> LaneValueProducer)
61 : XYZW(this)
62 {
63 std::vector<int64_t> constantVector;
64 for(int i = 0; i < SIMD::Width; i++) { constantVector.push_back(LaneValueProducer(i)); }
65 storeValue(Nucleus::createConstantVector(constantVector, type()));
66 }
67
Int(RValue<SIMD::Int> rhs)68 SIMD::Int::Int(RValue<SIMD::Int> rhs)
69 : XYZW(this)
70 {
71 store(rhs);
72 }
73
// Copy constructor: loads the value held by rhs and stores it in this object.
SIMD::Int::Int(const SIMD::Int &rhs)
    : XYZW(this)
{
	this->store(rhs.load());
}
79
Int(const Reference<SIMD::Int> & rhs)80 SIMD::Int::Int(const Reference<SIMD::Int> &rhs)
81 : XYZW(this)
82 {
83 store(rhs.load());
84 }
85
Int(RValue<SIMD::UInt> rhs)86 SIMD::Int::Int(RValue<SIMD::UInt> rhs)
87 : XYZW(this)
88 {
89 storeValue(rhs.value());
90 }
91
// Constructs from an unsigned SIMD variable. Its loaded value is stored
// directly; no conversion call is made here.
SIMD::Int::Int(const SIMD::UInt &rhs)
    : XYZW(this)
{
	Value *bits = rhs.loadValue();
	storeValue(bits);
}
97
Int(const Reference<SIMD::UInt> & rhs)98 SIMD::Int::Int(const Reference<SIMD::UInt> &rhs)
99 : XYZW(this)
100 {
101 storeValue(rhs.loadValue());
102 }
103
Int(const scalar::Int & rhs)104 SIMD::Int::Int(const scalar::Int &rhs)
105 : XYZW(this)
106 {
107 *this = RValue<scalar::Int>(rhs.loadValue());
108 }
109
Int(const Reference<scalar::Int> & rhs)110 SIMD::Int::Int(const Reference<scalar::Int> &rhs)
111 : XYZW(this)
112 {
113 *this = RValue<scalar::Int>(rhs.loadValue());
114 }
115
operator =(int x)116 RValue<SIMD::Int> SIMD::Int::operator=(int x)
117 {
118 return *this = SIMD::Int(x);
119 }
120
operator =(RValue<SIMD::Int> rhs)121 RValue<SIMD::Int> SIMD::Int::operator=(RValue<SIMD::Int> rhs)
122 {
123 return store(rhs);
124 }
125
operator =(const SIMD::Int & rhs)126 RValue<SIMD::Int> SIMD::Int::operator=(const SIMD::Int &rhs)
127 {
128 return store(rhs.load());
129 }
130
operator =(const Reference<SIMD::Int> & rhs)131 RValue<SIMD::Int> SIMD::Int::operator=(const Reference<SIMD::Int> &rhs)
132 {
133 return store(rhs.load());
134 }
135
operator +(RValue<SIMD::Int> lhs,RValue<SIMD::Int> rhs)136 RValue<SIMD::Int> operator+(RValue<SIMD::Int> lhs, RValue<SIMD::Int> rhs)
137 {
138 return RValue<SIMD::Int>(Nucleus::createAdd(lhs.value(), rhs.value()));
139 }
140
operator -(RValue<SIMD::Int> lhs,RValue<SIMD::Int> rhs)141 RValue<SIMD::Int> operator-(RValue<SIMD::Int> lhs, RValue<SIMD::Int> rhs)
142 {
143 return RValue<SIMD::Int>(Nucleus::createSub(lhs.value(), rhs.value()));
144 }
145
operator *(RValue<SIMD::Int> lhs,RValue<SIMD::Int> rhs)146 RValue<SIMD::Int> operator*(RValue<SIMD::Int> lhs, RValue<SIMD::Int> rhs)
147 {
148 return RValue<SIMD::Int>(Nucleus::createMul(lhs.value(), rhs.value()));
149 }
150
operator /(RValue<SIMD::Int> lhs,RValue<SIMD::Int> rhs)151 RValue<SIMD::Int> operator/(RValue<SIMD::Int> lhs, RValue<SIMD::Int> rhs)
152 {
153 return RValue<SIMD::Int>(Nucleus::createSDiv(lhs.value(), rhs.value()));
154 }
155
operator %(RValue<SIMD::Int> lhs,RValue<SIMD::Int> rhs)156 RValue<SIMD::Int> operator%(RValue<SIMD::Int> lhs, RValue<SIMD::Int> rhs)
157 {
158 return RValue<SIMD::Int>(Nucleus::createSRem(lhs.value(), rhs.value()));
159 }
160
operator &(RValue<SIMD::Int> lhs,RValue<SIMD::Int> rhs)161 RValue<SIMD::Int> operator&(RValue<SIMD::Int> lhs, RValue<SIMD::Int> rhs)
162 {
163 return RValue<SIMD::Int>(Nucleus::createAnd(lhs.value(), rhs.value()));
164 }
165
operator |(RValue<SIMD::Int> lhs,RValue<SIMD::Int> rhs)166 RValue<SIMD::Int> operator|(RValue<SIMD::Int> lhs, RValue<SIMD::Int> rhs)
167 {
168 return RValue<SIMD::Int>(Nucleus::createOr(lhs.value(), rhs.value()));
169 }
170
operator ^(RValue<SIMD::Int> lhs,RValue<SIMD::Int> rhs)171 RValue<SIMD::Int> operator^(RValue<SIMD::Int> lhs, RValue<SIMD::Int> rhs)
172 {
173 return RValue<SIMD::Int>(Nucleus::createXor(lhs.value(), rhs.value()));
174 }
175
operator <<(RValue<SIMD::Int> lhs,RValue<SIMD::Int> rhs)176 RValue<SIMD::Int> operator<<(RValue<SIMD::Int> lhs, RValue<SIMD::Int> rhs)
177 {
178 return RValue<SIMD::Int>(Nucleus::createShl(lhs.value(), rhs.value()));
179 }
180
operator >>(RValue<SIMD::Int> lhs,RValue<SIMD::Int> rhs)181 RValue<SIMD::Int> operator>>(RValue<SIMD::Int> lhs, RValue<SIMD::Int> rhs)
182 {
183 return RValue<SIMD::Int>(Nucleus::createAShr(lhs.value(), rhs.value()));
184 }
185
operator +=(SIMD::Int & lhs,RValue<SIMD::Int> rhs)186 RValue<SIMD::Int> operator+=(SIMD::Int &lhs, RValue<SIMD::Int> rhs)
187 {
188 return lhs = lhs + rhs;
189 }
190
operator -=(SIMD::Int & lhs,RValue<SIMD::Int> rhs)191 RValue<SIMD::Int> operator-=(SIMD::Int &lhs, RValue<SIMD::Int> rhs)
192 {
193 return lhs = lhs - rhs;
194 }
195
operator *=(SIMD::Int & lhs,RValue<SIMD::Int> rhs)196 RValue<SIMD::Int> operator*=(SIMD::Int &lhs, RValue<SIMD::Int> rhs)
197 {
198 return lhs = lhs * rhs;
199 }
200
201 // RValue<SIMD::Int> operator/=(SIMD::Int &lhs, RValue<SIMD::Int> rhs)
202 // {
203 // return lhs = lhs / rhs;
204 // }
205
206 // RValue<SIMD::Int> operator%=(SIMD::Int &lhs, RValue<SIMD::Int> rhs)
207 // {
208 // return lhs = lhs % rhs;
209 // }
210
operator &=(SIMD::Int & lhs,RValue<SIMD::Int> rhs)211 RValue<SIMD::Int> operator&=(SIMD::Int &lhs, RValue<SIMD::Int> rhs)
212 {
213 return lhs = lhs & rhs;
214 }
215
operator |=(SIMD::Int & lhs,RValue<SIMD::Int> rhs)216 RValue<SIMD::Int> operator|=(SIMD::Int &lhs, RValue<SIMD::Int> rhs)
217 {
218 return lhs = lhs | rhs;
219 }
220
operator ^=(SIMD::Int & lhs,RValue<SIMD::Int> rhs)221 RValue<SIMD::Int> operator^=(SIMD::Int &lhs, RValue<SIMD::Int> rhs)
222 {
223 return lhs = lhs ^ rhs;
224 }
225
operator <<=(SIMD::Int & lhs,unsigned char rhs)226 RValue<SIMD::Int> operator<<=(SIMD::Int &lhs, unsigned char rhs)
227 {
228 return lhs = lhs << rhs;
229 }
230
operator >>=(SIMD::Int & lhs,unsigned char rhs)231 RValue<SIMD::Int> operator>>=(SIMD::Int &lhs, unsigned char rhs)
232 {
233 return lhs = lhs >> rhs;
234 }
235
operator +(RValue<SIMD::Int> val)236 RValue<SIMD::Int> operator+(RValue<SIMD::Int> val)
237 {
238 return val;
239 }
240
operator -(RValue<SIMD::Int> val)241 RValue<SIMD::Int> operator-(RValue<SIMD::Int> val)
242 {
243 return RValue<SIMD::Int>(Nucleus::createNeg(val.value()));
244 }
245
operator ~(RValue<SIMD::Int> val)246 RValue<SIMD::Int> operator~(RValue<SIMD::Int> val)
247 {
248 return RValue<SIMD::Int>(Nucleus::createNot(val.value()));
249 }
250
Extract(RValue<SIMD::Int> x,int i)251 RValue<scalar::Int> Extract(RValue<SIMD::Int> x, int i)
252 {
253 return RValue<scalar::Int>(Nucleus::createExtractElement(x.value(), scalar::Int::type(), i));
254 }
255
Insert(RValue<SIMD::Int> x,RValue<scalar::Int> element,int i)256 RValue<SIMD::Int> Insert(RValue<SIMD::Int> x, RValue<scalar::Int> element, int i)
257 {
258 return RValue<SIMD::Int>(Nucleus::createInsertElement(x.value(), element.value(), i));
259 }
260
UInt()261 SIMD::UInt::UInt()
262 : XYZW(this)
263 {
264 }
265
UInt(int broadcast)266 SIMD::UInt::UInt(int broadcast)
267 : XYZW(this)
268 {
269 std::vector<int64_t> constantVector = { broadcast };
270 storeValue(Nucleus::createConstantVector(constantVector, type()));
271 }
272
UInt(int x,int y,int z,int w)273 SIMD::UInt::UInt(int x, int y, int z, int w)
274 : XYZW(this)
275 {
276 std::vector<int64_t> constantVector = { x, y, z, w };
277 storeValue(Nucleus::createConstantVector(constantVector, type()));
278 }
279
UInt(std::vector<int> v)280 SIMD::UInt::UInt(std::vector<int> v)
281 : XYZW(this)
282 {
283 std::vector<int64_t> constantVector;
284 for(int i : v) { constantVector.push_back(i); }
285 storeValue(Nucleus::createConstantVector(constantVector, type()));
286 }
287
UInt(std::function<int (int)> LaneValueProducer)288 SIMD::UInt::UInt(std::function<int(int)> LaneValueProducer)
289 : XYZW(this)
290 {
291 std::vector<int64_t> constantVector;
292 for(int i = 0; i < SIMD::Width; i++) { constantVector.push_back(LaneValueProducer(i)); }
293 storeValue(Nucleus::createConstantVector(constantVector, type()));
294 }
295
UInt(RValue<SIMD::UInt> rhs)296 SIMD::UInt::UInt(RValue<SIMD::UInt> rhs)
297 : XYZW(this)
298 {
299 store(rhs);
300 }
301
// Copy constructor: loads the value held by rhs and stores it in this object.
SIMD::UInt::UInt(const SIMD::UInt &rhs)
    : XYZW(this)
{
	this->store(rhs.load());
}
307
UInt(const Reference<SIMD::UInt> & rhs)308 SIMD::UInt::UInt(const Reference<SIMD::UInt> &rhs)
309 : XYZW(this)
310 {
311 store(rhs.load());
312 }
313
UInt(RValue<SIMD::Int> rhs)314 SIMD::UInt::UInt(RValue<SIMD::Int> rhs)
315 : XYZW(this)
316 {
317 storeValue(rhs.value());
318 }
319
// Constructs from a signed SIMD variable. Its loaded value is stored
// directly; no conversion call is made here.
SIMD::UInt::UInt(const SIMD::Int &rhs)
    : XYZW(this)
{
	Value *bits = rhs.loadValue();
	storeValue(bits);
}
325
UInt(const Reference<SIMD::Int> & rhs)326 SIMD::UInt::UInt(const Reference<SIMD::Int> &rhs)
327 : XYZW(this)
328 {
329 storeValue(rhs.loadValue());
330 }
331
UInt(const scalar::UInt & rhs)332 SIMD::UInt::UInt(const scalar::UInt &rhs)
333 : XYZW(this)
334 {
335 *this = RValue<scalar::UInt>(rhs.loadValue());
336 }
337
UInt(const Reference<scalar::UInt> & rhs)338 SIMD::UInt::UInt(const Reference<scalar::UInt> &rhs)
339 : XYZW(this)
340 {
341 *this = RValue<scalar::UInt>(rhs.loadValue());
342 }
343
operator =(RValue<SIMD::UInt> rhs)344 RValue<SIMD::UInt> SIMD::UInt::operator=(RValue<SIMD::UInt> rhs)
345 {
346 return store(rhs);
347 }
348
operator =(const SIMD::UInt & rhs)349 RValue<SIMD::UInt> SIMD::UInt::operator=(const SIMD::UInt &rhs)
350 {
351 return store(rhs.load());
352 }
353
operator =(const Reference<SIMD::UInt> & rhs)354 RValue<SIMD::UInt> SIMD::UInt::operator=(const Reference<SIMD::UInt> &rhs)
355 {
356 return store(rhs.load());
357 }
358
operator +(RValue<SIMD::UInt> lhs,RValue<SIMD::UInt> rhs)359 RValue<SIMD::UInt> operator+(RValue<SIMD::UInt> lhs, RValue<SIMD::UInt> rhs)
360 {
361 return RValue<SIMD::UInt>(Nucleus::createAdd(lhs.value(), rhs.value()));
362 }
363
operator -(RValue<SIMD::UInt> lhs,RValue<SIMD::UInt> rhs)364 RValue<SIMD::UInt> operator-(RValue<SIMD::UInt> lhs, RValue<SIMD::UInt> rhs)
365 {
366 return RValue<SIMD::UInt>(Nucleus::createSub(lhs.value(), rhs.value()));
367 }
368
operator *(RValue<SIMD::UInt> lhs,RValue<SIMD::UInt> rhs)369 RValue<SIMD::UInt> operator*(RValue<SIMD::UInt> lhs, RValue<SIMD::UInt> rhs)
370 {
371 return RValue<SIMD::UInt>(Nucleus::createMul(lhs.value(), rhs.value()));
372 }
373
operator /(RValue<SIMD::UInt> lhs,RValue<SIMD::UInt> rhs)374 RValue<SIMD::UInt> operator/(RValue<SIMD::UInt> lhs, RValue<SIMD::UInt> rhs)
375 {
376 return RValue<SIMD::UInt>(Nucleus::createUDiv(lhs.value(), rhs.value()));
377 }
378
operator %(RValue<SIMD::UInt> lhs,RValue<SIMD::UInt> rhs)379 RValue<SIMD::UInt> operator%(RValue<SIMD::UInt> lhs, RValue<SIMD::UInt> rhs)
380 {
381 return RValue<SIMD::UInt>(Nucleus::createURem(lhs.value(), rhs.value()));
382 }
383
operator &(RValue<SIMD::UInt> lhs,RValue<SIMD::UInt> rhs)384 RValue<SIMD::UInt> operator&(RValue<SIMD::UInt> lhs, RValue<SIMD::UInt> rhs)
385 {
386 return RValue<SIMD::UInt>(Nucleus::createAnd(lhs.value(), rhs.value()));
387 }
388
operator |(RValue<SIMD::UInt> lhs,RValue<SIMD::UInt> rhs)389 RValue<SIMD::UInt> operator|(RValue<SIMD::UInt> lhs, RValue<SIMD::UInt> rhs)
390 {
391 return RValue<SIMD::UInt>(Nucleus::createOr(lhs.value(), rhs.value()));
392 }
393
operator ^(RValue<SIMD::UInt> lhs,RValue<SIMD::UInt> rhs)394 RValue<SIMD::UInt> operator^(RValue<SIMD::UInt> lhs, RValue<SIMD::UInt> rhs)
395 {
396 return RValue<SIMD::UInt>(Nucleus::createXor(lhs.value(), rhs.value()));
397 }
398
operator <<(RValue<SIMD::UInt> lhs,RValue<SIMD::UInt> rhs)399 RValue<SIMD::UInt> operator<<(RValue<SIMD::UInt> lhs, RValue<SIMD::UInt> rhs)
400 {
401 return RValue<SIMD::UInt>(Nucleus::createShl(lhs.value(), rhs.value()));
402 }
403
operator >>(RValue<SIMD::UInt> lhs,RValue<SIMD::UInt> rhs)404 RValue<SIMD::UInt> operator>>(RValue<SIMD::UInt> lhs, RValue<SIMD::UInt> rhs)
405 {
406 return RValue<SIMD::UInt>(Nucleus::createLShr(lhs.value(), rhs.value()));
407 }
408
operator +=(SIMD::UInt & lhs,RValue<SIMD::UInt> rhs)409 RValue<SIMD::UInt> operator+=(SIMD::UInt &lhs, RValue<SIMD::UInt> rhs)
410 {
411 return lhs = lhs + rhs;
412 }
413
operator -=(SIMD::UInt & lhs,RValue<SIMD::UInt> rhs)414 RValue<SIMD::UInt> operator-=(SIMD::UInt &lhs, RValue<SIMD::UInt> rhs)
415 {
416 return lhs = lhs - rhs;
417 }
418
operator *=(SIMD::UInt & lhs,RValue<SIMD::UInt> rhs)419 RValue<SIMD::UInt> operator*=(SIMD::UInt &lhs, RValue<SIMD::UInt> rhs)
420 {
421 return lhs = lhs * rhs;
422 }
423
424 // RValue<SIMD::UInt> operator/=(SIMD::UInt &lhs, RValue<SIMD::UInt> rhs)
425 // {
426 // return lhs = lhs / rhs;
427 // }
428
429 // RValue<SIMD::UInt> operator%=(SIMD::UInt &lhs, RValue<SIMD::UInt> rhs)
430 // {
431 // return lhs = lhs % rhs;
432 // }
433
operator &=(SIMD::UInt & lhs,RValue<SIMD::UInt> rhs)434 RValue<SIMD::UInt> operator&=(SIMD::UInt &lhs, RValue<SIMD::UInt> rhs)
435 {
436 return lhs = lhs & rhs;
437 }
438
operator |=(SIMD::UInt & lhs,RValue<SIMD::UInt> rhs)439 RValue<SIMD::UInt> operator|=(SIMD::UInt &lhs, RValue<SIMD::UInt> rhs)
440 {
441 return lhs = lhs | rhs;
442 }
443
operator ^=(SIMD::UInt & lhs,RValue<SIMD::UInt> rhs)444 RValue<SIMD::UInt> operator^=(SIMD::UInt &lhs, RValue<SIMD::UInt> rhs)
445 {
446 return lhs = lhs ^ rhs;
447 }
448
operator <<=(SIMD::UInt & lhs,unsigned char rhs)449 RValue<SIMD::UInt> operator<<=(SIMD::UInt &lhs, unsigned char rhs)
450 {
451 return lhs = lhs << rhs;
452 }
453
operator >>=(SIMD::UInt & lhs,unsigned char rhs)454 RValue<SIMD::UInt> operator>>=(SIMD::UInt &lhs, unsigned char rhs)
455 {
456 return lhs = lhs >> rhs;
457 }
458
operator +(RValue<SIMD::UInt> val)459 RValue<SIMD::UInt> operator+(RValue<SIMD::UInt> val)
460 {
461 return val;
462 }
463
operator -(RValue<SIMD::UInt> val)464 RValue<SIMD::UInt> operator-(RValue<SIMD::UInt> val)
465 {
466 return RValue<SIMD::UInt>(Nucleus::createNeg(val.value()));
467 }
468
operator ~(RValue<SIMD::UInt> val)469 RValue<SIMD::UInt> operator~(RValue<SIMD::UInt> val)
470 {
471 return RValue<SIMD::UInt>(Nucleus::createNot(val.value()));
472 }
473
Extract(RValue<SIMD::UInt> x,int i)474 RValue<scalar::UInt> Extract(RValue<SIMD::UInt> x, int i)
475 {
476 return RValue<scalar::UInt>(Nucleus::createExtractElement(x.value(), scalar::Int::type(), i));
477 }
478
Insert(RValue<SIMD::UInt> x,RValue<scalar::UInt> element,int i)479 RValue<SIMD::UInt> Insert(RValue<SIMD::UInt> x, RValue<scalar::UInt> element, int i)
480 {
481 return RValue<SIMD::UInt>(Nucleus::createInsertElement(x.value(), element.value(), i));
482 }
483
Float(RValue<SIMD::Int> cast)484 SIMD::Float::Float(RValue<SIMD::Int> cast)
485 : XYZW(this)
486 {
487 Value *xyzw = Nucleus::createSIToFP(cast.value(), SIMD::Float::type());
488
489 storeValue(xyzw);
490 }
491
Float(RValue<SIMD::UInt> cast)492 SIMD::Float::Float(RValue<SIMD::UInt> cast)
493 : XYZW(this)
494 {
495 RValue<SIMD::Float> result = SIMD::Float(SIMD::Int(cast & SIMD::UInt(0x7FFFFFFF))) +
496 As<SIMD::Float>((As<SIMD::Int>(cast) >> 31) & As<SIMD::Int>(SIMD::Float(0x80000000u)));
497
498 storeValue(result.value());
499 }
500
Float()501 SIMD::Float::Float()
502 : XYZW(this)
503 {
504 }
505
Float(float broadcast)506 SIMD::Float::Float(float broadcast)
507 : XYZW(this)
508 {
509 // See rr::Float(float) constructor for the rationale behind this assert.
510 ASSERT(std::isfinite(broadcast));
511
512 std::vector<double> constantVector = { broadcast };
513 storeValue(Nucleus::createConstantVector(constantVector, type()));
514 }
515
Float(float x,float y,float z,float w)516 SIMD::Float::Float(float x, float y, float z, float w)
517 : XYZW(this)
518 {
519 std::vector<double> constantVector = { x, y, z, w };
520 storeValue(Nucleus::createConstantVector(constantVector, type()));
521 }
522
Float(std::vector<float> v)523 SIMD::Float::Float(std::vector<float> v)
524 : XYZW(this)
525 {
526 std::vector<double> constantVector;
527 for(int f : v) { constantVector.push_back(f); }
528 storeValue(Nucleus::createConstantVector(constantVector, type()));
529 }
530
Float(std::function<float (int)> LaneValueProducer)531 SIMD::Float::Float(std::function<float(int)> LaneValueProducer)
532 : XYZW(this)
533 {
534 std::vector<double> constantVector;
535 for(int i = 0; i < SIMD::Width; i++) { constantVector.push_back(LaneValueProducer(i)); }
536 storeValue(Nucleus::createConstantVector(constantVector, type()));
537 }
538
infinity()539 SIMD::Float SIMD::Float::infinity()
540 {
541 SIMD::Float result;
542
543 constexpr double inf = std::numeric_limits<double>::infinity();
544 std::vector<double> constantVector = { inf };
545 result.storeValue(Nucleus::createConstantVector(constantVector, type()));
546
547 return result;
548 }
549
Float(RValue<SIMD::Float> rhs)550 SIMD::Float::Float(RValue<SIMD::Float> rhs)
551 : XYZW(this)
552 {
553 store(rhs);
554 }
555
// Copy constructor: loads the value held by rhs and stores it in this object.
SIMD::Float::Float(const SIMD::Float &rhs)
    : XYZW(this)
{
	this->store(rhs.load());
}
561
Float(const Reference<SIMD::Float> & rhs)562 SIMD::Float::Float(const Reference<SIMD::Float> &rhs)
563 : XYZW(this)
564 {
565 store(rhs.load());
566 }
567
Float(const scalar::Float & rhs)568 SIMD::Float::Float(const scalar::Float &rhs)
569 : XYZW(this)
570 {
571 *this = RValue<scalar::Float>(rhs.loadValue());
572 }
573
Float(const Reference<scalar::Float> & rhs)574 SIMD::Float::Float(const Reference<scalar::Float> &rhs)
575 : XYZW(this)
576 {
577 *this = RValue<scalar::Float>(rhs.loadValue());
578 }
579
Float(RValue<packed::Float4> rhs)580 SIMD::Float::Float(RValue<packed::Float4> rhs)
581 : XYZW(this)
582 {
583 ASSERT(SIMD::Width == 4);
584 *this = Insert128(*this, rhs, 0);
585 }
586
operator =(RValue<packed::Float4> rhs)587 RValue<SIMD::Float> SIMD::Float::operator=(RValue<packed::Float4> rhs)
588 {
589 return *this = SIMD::Float(rhs);
590 }
591
operator =(float x)592 RValue<SIMD::Float> SIMD::Float::operator=(float x)
593 {
594 return *this = SIMD::Float(x);
595 }
596
operator =(RValue<SIMD::Float> rhs)597 RValue<SIMD::Float> SIMD::Float::operator=(RValue<SIMD::Float> rhs)
598 {
599 return store(rhs);
600 }
601
operator =(const SIMD::Float & rhs)602 RValue<SIMD::Float> SIMD::Float::operator=(const SIMD::Float &rhs)
603 {
604 return store(rhs.load());
605 }
606
operator =(const Reference<SIMD::Float> & rhs)607 RValue<SIMD::Float> SIMD::Float::operator=(const Reference<SIMD::Float> &rhs)
608 {
609 return store(rhs.load());
610 }
611
operator =(RValue<scalar::Float> rhs)612 RValue<SIMD::Float> SIMD::Float::operator=(RValue<scalar::Float> rhs)
613 {
614 return *this = SIMD::Float(rhs);
615 }
616
operator =(const scalar::Float & rhs)617 RValue<SIMD::Float> SIMD::Float::operator=(const scalar::Float &rhs)
618 {
619 return *this = SIMD::Float(rhs);
620 }
621
operator =(const Reference<scalar::Float> & rhs)622 RValue<SIMD::Float> SIMD::Float::operator=(const Reference<scalar::Float> &rhs)
623 {
624 return *this = SIMD::Float(rhs);
625 }
626
operator +(RValue<SIMD::Float> lhs,RValue<SIMD::Float> rhs)627 RValue<SIMD::Float> operator+(RValue<SIMD::Float> lhs, RValue<SIMD::Float> rhs)
628 {
629 return RValue<SIMD::Float>(Nucleus::createFAdd(lhs.value(), rhs.value()));
630 }
631
operator -(RValue<SIMD::Float> lhs,RValue<SIMD::Float> rhs)632 RValue<SIMD::Float> operator-(RValue<SIMD::Float> lhs, RValue<SIMD::Float> rhs)
633 {
634 return RValue<SIMD::Float>(Nucleus::createFSub(lhs.value(), rhs.value()));
635 }
636
operator *(RValue<SIMD::Float> lhs,RValue<SIMD::Float> rhs)637 RValue<SIMD::Float> operator*(RValue<SIMD::Float> lhs, RValue<SIMD::Float> rhs)
638 {
639 return RValue<SIMD::Float>(Nucleus::createFMul(lhs.value(), rhs.value()));
640 }
641
operator /(RValue<SIMD::Float> lhs,RValue<SIMD::Float> rhs)642 RValue<SIMD::Float> operator/(RValue<SIMD::Float> lhs, RValue<SIMD::Float> rhs)
643 {
644 return RValue<SIMD::Float>(Nucleus::createFDiv(lhs.value(), rhs.value()));
645 }
646
operator +=(SIMD::Float & lhs,RValue<SIMD::Float> rhs)647 RValue<SIMD::Float> operator+=(SIMD::Float &lhs, RValue<SIMD::Float> rhs)
648 {
649 return lhs = lhs + rhs;
650 }
651
operator -=(SIMD::Float & lhs,RValue<SIMD::Float> rhs)652 RValue<SIMD::Float> operator-=(SIMD::Float &lhs, RValue<SIMD::Float> rhs)
653 {
654 return lhs = lhs - rhs;
655 }
656
operator *=(SIMD::Float & lhs,RValue<SIMD::Float> rhs)657 RValue<SIMD::Float> operator*=(SIMD::Float &lhs, RValue<SIMD::Float> rhs)
658 {
659 return lhs = lhs * rhs;
660 }
661
operator /=(SIMD::Float & lhs,RValue<SIMD::Float> rhs)662 RValue<SIMD::Float> operator/=(SIMD::Float &lhs, RValue<SIMD::Float> rhs)
663 {
664 return lhs = lhs / rhs;
665 }
666
operator %=(SIMD::Float & lhs,RValue<SIMD::Float> rhs)667 RValue<SIMD::Float> operator%=(SIMD::Float &lhs, RValue<SIMD::Float> rhs)
668 {
669 return lhs = lhs % rhs;
670 }
671
operator +(RValue<SIMD::Float> val)672 RValue<SIMD::Float> operator+(RValue<SIMD::Float> val)
673 {
674 return val;
675 }
676
operator -(RValue<SIMD::Float> val)677 RValue<SIMD::Float> operator-(RValue<SIMD::Float> val)
678 {
679 return RValue<SIMD::Float>(Nucleus::createFNeg(val.value()));
680 }
681
Rcp(RValue<SIMD::Float> x,bool relaxedPrecision,bool exactAtPow2)682 RValue<SIMD::Float> Rcp(RValue<SIMD::Float> x, bool relaxedPrecision, bool exactAtPow2)
683 {
684 ASSERT(SIMD::Width == 4);
685 return SIMD::Float(Rcp(Extract128(x, 0), relaxedPrecision, exactAtPow2));
686 }
687
RcpSqrt(RValue<SIMD::Float> x,bool relaxedPrecision)688 RValue<SIMD::Float> RcpSqrt(RValue<SIMD::Float> x, bool relaxedPrecision)
689 {
690 ASSERT(SIMD::Width == 4);
691 return SIMD::Float(RcpSqrt(Extract128(x, 0), relaxedPrecision));
692 }
693
Insert(RValue<SIMD::Float> x,RValue<scalar::Float> element,int i)694 RValue<SIMD::Float> Insert(RValue<SIMD::Float> x, RValue<scalar::Float> element, int i)
695 {
696 return RValue<SIMD::Float>(Nucleus::createInsertElement(x.value(), element.value(), i));
697 }
698
Extract(RValue<SIMD::Float> x,int i)699 RValue<scalar::Float> Extract(RValue<SIMD::Float> x, int i)
700 {
701 return RValue<scalar::Float>(Nucleus::createExtractElement(x.value(), scalar::Float::type(), i));
702 }
703
IsInf(RValue<SIMD::Float> x)704 RValue<SIMD::Int> IsInf(RValue<SIMD::Float> x)
705 {
706 return CmpEQ(As<SIMD::Int>(x) & SIMD::Int(0x7FFFFFFF), SIMD::Int(0x7F800000));
707 }
708
IsNan(RValue<SIMD::Float> x)709 RValue<SIMD::Int> IsNan(RValue<SIMD::Float> x)
710 {
711 return ~CmpEQ(x, x);
712 }
713
Sin(RValue<SIMD::Float> x)714 RValue<SIMD::Float> Sin(RValue<SIMD::Float> x)
715 {
716 return ScalarizeCall(sinf, x);
717 }
718
Cos(RValue<SIMD::Float> x)719 RValue<SIMD::Float> Cos(RValue<SIMD::Float> x)
720 {
721 return ScalarizeCall(cosf, x);
722 }
723
Tan(RValue<SIMD::Float> x)724 RValue<SIMD::Float> Tan(RValue<SIMD::Float> x)
725 {
726 return ScalarizeCall(tanf, x);
727 }
728
Asin(RValue<SIMD::Float> x)729 RValue<SIMD::Float> Asin(RValue<SIMD::Float> x)
730 {
731 return ScalarizeCall(asinf, x);
732 }
733
Acos(RValue<SIMD::Float> x)734 RValue<SIMD::Float> Acos(RValue<SIMD::Float> x)
735 {
736 return ScalarizeCall(acosf, x);
737 }
738
Atan(RValue<SIMD::Float> x)739 RValue<SIMD::Float> Atan(RValue<SIMD::Float> x)
740 {
741 return ScalarizeCall(atanf, x);
742 }
743
Sinh(RValue<SIMD::Float> x)744 RValue<SIMD::Float> Sinh(RValue<SIMD::Float> x)
745 {
746 return ScalarizeCall(sinhf, x);
747 }
748
Cosh(RValue<SIMD::Float> x)749 RValue<SIMD::Float> Cosh(RValue<SIMD::Float> x)
750 {
751 return ScalarizeCall(coshf, x);
752 }
753
Tanh(RValue<SIMD::Float> x)754 RValue<SIMD::Float> Tanh(RValue<SIMD::Float> x)
755 {
756 return ScalarizeCall(tanhf, x);
757 }
758
Asinh(RValue<SIMD::Float> x)759 RValue<SIMD::Float> Asinh(RValue<SIMD::Float> x)
760 {
761 return ScalarizeCall(asinhf, x);
762 }
763
Acosh(RValue<SIMD::Float> x)764 RValue<SIMD::Float> Acosh(RValue<SIMD::Float> x)
765 {
766 return ScalarizeCall(acoshf, x);
767 }
768
Atanh(RValue<SIMD::Float> x)769 RValue<SIMD::Float> Atanh(RValue<SIMD::Float> x)
770 {
771 return ScalarizeCall(atanhf, x);
772 }
773
Atan2(RValue<SIMD::Float> x,RValue<SIMD::Float> y)774 RValue<SIMD::Float> Atan2(RValue<SIMD::Float> x, RValue<SIMD::Float> y)
775 {
776 return ScalarizeCall(atan2f, x, y);
777 }
778
Pow(RValue<SIMD::Float> x,RValue<SIMD::Float> y)779 RValue<SIMD::Float> Pow(RValue<SIMD::Float> x, RValue<SIMD::Float> y)
780 {
781 return ScalarizeCall(powf, x, y);
782 }
783
Exp(RValue<SIMD::Float> x)784 RValue<SIMD::Float> Exp(RValue<SIMD::Float> x)
785 {
786 return ScalarizeCall(expf, x);
787 }
788
Log(RValue<SIMD::Float> x)789 RValue<SIMD::Float> Log(RValue<SIMD::Float> x)
790 {
791 return ScalarizeCall(logf, x);
792 }
793
Exp2(RValue<SIMD::Float> x)794 RValue<SIMD::Float> Exp2(RValue<SIMD::Float> x)
795 {
796 return ScalarizeCall(exp2f, x);
797 }
798
Log2(RValue<SIMD::Float> x)799 RValue<SIMD::Float> Log2(RValue<SIMD::Float> x)
800 {
801 return ScalarizeCall(log2f, x);
802 }
803
SignMask(RValue<SIMD::Int> x)804 RValue<Int> SignMask(RValue<SIMD::Int> x)
805 {
806 ASSERT(SIMD::Width == 4);
807 return SignMask(Extract128(x, 0));
808 }
809
Ctlz(RValue<SIMD::UInt> x,bool isZeroUndef)810 RValue<SIMD::UInt> Ctlz(RValue<SIMD::UInt> x, bool isZeroUndef)
811 {
812 ASSERT(SIMD::Width == 4);
813 SIMD::UInt result;
814 return Insert128(result, Ctlz(Extract128(x, 0), isZeroUndef), 0);
815 }
816
Cttz(RValue<SIMD::UInt> x,bool isZeroUndef)817 RValue<SIMD::UInt> Cttz(RValue<SIMD::UInt> x, bool isZeroUndef)
818 {
819 ASSERT(SIMD::Width == 4);
820 SIMD::UInt result;
821 return Insert128(result, Cttz(Extract128(x, 0), isZeroUndef), 0);
822 }
823
MulHigh(RValue<SIMD::Int> x,RValue<SIMD::Int> y)824 RValue<SIMD::Int> MulHigh(RValue<SIMD::Int> x, RValue<SIMD::Int> y)
825 {
826 ASSERT(SIMD::Width == 4);
827 SIMD::Int result;
828 return Insert128(result, MulHigh(Extract128(x, 0), Extract128(y, 0)), 0);
829 }
830
MulHigh(RValue<SIMD::UInt> x,RValue<SIMD::UInt> y)831 RValue<SIMD::UInt> MulHigh(RValue<SIMD::UInt> x, RValue<SIMD::UInt> y)
832 {
833 ASSERT(SIMD::Width == 4);
834 SIMD::UInt result;
835 return Insert128(result, MulHigh(Extract128(x, 0), Extract128(y, 0)), 0);
836 }
837
// AnyTrue for SIMD::Int. Only implemented for SIMD::Width == 4 (asserted):
// forwards Extract128(bools, 0) to the 128-bit AnyTrue overload.
RValue<Bool> AnyTrue(const RValue<SIMD::Int> &bools)
{
	ASSERT(SIMD::Width == 4);

	return AnyTrue(Extract128(bools, 0));
}
843
// AnyFalse for SIMD::Int. Only implemented for SIMD::Width == 4 (asserted):
// forwards Extract128(bools, 0) to the 128-bit AnyFalse overload.
RValue<Bool> AnyFalse(const RValue<SIMD::Int> &bools)
{
	ASSERT(SIMD::Width == 4);

	return AnyFalse(Extract128(bools, 0));
}
849
// Divergent for SIMD::Int. Only implemented for SIMD::Width == 4 (asserted):
// forwards Extract128(ints, 0) to the 128-bit Divergent overload.
RValue<Bool> Divergent(const RValue<SIMD::Int> &ints)
{
	ASSERT(SIMD::Width == 4);

	return Divergent(Extract128(ints, 0));
}
855
Swizzle(RValue<SIMD::Int> x,uint16_t select)856 RValue<SIMD::Int> Swizzle(RValue<SIMD::Int> x, uint16_t select)
857 {
858 ASSERT(SIMD::Width == 4);
859 SIMD::Int result;
860 return Insert128(result, Swizzle(Extract128(x, 0), select), 0);
861 }
862
Swizzle(RValue<SIMD::UInt> x,uint16_t select)863 RValue<SIMD::UInt> Swizzle(RValue<SIMD::UInt> x, uint16_t select)
864 {
865 ASSERT(SIMD::Width == 4);
866 SIMD::UInt result;
867 return Insert128(result, Swizzle(Extract128(x, 0), select), 0);
868 }
869
Swizzle(RValue<SIMD::Float> x,uint16_t select)870 RValue<SIMD::Float> Swizzle(RValue<SIMD::Float> x, uint16_t select)
871 {
872 ASSERT(SIMD::Width == 4);
873 SIMD::Float result;
874 return Insert128(result, Swizzle(Extract128(x, 0), select), 0);
875 }
876
Shuffle(RValue<SIMD::Int> x,RValue<SIMD::Int> y,uint16_t select)877 RValue<SIMD::Int> Shuffle(RValue<SIMD::Int> x, RValue<SIMD::Int> y, uint16_t select)
878 {
879 ASSERT(SIMD::Width == 4);
880 SIMD::Int result;
881 return Insert128(result, Shuffle(Extract128(x, 0), Extract128(y, 0), select), 0);
882 }
883
Shuffle(RValue<SIMD::UInt> x,RValue<SIMD::UInt> y,uint16_t select)884 RValue<SIMD::UInt> Shuffle(RValue<SIMD::UInt> x, RValue<SIMD::UInt> y, uint16_t select)
885 {
886 ASSERT(SIMD::Width == 4);
887 SIMD::UInt result;
888 return Insert128(result, Shuffle(Extract128(x, 0), Extract128(y, 0), select), 0);
889 }
890
Shuffle(RValue<SIMD::Float> x,RValue<SIMD::Float> y,uint16_t select)891 RValue<SIMD::Float> Shuffle(RValue<SIMD::Float> x, RValue<SIMD::Float> y, uint16_t select)
892 {
893 ASSERT(SIMD::Width == 4);
894 SIMD::Float result;
895 return Insert128(result, Shuffle(Extract128(x, 0), Extract128(y, 0), select), 0);
896 }
897
Pointer(scalar::Pointer<Byte> base,rr::Int limit)898 SIMD::Pointer::Pointer(scalar::Pointer<Byte> base, rr::Int limit)
899 : base(base)
900 , dynamicLimit(limit)
901 , staticLimit(0)
902 , dynamicOffsets(0)
903 , staticOffsets(SIMD::Width)
904 , hasDynamicLimit(true)
905 , hasDynamicOffsets(false)
906 , isBasePlusOffset(true)
907 {}
908
Pointer(scalar::Pointer<Byte> base,unsigned int limit)909 SIMD::Pointer::Pointer(scalar::Pointer<Byte> base, unsigned int limit)
910 : base(base)
911 , dynamicLimit(0)
912 , staticLimit(limit)
913 , dynamicOffsets(0)
914 , staticOffsets(SIMD::Width)
915 , hasDynamicLimit(false)
916 , hasDynamicOffsets(false)
917 , isBasePlusOffset(true)
918 {}
919
Pointer(scalar::Pointer<Byte> base,rr::Int limit,SIMD::Int offset)920 SIMD::Pointer::Pointer(scalar::Pointer<Byte> base, rr::Int limit, SIMD::Int offset)
921 : base(base)
922 , dynamicLimit(limit)
923 , staticLimit(0)
924 , dynamicOffsets(offset)
925 , staticOffsets(SIMD::Width)
926 , hasDynamicLimit(true)
927 , hasDynamicOffsets(true)
928 , isBasePlusOffset(true)
929 {}
930
Pointer(scalar::Pointer<Byte> base,unsigned int limit,SIMD::Int offset)931 SIMD::Pointer::Pointer(scalar::Pointer<Byte> base, unsigned int limit, SIMD::Int offset)
932 : base(base)
933 , dynamicLimit(0)
934 , staticLimit(limit)
935 , dynamicOffsets(offset)
936 , staticOffsets(SIMD::Width)
937 , hasDynamicLimit(false)
938 , hasDynamicOffsets(true)
939 , isBasePlusOffset(true)
940 {}
941
Pointer(std::vector<scalar::Pointer<Byte>> pointers)942 SIMD::Pointer::Pointer(std::vector<scalar::Pointer<Byte>> pointers)
943 : pointers(pointers)
944 , isBasePlusOffset(false)
945 {}
946
Pointer(SIMD::UInt cast)947 SIMD::Pointer::Pointer(SIMD::UInt cast)
948 : pointers(SIMD::Width)
949 , isBasePlusOffset(false)
950 {
951 assert(sizeof(void *) == 4);
952 for(int i = 0; i < SIMD::Width; i++)
953 {
954 pointers[i] = As<rr::Pointer<Byte>>(Extract(cast, i));
955 }
956 }
957
Pointer(SIMD::UInt castLow,SIMD::UInt castHigh)958 SIMD::Pointer::Pointer(SIMD::UInt castLow, SIMD::UInt castHigh)
959 : pointers(SIMD::Width)
960 , isBasePlusOffset(false)
961 {
962 assert(sizeof(void *) == 8);
963 for(int i = 0; i < SIMD::Width; i++)
964 {
965 UInt2 address;
966 address = Insert(address, Extract(castLow, i), 0);
967 address = Insert(address, Extract(castHigh, i), 1);
968 pointers[i] = As<rr::Pointer<Byte>>(address);
969 }
970 }
971
operator +=(SIMD::Int i)972 SIMD::Pointer &SIMD::Pointer::operator+=(SIMD::Int i)
973 {
974 if(isBasePlusOffset)
975 {
976 dynamicOffsets += i;
977 hasDynamicOffsets = true;
978 }
979 else
980 {
981 for(int el = 0; el < SIMD::Width; el++) { pointers[el] += Extract(i, el); }
982 }
983 return *this;
984 }
985
operator +(SIMD::Int i)986 SIMD::Pointer SIMD::Pointer::operator+(SIMD::Int i)
987 {
988 SIMD::Pointer p = *this;
989 p += i;
990 return p;
991 }
992
operator +=(int i)993 SIMD::Pointer &SIMD::Pointer::operator+=(int i)
994 {
995 if(isBasePlusOffset)
996 {
997 for(int el = 0; el < SIMD::Width; el++) { staticOffsets[el] += i; }
998 }
999 else
1000 {
1001 for(int el = 0; el < SIMD::Width; el++) { pointers[el] += i; }
1002 }
1003 return *this;
1004 }
1005
operator +(int i)1006 SIMD::Pointer SIMD::Pointer::operator+(int i)
1007 {
1008 SIMD::Pointer p = *this;
1009 p += i;
1010 return p;
1011 }
1012
offsets() const1013 SIMD::Int SIMD::Pointer::offsets() const
1014 {
1015 ASSERT_MSG(isBasePlusOffset, "No offsets for this type of pointer");
1016 return dynamicOffsets + SIMD::Int(staticOffsets);
1017 }
1018
isInBounds(unsigned int accessSize,OutOfBoundsBehavior robustness) const1019 SIMD::Int SIMD::Pointer::isInBounds(unsigned int accessSize, OutOfBoundsBehavior robustness) const
1020 {
1021 ASSERT(accessSize > 0);
1022
1023 if(isStaticallyInBounds(accessSize, robustness))
1024 {
1025 return SIMD::Int(0xFFFFFFFF);
1026 }
1027
1028 if(!hasDynamicOffsets && !hasDynamicLimit)
1029 {
1030 ASSERT(SIMD::Width == 4);
1031 // Common fast paths.
1032 return SIMD::Int(
1033 (staticOffsets[0] + accessSize - 1 < staticLimit) ? 0xFFFFFFFF : 0,
1034 (staticOffsets[1] + accessSize - 1 < staticLimit) ? 0xFFFFFFFF : 0,
1035 (staticOffsets[2] + accessSize - 1 < staticLimit) ? 0xFFFFFFFF : 0,
1036 (staticOffsets[3] + accessSize - 1 < staticLimit) ? 0xFFFFFFFF : 0);
1037 }
1038
1039 return CmpGE(offsets(), 0) & CmpLT(offsets() + SIMD::Int(accessSize - 1), limit());
1040 }
1041
isStaticallyInBounds(unsigned int accessSize,OutOfBoundsBehavior robustness) const1042 bool SIMD::Pointer::isStaticallyInBounds(unsigned int accessSize, OutOfBoundsBehavior robustness) const
1043 {
1044 if(hasDynamicOffsets)
1045 {
1046 return false;
1047 }
1048
1049 if(hasDynamicLimit)
1050 {
1051 if(hasStaticEqualOffsets() || hasStaticSequentialOffsets(accessSize))
1052 {
1053 switch(robustness)
1054 {
1055 case OutOfBoundsBehavior::UndefinedBehavior:
1056 // With this robustness setting the application/compiler guarantees in-bounds accesses on active lanes,
1057 // but since it can't know in advance which branches are taken this must be true even for inactives lanes.
1058 return true;
1059 case OutOfBoundsBehavior::Nullify:
1060 case OutOfBoundsBehavior::RobustBufferAccess:
1061 case OutOfBoundsBehavior::UndefinedValue:
1062 return false;
1063 }
1064 }
1065 }
1066
1067 for(int i = 0; i < SIMD::Width; i++)
1068 {
1069 if(staticOffsets[i] + accessSize - 1 >= staticLimit)
1070 {
1071 return false;
1072 }
1073 }
1074
1075 return true;
1076 }
1077
limit() const1078 SIMD::Int SIMD::Pointer::limit() const
1079 {
1080 return dynamicLimit + staticLimit;
1081 }
1082
1083 // Returns true if all offsets are compile-time static and sequential
1084 // (N+0*step, N+1*step, N+2*step, N+3*step)
hasStaticSequentialOffsets(unsigned int step) const1085 bool SIMD::Pointer::hasStaticSequentialOffsets(unsigned int step) const
1086 {
1087 ASSERT_MSG(isBasePlusOffset, "No offsets for this type of pointer");
1088 if(hasDynamicOffsets)
1089 {
1090 return false;
1091 }
1092
1093 for(int i = 1; i < SIMD::Width; i++)
1094 {
1095 if(staticOffsets[i - 1] + int32_t(step) != staticOffsets[i])
1096 {
1097 return false;
1098 }
1099 }
1100
1101 return true;
1102 }
1103
1104 // Returns true if all offsets are compile-time static and equal
1105 // (N, N, N, N)
hasStaticEqualOffsets() const1106 bool SIMD::Pointer::hasStaticEqualOffsets() const
1107 {
1108 ASSERT_MSG(isBasePlusOffset, "No offsets for this type of pointer");
1109 if(hasDynamicOffsets)
1110 {
1111 return false;
1112 }
1113
1114 for(int i = 1; i < SIMD::Width; i++)
1115 {
1116 if(staticOffsets[0] != staticOffsets[i])
1117 {
1118 return false;
1119 }
1120 }
1121
1122 return true;
1123 }
1124
getUniformPointer() const1125 scalar::Pointer<Byte> SIMD::Pointer::getUniformPointer() const
1126 {
1127 #ifndef NDEBUG
1128 if(isBasePlusOffset)
1129 {
1130 SIMD::Int uniform = offsets();
1131 scalar::Int x = Extract(uniform, 0);
1132
1133 for(int i = 1; i < SIMD::Width; i++)
1134 {
1135 Assert(x == Extract(uniform, i));
1136 }
1137 }
1138 else
1139 {
1140 for(int i = 1; i < SIMD::Width; i++)
1141 {
1142 Assert(pointers[0] == pointers[i]);
1143 }
1144 }
1145 #endif
1146
1147 return getPointerForLane(0);
1148 }
1149
getPointerForLane(int lane) const1150 scalar::Pointer<Byte> SIMD::Pointer::getPointerForLane(int lane) const
1151 {
1152 if(isBasePlusOffset)
1153 {
1154 return base + Extract(offsets(), lane);
1155 }
1156 else
1157 {
1158 return pointers[lane];
1159 }
1160 }
1161
castTo(SIMD::UInt & bits) const1162 void SIMD::Pointer::castTo(SIMD::UInt &bits) const
1163 {
1164 assert(sizeof(void *) == 4);
1165 for(int i = 0; i < SIMD::Width; i++)
1166 {
1167 bits = Insert(bits, As<scalar::UInt>(pointers[i]), i);
1168 }
1169 }
1170
castTo(SIMD::UInt & lowerBits,SIMD::UInt & upperBits) const1171 void SIMD::Pointer::castTo(SIMD::UInt &lowerBits, SIMD::UInt &upperBits) const
1172 {
1173 assert(sizeof(void *) == 8);
1174 for(int i = 0; i < SIMD::Width; i++)
1175 {
1176 UInt2 address = As<UInt2>(pointers[i]);
1177 lowerBits = Insert(lowerBits, Extract(address, 0), i);
1178 upperBits = Insert(upperBits, Extract(address, 1), i);
1179 }
1180 }
1181
IfThenElse(SIMD::Int condition,const SIMD::Pointer & lhs,const SIMD::Pointer & rhs)1182 SIMD::Pointer SIMD::Pointer::IfThenElse(SIMD::Int condition, const SIMD::Pointer &lhs, const SIMD::Pointer &rhs)
1183 {
1184 std::vector<scalar::Pointer<Byte>> pointers(SIMD::Width);
1185 for(int i = 0; i < SIMD::Width; i++)
1186 {
1187 If(Extract(condition, i) != 0)
1188 {
1189 pointers[i] = lhs.getPointerForLane(i);
1190 }
1191 Else
1192 {
1193 pointers[i] = rhs.getPointerForLane(i);
1194 }
1195 }
1196
1197 return { pointers };
1198 }
1199
1200 #ifdef ENABLE_RR_PRINT
getPrintValues() const1201 std::vector<rr::Value *> SIMD::Pointer::getPrintValues() const
1202 {
1203 if(isBasePlusOffset)
1204 {
1205 return PrintValue::vals(base, offsets());
1206 }
1207 else
1208 {
1209 std::vector<Value *> vals;
1210 for(int i = 0; i < SIMD::Width; i++)
1211 {
1212 vals.push_back(RValue<scalar::Pointer<Byte>>(pointers[i]).value());
1213 }
1214 return vals;
1215 }
1216 }
1217 #endif
1218
1219 } // namespace rr
1220