// Copyright 2019 The SwiftShader Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "SpirvShader.hpp"

#include <spirv/unified1/spirv.hpp>

namespace sw {

// Template function to perform a binary group operation.
// |TYPE| should be the type of the binary operation (as a SIMD::<ScalarType>).
// |I| should be a type suitable to initialize the identity value.
// |APPLY| should be a callable object that takes two RValue<TYPE> parameters
// and returns a new RValue<TYPE> corresponding to the operation's result.
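//
// For example, the integer add reduction used by OpGroupNonUniformIAdd below
// can be written as:
//
//   BinaryOperation<SIMD::Int>(spv::GroupOperationReduce, value, mask, 0,
//                              [](auto a, auto b) { return a + b; });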
template<typename TYPE, typename I, typename APPLY>
static RValue<TYPE> BinaryOperation(
    spv::GroupOperation operation,
    RValue<SIMD::UInt> value,
    RValue<SIMD::UInt> mask,
    const I identityValue,
    APPLY &&apply)
{
    auto identity = TYPE(identityValue);
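    // Replace the value in inactive lanes with the identity element, so that
    // inactive lanes have no effect on the reduction or scan below.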
    SIMD::UInt v_uint = (value & mask) | (As<SIMD::UInt>(identity) & ~mask);
    TYPE v = As<TYPE>(v_uint);

    switch(operation)
    {
    case spv::GroupOperationReduce:
        {
            // NOTE: floating-point add and multiply are not truly associative,
            // so ensure that all values in the final lanes are identical.
            TYPE v2 = apply(v.xxzz, v.yyww);  // [xy] [xy] [zw] [zw]
            return apply(v2.xxxx, v2.zzzz);   // [xyzw] [xyzw] [xyzw] [xyzw]
        }
        break;
    case spv::GroupOperationInclusiveScan:
        {
            TYPE v2 = apply(v, Shuffle(v, identity, 0x4012) /* [id, v.x, v.y, v.z] */);   // [x] [xy] [yz] [zw]
            return apply(v2, Shuffle(v2, identity, 0x4401) /* [id, id, v2.x, v2.y] */);   // [x] [xy] [xyz] [xyzw]
        }
        break;
    case spv::GroupOperationExclusiveScan:
        {
            TYPE v2 = apply(v, Shuffle(v, identity, 0x4012) /* [id, v.x, v.y, v.z] */);    // [x] [xy] [yz] [zw]
            TYPE v3 = apply(v2, Shuffle(v2, identity, 0x4401) /* [id, id, v2.x, v2.y] */);  // [x] [xy] [xyz] [xyzw]
            return Shuffle(v3, identity, 0x4012 /* [id, v3.x, v3.y, v3.z] */);  // [id] [x] [xy] [xyz]
        }
        break;
    default:
        UNSUPPORTED("Group operation: %d", operation);
        return identity;
    }
}

void SpirvEmitter::EmitGroupNonUniform(InsnIterator insn)
{
    ASSERT(SIMD::Width == 4);  // EmitGroupNonUniform makes many assumptions that the SIMD vector width is 4

    auto &type = shader.getType(Type::ID(insn.word(1)));
    Object::ID resultId = insn.word(2);
    auto scope = spv::Scope(shader.GetConstScalarInt(insn.word(3)));
    ASSERT_MSG(scope == spv::ScopeSubgroup, "Scope for Non Uniform Group Operations must be Subgroup for Vulkan 1.1");

    auto &dst = createIntermediate(resultId, type.componentCount);

    switch(insn.opcode())
    {
    case spv::OpGroupNonUniformElect:
        {
            // Result is true only in the active invocation with the lowest id
            // in the group, otherwise result is false.
            SIMD::Int active = activeLaneMask();  // Considers helper invocations active. See b/151137030
            // TODO: Would be nice if we could write this as:
            //   elect = active & ~(active.Oxyz | active.OOxy | active.OOOx)
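            // The swizzled ORs below compute, for each lane, whether any
            // lower-indexed lane is active; v0111 zeroes out lane 0, which
            // has no predecessors. A lane is elected if it is active and no
            // lower-indexed lane is.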
            auto v0111 = SIMD::Int(0, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
            auto elect = active & ~(v0111 & (active.xxyz | active.xxxy | active.xxxx));
            dst.move(0, elect);
        }
        break;

    case spv::OpGroupNonUniformAll:
        {
            Operand predicate(shader, *this, insn.word(4));
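            // Force inactive lanes to true so they cannot affect the AndAll
            // reduction.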
            dst.move(0, AndAll(predicate.UInt(0) | ~As<SIMD::UInt>(activeLaneMask())));  // Considers helper invocations active. See b/151137030
        }
        break;

    case spv::OpGroupNonUniformAny:
        {
            Operand predicate(shader, *this, insn.word(4));
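            // Force inactive lanes to false so they cannot affect the OrAll
            // reduction.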
            dst.move(0, OrAll(predicate.UInt(0) & As<SIMD::UInt>(activeLaneMask())));  // Considers helper invocations active. See b/151137030
        }
        break;

    case spv::OpGroupNonUniformAllEqual:
        {
            Operand value(shader, *this, insn.word(4));
            auto res = SIMD::UInt(0xffffffff);
            SIMD::UInt active = As<SIMD::UInt>(activeLaneMask());  // Considers helper invocations active. See b/151137030
            SIMD::UInt inactive = ~active;
            for(auto i = 0u; i < type.componentCount; i++)
            {
                SIMD::UInt v = value.UInt(i) & active;
                SIMD::UInt filled = v;
                for(int j = 0; j < SIMD::Width - 1; j++)
                {
                    filled |= filled.yzwx & inactive;  // Populate inactive 'holes' with a live value
                }
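                // All lanes hold the same value iff every lane equals its
                // rotated neighbor.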
                res &= AndAll(CmpEQ(filled.xyzw, filled.yzwx));
            }
            dst.move(0, res);
        }
        break;

    case spv::OpGroupNonUniformBroadcast:
        {
            auto valueId = Object::ID(insn.word(4));
            auto idId = Object::ID(insn.word(5));
            Operand value(shader, *this, valueId);

            // Decide between the fast path for constants and the slow path for
            // intermediates.
            if(shader.getObject(idId).kind == Object::Kind::Constant)
            {
                auto id = SIMD::Int(shader.GetConstScalarInt(insn.word(5)));
                auto mask = CmpEQ(id, SIMD::Int(0, 1, 2, 3));
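                // mask selects the lane whose index matches id; OrAll then
                // broadcasts that lane's value to all lanes (non-matching
                // lanes contribute zero).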
                for(auto i = 0u; i < type.componentCount; i++)
                {
                    dst.move(i, OrAll(value.Int(i) & mask));
                }
            }
            else
            {
                Operand id(shader, *this, idId);

                SIMD::UInt active = As<SIMD::UInt>(activeLaneMask());  // Considers helper invocations active. See b/151137030
                SIMD::UInt inactive = ~active;
                SIMD::UInt filled = id.UInt(0) & active;

                for(int j = 0; j < SIMD::Width - 1; j++)
                {
                    filled |= filled.yzwx & inactive;  // Populate inactive 'holes' with a live value
                }

                auto mask = CmpEQ(filled, SIMD::UInt(0, 1, 2, 3));

                for(uint32_t i = 0u; i < type.componentCount; i++)
                {
                    dst.move(i, OrAll(value.UInt(i) & mask));
                }
            }
        }
        break;

    case spv::OpGroupNonUniformBroadcastFirst:
        {
            auto valueId = Object::ID(insn.word(4));
            Operand value(shader, *this, valueId);
            // Select the value from the active invocation with the lowest id
            // in the group and broadcast it to all lanes.
            SIMD::Int active = activeLaneMask();  // Considers helper invocations active. See b/151137030
            // TODO: Would be nice if we could write this as:
            //   elect = active & ~(active.Oxyz | active.OOxy | active.OOOx)
            auto v0111 = SIMD::Int(0, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
            auto elect = active & ~(v0111 & (active.xxyz | active.xxxy | active.xxxx));
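            // OrAll broadcasts the elected lane's value to all lanes; the
            // other lanes are masked to zero and do not contribute.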
            for(auto i = 0u; i < type.componentCount; i++)
            {
                dst.move(i, OrAll(value.Int(i) & elect));
            }
        }
        break;

    case spv::OpGroupNonUniformQuadBroadcast:
        {
            auto valueId = Object::ID(insn.word(4));
            Operand value(shader, *this, valueId);

            ASSERT(shader.getType(shader.getObject(insn.word(5))).componentCount == 1);
            auto indexId = Object::ID(insn.word(5));
            SIMD::Int index = Operand(shader, *this, indexId).Int(0);

            SIMD::Int active = activeLaneMask();
            // Populate all lanes in index with the same value. Index is required to be
            // uniform per the SPIR-V spec, so all active lanes should be identical.
            index = OrAll(active & index);
            SIMD::Int mask = CmpEQ(index, SIMD::Int(0, 1, 2, 3));

            for(auto i = 0u; i < type.componentCount; i++)
            {
                dst.move(i, OrAll(value.Int(i) & mask));
            }
        }
        break;

    case spv::OpGroupNonUniformQuadSwap:
        {
            auto valueId = Object::ID(insn.word(4));
            // SPIR-V spec: Direction must be a scalar of integer type and come from a constant instruction
            int direction = shader.GetConstScalarInt(insn.word(5));

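            // With SIMD::Width == 4, the SIMD vector holds exactly one quad,
            // so each swap direction reduces to a fixed lane swizzle.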
            Operand value(shader, *this, valueId);
            for(auto i = 0u; i < type.componentCount; i++)
            {
                SIMD::Int v = value.Int(i);
                switch(direction)
                {
                case 0:  // Horizontal
                    dst.move(i, v.yxwz);
                    break;
                case 1:  // Vertical
                    dst.move(i, v.zwxy);
                    break;
                case 2:  // Diagonal
                    dst.move(i, v.wzyx);
                    break;
                default:
                    // The SPIR-V spec doesn't define what happens in this case,
                    // so the result is undefined.
                    UNSUPPORTED("SPIR-V does not define an OpGroupNonUniformQuadSwap result for a direction of %d", direction);
                    break;
                }
            }
        }
        break;

    case spv::OpGroupNonUniformBallot:
        {
            ASSERT(type.componentCount == 4);
            Operand predicate(shader, *this, insn.word(4));
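            // SignMask packs the sign bit of each active, predicated lane
            // into a 4-bit ballot in the first result component; the
            // remaining components are zero.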
            dst.move(0, SIMD::Int(SignMask(activeLaneMask() & predicate.Int(0))));  // Considers helper invocations active. See b/151137030
            dst.move(1, SIMD::Int(0));
            dst.move(2, SIMD::Int(0));
            dst.move(3, SIMD::Int(0));
        }
        break;

    case spv::OpGroupNonUniformInverseBallot:
        {
            auto valueId = Object::ID(insn.word(4));
            ASSERT(type.componentCount == 1);
            ASSERT(shader.getObjectType(valueId).componentCount == 4);
            Operand value(shader, *this, valueId);
            auto bit = (value.Int(0) >> SIMD::Int(0, 1, 2, 3)) & SIMD::Int(1);
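            // Negating the extracted 0/1 bit expands it into the 0 / ~0
            // boolean mask expected for the result.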
            dst.move(0, -bit);
        }
        break;

    case spv::OpGroupNonUniformBallotBitExtract:
        {
            auto valueId = Object::ID(insn.word(4));
            auto indexId = Object::ID(insn.word(5));
            ASSERT(type.componentCount == 1);
            ASSERT(shader.getObjectType(valueId).componentCount == 4);
            ASSERT(shader.getObjectType(indexId).componentCount == 1);
            Operand value(shader, *this, valueId);
            Operand index(shader, *this, indexId);
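            // Split the bit index into a ballot component index and a bit
            // position within that component.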
            auto vecIdx = index.Int(0) / SIMD::Int(32);
            auto bitIdx = index.Int(0) & SIMD::Int(31);
            auto bits = (value.Int(0) & CmpEQ(vecIdx, SIMD::Int(0))) |
                        (value.Int(1) & CmpEQ(vecIdx, SIMD::Int(1))) |
                        (value.Int(2) & CmpEQ(vecIdx, SIMD::Int(2))) |
                        (value.Int(3) & CmpEQ(vecIdx, SIMD::Int(3)));
            dst.move(0, -((bits >> bitIdx) & SIMD::Int(1)));
        }
        break;

    case spv::OpGroupNonUniformBallotBitCount:
        {
            auto operation = spv::GroupOperation(insn.word(4));
            auto valueId = Object::ID(insn.word(5));
            ASSERT(type.componentCount == 1);
            ASSERT(shader.getObjectType(valueId).componentCount == 4);
            Operand value(shader, *this, valueId);
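            // Only the low 4 ballot bits are valid for a 4-lane subgroup.
            // Reduce counts all of them; the scans count only the bits at
            // lane indices below each lane (exclusive) or up to and
            // including it (inclusive).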
            switch(operation)
            {
            case spv::GroupOperationReduce:
                dst.move(0, CountBits(value.UInt(0) & SIMD::UInt(15)));
                break;
            case spv::GroupOperationInclusiveScan:
                dst.move(0, CountBits(value.UInt(0) & SIMD::UInt(1, 3, 7, 15)));
                break;
            case spv::GroupOperationExclusiveScan:
                dst.move(0, CountBits(value.UInt(0) & SIMD::UInt(0, 1, 3, 7)));
                break;
            default:
                UNSUPPORTED("GroupOperation %d", int(operation));
            }
        }
        break;

    case spv::OpGroupNonUniformBallotFindLSB:
        {
            auto valueId = Object::ID(insn.word(4));
            ASSERT(type.componentCount == 1);
            ASSERT(shader.getObjectType(valueId).componentCount == 4);
            Operand value(shader, *this, valueId);
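            // Mask to the 4 valid ballot bits, then count trailing zeros to
            // find the lowest set bit.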
            dst.move(0, Cttz(value.UInt(0) & SIMD::UInt(15), false));
        }
        break;

    case spv::OpGroupNonUniformBallotFindMSB:
        {
            auto valueId = Object::ID(insn.word(4));
            ASSERT(type.componentCount == 1);
            ASSERT(shader.getObjectType(valueId).componentCount == 4);
            Operand value(shader, *this, valueId);
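            // Mask to the 4 valid ballot bits; 31 - Ctlz then gives the
            // index of the highest set bit.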
            dst.move(0, SIMD::UInt(31) - Ctlz(value.UInt(0) & SIMD::UInt(15), false));
        }
        break;

    case spv::OpGroupNonUniformShuffle:
        {
            Operand value(shader, *this, insn.word(4));
            Operand id(shader, *this, insn.word(5));
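            // For each source lane, build a mask of the destination lanes
            // whose id selects it, then gather with broadcast swizzles.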
            auto x = CmpEQ(SIMD::Int(0), id.Int(0));
            auto y = CmpEQ(SIMD::Int(1), id.Int(0));
            auto z = CmpEQ(SIMD::Int(2), id.Int(0));
            auto w = CmpEQ(SIMD::Int(3), id.Int(0));
            for(auto i = 0u; i < type.componentCount; i++)
            {
                SIMD::Int v = value.Int(i);
                dst.move(i, (x & v.xxxx) | (y & v.yyyy) | (z & v.zzzz) | (w & v.wwww));
            }
        }
        break;

    case spv::OpGroupNonUniformShuffleXor:
        {
            Operand value(shader, *this, insn.word(4));
            Operand mask(shader, *this, insn.word(5));
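            // Each destination lane i reads source lane i ^ mask; the
            // comparisons below mark which destination lanes map to each
            // source lane.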
            auto x = CmpEQ(SIMD::Int(0), SIMD::Int(0, 1, 2, 3) ^ mask.Int(0));
            auto y = CmpEQ(SIMD::Int(1), SIMD::Int(0, 1, 2, 3) ^ mask.Int(0));
            auto z = CmpEQ(SIMD::Int(2), SIMD::Int(0, 1, 2, 3) ^ mask.Int(0));
            auto w = CmpEQ(SIMD::Int(3), SIMD::Int(0, 1, 2, 3) ^ mask.Int(0));
            for(auto i = 0u; i < type.componentCount; i++)
            {
                SIMD::Int v = value.Int(i);
                dst.move(i, (x & v.xxxx) | (y & v.yyyy) | (z & v.zzzz) | (w & v.wwww));
            }
        }
        break;

    case spv::OpGroupNonUniformShuffleUp:
        {
            Operand value(shader, *this, insn.word(4));
            Operand delta(shader, *this, insn.word(5));
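            // Each destination lane i reads source lane i - delta. Lanes
            // that would read below lane 0 (undefined per the SPIR-V spec)
            // read lane 0 here.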
            auto d0 = CmpEQ(SIMD::Int(0), delta.Int(0));
            auto d1 = CmpEQ(SIMD::Int(1), delta.Int(0));
            auto d2 = CmpEQ(SIMD::Int(2), delta.Int(0));
            auto d3 = CmpEQ(SIMD::Int(3), delta.Int(0));
            for(auto i = 0u; i < type.componentCount; i++)
            {
                SIMD::Int v = value.Int(i);
                dst.move(i, (d0 & v.xyzw) | (d1 & v.xxyz) | (d2 & v.xxxy) | (d3 & v.xxxx));
            }
        }
        break;

    case spv::OpGroupNonUniformShuffleDown:
        {
            Operand value(shader, *this, insn.word(4));
            Operand delta(shader, *this, insn.word(5));
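            // Each destination lane i reads source lane i + delta. Lanes
            // that would read past lane 3 (undefined per the SPIR-V spec)
            // read lane 3 here.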
            auto d0 = CmpEQ(SIMD::Int(0), delta.Int(0));
            auto d1 = CmpEQ(SIMD::Int(1), delta.Int(0));
            auto d2 = CmpEQ(SIMD::Int(2), delta.Int(0));
            auto d3 = CmpEQ(SIMD::Int(3), delta.Int(0));
            for(auto i = 0u; i < type.componentCount; i++)
            {
                SIMD::Int v = value.Int(i);
                dst.move(i, (d0 & v.xyzw) | (d1 & v.yzww) | (d2 & v.zwww) | (d3 & v.wwww));
            }
        }
        break;

    // The remaining instructions are GroupNonUniformArithmetic operations
    default:
        {
            auto &type = shader.getType(Type::ID(insn.word(1)));
            auto operation = static_cast<spv::GroupOperation>(insn.word(4));
            Operand value(shader, *this, insn.word(5));
            auto mask = As<SIMD::UInt>(activeLaneMask());  // Considers helper invocations active. See b/151137030
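            // Each arithmetic operation reduces to BinaryOperation with the
            // operation's identity element: the value that leaves the result
            // unchanged (e.g. 0 for addition, 1 for multiplication, ~0u for
            // bitwise AND).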

            for(uint32_t i = 0; i < type.componentCount; i++)
            {
                switch(insn.opcode())
                {
                case spv::OpGroupNonUniformIAdd:
                    dst.move(i, BinaryOperation<SIMD::Int>(
                                    operation, value.UInt(i), mask, 0,
                                    [](auto a, auto b) { return a + b; }));
                    break;

                case spv::OpGroupNonUniformFAdd:
                    dst.move(i, BinaryOperation<SIMD::Float>(
                                    operation, value.UInt(i), mask, 0.0f,
                                    [](auto a, auto b) { return a + b; }));
                    break;

                case spv::OpGroupNonUniformIMul:
                    dst.move(i, BinaryOperation<SIMD::Int>(
                                    operation, value.UInt(i), mask, 1,
                                    [](auto a, auto b) { return a * b; }));
                    break;

                case spv::OpGroupNonUniformFMul:
                    dst.move(i, BinaryOperation<SIMD::Float>(
                                    operation, value.UInt(i), mask, 1.0f,
                                    [](auto a, auto b) { return a * b; }));
                    break;

                case spv::OpGroupNonUniformBitwiseAnd:
                    dst.move(i, BinaryOperation<SIMD::UInt>(
                                    operation, value.UInt(i), mask, ~0u,
                                    [](auto a, auto b) { return a & b; }));
                    break;

                case spv::OpGroupNonUniformBitwiseOr:
                    dst.move(i, BinaryOperation<SIMD::UInt>(
                                    operation, value.UInt(i), mask, 0,
                                    [](auto a, auto b) { return a | b; }));
                    break;

                case spv::OpGroupNonUniformBitwiseXor:
                    dst.move(i, BinaryOperation<SIMD::UInt>(
                                    operation, value.UInt(i), mask, 0,
                                    [](auto a, auto b) { return a ^ b; }));
                    break;

                case spv::OpGroupNonUniformSMin:
                    dst.move(i, BinaryOperation<SIMD::Int>(
                                    operation, value.UInt(i), mask, INT32_MAX,
                                    [](auto a, auto b) { return Min(a, b); }));
                    break;

                case spv::OpGroupNonUniformUMin:
                    dst.move(i, BinaryOperation<SIMD::UInt>(
                                    operation, value.UInt(i), mask, ~0u,
                                    [](auto a, auto b) { return Min(a, b); }));
                    break;

                case spv::OpGroupNonUniformFMin:
                    dst.move(i, BinaryOperation<SIMD::Float>(
                                    operation, value.UInt(i), mask, SIMD::Float::infinity(),
                                    [](auto a, auto b) { return NMin(a, b); }));
                    break;

                case spv::OpGroupNonUniformSMax:
                    dst.move(i, BinaryOperation<SIMD::Int>(
                                    operation, value.UInt(i), mask, INT32_MIN,
                                    [](auto a, auto b) { return Max(a, b); }));
                    break;

                case spv::OpGroupNonUniformUMax:
                    dst.move(i, BinaryOperation<SIMD::UInt>(
                                    operation, value.UInt(i), mask, 0,
                                    [](auto a, auto b) { return Max(a, b); }));
                    break;

                case spv::OpGroupNonUniformFMax:
                    dst.move(i, BinaryOperation<SIMD::Float>(
                                    operation, value.UInt(i), mask, -SIMD::Float::infinity(),
                                    [](auto a, auto b) { return NMax(a, b); }));
                    break;

                case spv::OpGroupNonUniformLogicalAnd:
                    dst.move(i, BinaryOperation<SIMD::UInt>(
                                    operation, value.UInt(i), mask, ~0u,
                                    [](auto a, auto b) {
                                        SIMD::UInt zero = SIMD::UInt(0);
                                        return CmpNEQ(a, zero) & CmpNEQ(b, zero);
                                    }));
                    break;

                case spv::OpGroupNonUniformLogicalOr:
                    dst.move(i, BinaryOperation<SIMD::UInt>(
                                    operation, value.UInt(i), mask, 0,
                                    [](auto a, auto b) {
                                        SIMD::UInt zero = SIMD::UInt(0);
                                        return CmpNEQ(a, zero) | CmpNEQ(b, zero);
                                    }));
                    break;

                case spv::OpGroupNonUniformLogicalXor:
                    dst.move(i, BinaryOperation<SIMD::UInt>(
                                    operation, value.UInt(i), mask, 0,
                                    [](auto a, auto b) {
                                        SIMD::UInt zero = SIMD::UInt(0);
                                        return CmpNEQ(a, zero) ^ CmpNEQ(b, zero);
                                    }));
                    break;

                default:
                    UNSUPPORTED("EmitGroupNonUniform op: %s", shader.OpcodeName(insn.opcode()));
                }
            }
        }
        break;
    }
}

}  // namespace sw