// Copyright 2019 The SwiftShader Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "SpirvShader.hpp"

#include <spirv/unified1/spirv.hpp>

namespace sw {

// Template function to perform a binary group operation.
// |TYPE| should be the type of the binary operation (as a SIMD::<ScalarType>).
// |I| should be a type suitable to initialize the identity value.
// |APPLY| should be a callable object that takes two RValue<TYPE> parameters
// and returns a new RValue<TYPE> corresponding to the operation's result.
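//
// A usage sketch, mirroring the arithmetic cases at the bottom of this file:
// a masked integer add reduction across the four subgroup lanes:
//   BinaryOperation<SIMD::Int>(spv::GroupOperationReduce, value.UInt(i), mask, 0,
//                              [](auto a, auto b) { return a + b; });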
template<typename TYPE, typename I, typename APPLY>
static RValue<TYPE> BinaryOperation(
    spv::GroupOperation operation,
    RValue<SIMD::UInt> value,
    RValue<SIMD::UInt> mask,
    const I identityValue,
    APPLY &&apply)
{
	auto identity = TYPE(identityValue);
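	// Set inactive lanes to the identity value so they cannot affect the result.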
	SIMD::UInt v_uint = (value & mask) | (As<SIMD::UInt>(identity) & ~mask);
	TYPE v = As<TYPE>(v_uint);

	switch(operation)
	{
	case spv::GroupOperationReduce:
		{
			// NOTE: floating-point add and multiply are not associative, so
			//       ensure that all values in the final lanes are identical
			TYPE v2 = apply(v.xxzz, v.yyww);  // [xy]   [xy]   [zw]   [zw]
			return apply(v2.xxxx, v2.zzzz);   // [xyzw] [xyzw] [xyzw] [xyzw]
		}
		break;
	case spv::GroupOperationInclusiveScan:
		{
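			// Each hex digit of the Shuffle select picks one destination lane, most
			// significant digit first; digits 0-3 index the first operand and 4-7 the
			// second, so 0x4012 produces [identity, v.x, v.y, v.z].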
			TYPE v2 = apply(v, Shuffle(v, identity, 0x4012) /* [id, v.x, v.y, v.z] */);   // [x] [xy] [yz]  [zw]
			return apply(v2, Shuffle(v2, identity, 0x4401) /* [id,  id, v2.x, v2.y] */);  // [x] [xy] [xyz] [xyzw]
		}
		break;
	case spv::GroupOperationExclusiveScan:
		{
			TYPE v2 = apply(v, Shuffle(v, identity, 0x4012) /* [id, v.x, v.y, v.z] */);      // [x] [xy] [yz]  [zw]
			TYPE v3 = apply(v2, Shuffle(v2, identity, 0x4401) /* [id,  id, v2.x, v2.y] */);  // [x] [xy] [xyz] [xyzw]
			return Shuffle(v3, identity, 0x4012 /* [id, v3.x, v3.y, v3.z] */);               // [id] [x]  [xy]  [xyz]
		}
		break;
	default:
		UNSUPPORTED("Group operation: %d", operation);
		return identity;
	}
}

void SpirvEmitter::EmitGroupNonUniform(InsnIterator insn)
{
	ASSERT(SIMD::Width == 4);  // EmitGroupNonUniform makes many assumptions that the SIMD vector width is 4

	auto &type = shader.getType(Type::ID(insn.word(1)));
	Object::ID resultId = insn.word(2);
	auto scope = spv::Scope(shader.GetConstScalarInt(insn.word(3)));
	ASSERT_MSG(scope == spv::ScopeSubgroup, "Scope for Non Uniform Group Operations must be Subgroup for Vulkan 1.1");

	auto &dst = createIntermediate(resultId, type.componentCount);

	switch(insn.opcode())
	{
	case spv::OpGroupNonUniformElect:
		{
			// Result is true only in the active invocation with the lowest id
			// in the group, otherwise result is false.
			SIMD::Int active = activeLaneMask();  // Considers helper invocations active. See b/151137030
			// TODO: Would be nice if we could write this as:
			//   elect = active & ~(active.Oxyz | active.OOxy | active.OOOx)
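			// The OR of the shifted copies marks, per lane, whether any lower-indexed
			// lane is active; v0111 clears lane 0 (which has no lower lanes), so the
			// complement selects only the lowest active lane.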
			auto v0111 = SIMD::Int(0, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
			auto elect = active & ~(v0111 & (active.xxyz | active.xxxy | active.xxxx));
			dst.move(0, elect);
		}
		break;

	case spv::OpGroupNonUniformAll:
		{
			Operand predicate(shader, *this, insn.word(4));
			dst.move(0, AndAll(predicate.UInt(0) | ~As<SIMD::UInt>(activeLaneMask())));  // Considers helper invocations active. See b/151137030
		}
		break;

	case spv::OpGroupNonUniformAny:
		{
			Operand predicate(shader, *this, insn.word(4));
			dst.move(0, OrAll(predicate.UInt(0) & As<SIMD::UInt>(activeLaneMask())));  // Considers helper invocations active. See b/151137030
		}
		break;

	case spv::OpGroupNonUniformAllEqual:
		{
			Operand value(shader, *this, insn.word(4));
			auto res = SIMD::UInt(0xffffffff);
			SIMD::UInt active = As<SIMD::UInt>(activeLaneMask());  // Considers helper invocations active. See b/151137030
			SIMD::UInt inactive = ~active;
			for(auto i = 0u; i < type.componentCount; i++)
			{
				SIMD::UInt v = value.UInt(i) & active;
				SIMD::UInt filled = v;
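				// Three rotate-and-OR steps guarantee that every lane ends up holding
				// some active lane's value whenever at least one lane is active.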
				for(int j = 0; j < SIMD::Width - 1; j++)
				{
					filled |= filled.yzwx & inactive;  // Populate inactive 'holes' with a live value
				}
				res &= AndAll(CmpEQ(filled.xyzw, filled.yzwx));
			}
			dst.move(0, res);
		}
		break;

	case spv::OpGroupNonUniformBroadcast:
		{
			auto valueId = Object::ID(insn.word(4));
			auto idId = Object::ID(insn.word(5));
			Operand value(shader, *this, valueId);

			// Decide between the fast path for constants and the slow path for
			// intermediates.
			if(shader.getObject(idId).kind == Object::Kind::Constant)
			{
				auto id = SIMD::Int(shader.GetConstScalarInt(insn.word(5)));
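				// Comparing against the lane indices yields a one-hot mask; OR-reducing
				// the masked value broadcasts the selected lane to all lanes.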
				auto mask = CmpEQ(id, SIMD::Int(0, 1, 2, 3));
				for(auto i = 0u; i < type.componentCount; i++)
				{
					dst.move(i, OrAll(value.Int(i) & mask));
				}
			}
			else
			{
				Operand id(shader, *this, idId);

				SIMD::UInt active = As<SIMD::UInt>(activeLaneMask());  // Considers helper invocations active. See b/151137030
				SIMD::UInt inactive = ~active;
				SIMD::UInt filled = id.UInt(0) & active;

				for(int j = 0; j < SIMD::Width - 1; j++)
				{
					filled |= filled.yzwx & inactive;  // Populate inactive 'holes' with a live value
				}

				auto mask = CmpEQ(filled, SIMD::UInt(0, 1, 2, 3));

				for(uint32_t i = 0u; i < type.componentCount; i++)
				{
					dst.move(i, OrAll(value.UInt(i) & mask));
				}
			}
		}
		break;

	case spv::OpGroupNonUniformBroadcastFirst:
		{
			auto valueId = Object::ID(insn.word(4));
			Operand value(shader, *this, valueId);
			// The result in all lanes is the value from the active invocation
			// with the lowest id in the group.
			SIMD::Int active = activeLaneMask();  // Considers helper invocations active. See b/151137030
			// TODO: Would be nice if we could write this as:
			//   elect = active & ~(active.Oxyz | active.OOxy | active.OOOx)
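			// Same lowest-active-lane election as OpGroupNonUniformElect above.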
			auto v0111 = SIMD::Int(0, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
			auto elect = active & ~(v0111 & (active.xxyz | active.xxxy | active.xxxx));
			for(auto i = 0u; i < type.componentCount; i++)
			{
				dst.move(i, OrAll(value.Int(i) & elect));
			}
		}
		break;

	case spv::OpGroupNonUniformQuadBroadcast:
		{
			auto valueId = Object::ID(insn.word(4));
			Operand value(shader, *this, valueId);

			ASSERT(shader.getType(shader.getObject(insn.word(5))).componentCount == 1);
			auto indexId = Object::ID(insn.word(5));
			SIMD::Int index = Operand(shader, *this, indexId).Int(0);

			SIMD::Int active = activeLaneMask();
			// Populate all lanes in index with the same value. Index is required to be
			// uniform per the SPIR-V spec, so all active lanes should be identical.
			index = OrAll(active & index);
			SIMD::Int mask = CmpEQ(index, SIMD::Int(0, 1, 2, 3));

			for(auto i = 0u; i < type.componentCount; i++)
			{
				dst.move(i, OrAll(value.Int(i) & mask));
			}
		}
		break;

	case spv::OpGroupNonUniformQuadSwap:
		{
			auto valueId = Object::ID(insn.word(4));
			// SPIR-V spec: Direction must be a scalar of integer type and come from a constant instruction
			int direction = shader.GetConstScalarInt(insn.word(5));

			Operand value(shader, *this, valueId);
			for(auto i = 0u; i < type.componentCount; i++)
			{
				SIMD::Int v = value.Int(i);
				switch(direction)
				{
				case 0:  // Horizontal
					dst.move(i, v.yxwz);
					break;
				case 1:  // Vertical
					dst.move(i, v.zwxy);
					break;
				case 2:  // Diagonal
					dst.move(i, v.wzyx);
					break;
				default:
					// The SPIR-V spec doesn't define what happens in this case,
					// so the result is undefined.
					UNSUPPORTED("SPIR-V does not define an OpGroupNonUniformQuadSwap result for a direction of %d", direction);
					break;
				}
			}
		}
		break;

	case spv::OpGroupNonUniformBallot:
		{
			ASSERT(type.componentCount == 4);
			Operand predicate(shader, *this, insn.word(4));
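			// SignMask packs each lane's sign bit into the low 4 bits of a scalar,
			// which is exactly the ballot bitmask for a 4-wide subgroup.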
			dst.move(0, SIMD::Int(SignMask(activeLaneMask() & predicate.Int(0))));  // Considers helper invocations active. See b/151137030
			dst.move(1, SIMD::Int(0));
			dst.move(2, SIMD::Int(0));
			dst.move(3, SIMD::Int(0));
		}
		break;

	case spv::OpGroupNonUniformInverseBallot:
		{
			auto valueId = Object::ID(insn.word(4));
			ASSERT(type.componentCount == 1);
			ASSERT(shader.getObjectType(valueId).componentCount == 4);
			Operand value(shader, *this, valueId);
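			// Extract this lane's ballot bit; negating the 0/1 bit yields the
			// all-zeros/all-ones boolean mask expected for the result.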
			auto bit = (value.Int(0) >> SIMD::Int(0, 1, 2, 3)) & SIMD::Int(1);
			dst.move(0, -bit);
		}
		break;

	case spv::OpGroupNonUniformBallotBitExtract:
		{
			auto valueId = Object::ID(insn.word(4));
			auto indexId = Object::ID(insn.word(5));
			ASSERT(type.componentCount == 1);
			ASSERT(shader.getObjectType(valueId).componentCount == 4);
			ASSERT(shader.getObjectType(indexId).componentCount == 1);
			Operand value(shader, *this, valueId);
			Operand index(shader, *this, indexId);
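			// Split the bit index into a 32-bit word index and a bit offset, then
			// select the addressed ballot word with lane-wise compares.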
			auto vecIdx = index.Int(0) / SIMD::Int(32);
			auto bitIdx = index.Int(0) & SIMD::Int(31);
			auto bits = (value.Int(0) & CmpEQ(vecIdx, SIMD::Int(0))) |
			            (value.Int(1) & CmpEQ(vecIdx, SIMD::Int(1))) |
			            (value.Int(2) & CmpEQ(vecIdx, SIMD::Int(2))) |
			            (value.Int(3) & CmpEQ(vecIdx, SIMD::Int(3)));
			dst.move(0, -((bits >> bitIdx) & SIMD::Int(1)));
		}
		break;

	case spv::OpGroupNonUniformBallotBitCount:
		{
			auto operation = spv::GroupOperation(insn.word(4));
			auto valueId = Object::ID(insn.word(5));
			ASSERT(type.componentCount == 1);
			ASSERT(shader.getObjectType(valueId).componentCount == 4);
			Operand value(shader, *this, valueId);
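			// Only the low 4 ballot bits are meaningful for a 4-wide subgroup. The
			// scan variants mask each lane to the bits at or below its own index
			// (inclusive) or strictly below it (exclusive).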
			switch(operation)
			{
			case spv::GroupOperationReduce:
				dst.move(0, CountBits(value.UInt(0) & SIMD::UInt(15)));
				break;
			case spv::GroupOperationInclusiveScan:
				dst.move(0, CountBits(value.UInt(0) & SIMD::UInt(1, 3, 7, 15)));
				break;
			case spv::GroupOperationExclusiveScan:
				dst.move(0, CountBits(value.UInt(0) & SIMD::UInt(0, 1, 3, 7)));
				break;
			default:
				UNSUPPORTED("GroupOperation %d", int(operation));
			}
		}
		break;

	case spv::OpGroupNonUniformBallotFindLSB:
		{
			auto valueId = Object::ID(insn.word(4));
			ASSERT(type.componentCount == 1);
			ASSERT(shader.getObjectType(valueId).componentCount == 4);
			Operand value(shader, *this, valueId);
			dst.move(0, Cttz(value.UInt(0) & SIMD::UInt(15), false));
		}
		break;

	case spv::OpGroupNonUniformBallotFindMSB:
		{
			auto valueId = Object::ID(insn.word(4));
			ASSERT(type.componentCount == 1);
			ASSERT(shader.getObjectType(valueId).componentCount == 4);
			Operand value(shader, *this, valueId);
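			// The MSB position is 31 minus the leading-zero count of the low 4 ballot bits.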
			dst.move(0, SIMD::UInt(31) - Ctlz(value.UInt(0) & SIMD::UInt(15), false));
		}
		break;

	case spv::OpGroupNonUniformShuffle:
		{
			Operand value(shader, *this, insn.word(4));
			Operand id(shader, *this, insn.word(5));
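			// Emulate a dynamic gather: build a one-hot mask for each possible source
			// lane and blend the broadcast values. The shuffle variants below follow
			// the same pattern.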
			auto x = CmpEQ(SIMD::Int(0), id.Int(0));
			auto y = CmpEQ(SIMD::Int(1), id.Int(0));
			auto z = CmpEQ(SIMD::Int(2), id.Int(0));
			auto w = CmpEQ(SIMD::Int(3), id.Int(0));
			for(auto i = 0u; i < type.componentCount; i++)
			{
				SIMD::Int v = value.Int(i);
				dst.move(i, (x & v.xxxx) | (y & v.yyyy) | (z & v.zzzz) | (w & v.wwww));
			}
		}
		break;

	case spv::OpGroupNonUniformShuffleXor:
		{
			Operand value(shader, *this, insn.word(4));
			Operand mask(shader, *this, insn.word(5));
			auto x = CmpEQ(SIMD::Int(0), SIMD::Int(0, 1, 2, 3) ^ mask.Int(0));
			auto y = CmpEQ(SIMD::Int(1), SIMD::Int(0, 1, 2, 3) ^ mask.Int(0));
			auto z = CmpEQ(SIMD::Int(2), SIMD::Int(0, 1, 2, 3) ^ mask.Int(0));
			auto w = CmpEQ(SIMD::Int(3), SIMD::Int(0, 1, 2, 3) ^ mask.Int(0));
			for(auto i = 0u; i < type.componentCount; i++)
			{
				SIMD::Int v = value.Int(i);
				dst.move(i, (x & v.xxxx) | (y & v.yyyy) | (z & v.zzzz) | (w & v.wwww));
			}
		}
		break;

	case spv::OpGroupNonUniformShuffleUp:
		{
			Operand value(shader, *this, insn.word(4));
			Operand delta(shader, *this, insn.word(5));
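			// Select the shifted vector by comparing against each possible delta.
			// Lanes whose source index would fall below lane 0 reuse lane 0's value;
			// SPIR-V leaves those results undefined anyway.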
			auto d0 = CmpEQ(SIMD::Int(0), delta.Int(0));
			auto d1 = CmpEQ(SIMD::Int(1), delta.Int(0));
			auto d2 = CmpEQ(SIMD::Int(2), delta.Int(0));
			auto d3 = CmpEQ(SIMD::Int(3), delta.Int(0));
			for(auto i = 0u; i < type.componentCount; i++)
			{
				SIMD::Int v = value.Int(i);
				dst.move(i, (d0 & v.xyzw) | (d1 & v.xxyz) | (d2 & v.xxxy) | (d3 & v.xxxx));
			}
		}
		break;

	case spv::OpGroupNonUniformShuffleDown:
		{
			Operand value(shader, *this, insn.word(4));
			Operand delta(shader, *this, insn.word(5));
			auto d0 = CmpEQ(SIMD::Int(0), delta.Int(0));
			auto d1 = CmpEQ(SIMD::Int(1), delta.Int(0));
			auto d2 = CmpEQ(SIMD::Int(2), delta.Int(0));
			auto d3 = CmpEQ(SIMD::Int(3), delta.Int(0));
			for(auto i = 0u; i < type.componentCount; i++)
			{
				SIMD::Int v = value.Int(i);
				dst.move(i, (d0 & v.xyzw) | (d1 & v.yzww) | (d2 & v.zwww) | (d3 & v.wwww));
			}
		}
		break;

	// The remaining instructions are GroupNonUniformArithmetic operations
	default:
		auto &type = shader.getType(Type::ID(insn.word(1)));
		auto operation = static_cast<spv::GroupOperation>(insn.word(4));
		Operand value(shader, *this, insn.word(5));
		auto mask = As<SIMD::UInt>(activeLaneMask());  // Considers helper invocations active. See b/151137030
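		// Each arithmetic opcode below supplies its identity value and combining
		// lambda to the generic BinaryOperation helper above.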

		for(uint32_t i = 0; i < type.componentCount; i++)
		{
			switch(insn.opcode())
			{
			case spv::OpGroupNonUniformIAdd:
				dst.move(i, BinaryOperation<SIMD::Int>(
				                operation, value.UInt(i), mask, 0,
				                [](auto a, auto b) { return a + b; }));
				break;
			case spv::OpGroupNonUniformFAdd:
				dst.move(i, BinaryOperation<SIMD::Float>(
				                operation, value.UInt(i), mask, 0.0f,
				                [](auto a, auto b) { return a + b; }));
				break;

			case spv::OpGroupNonUniformIMul:
				dst.move(i, BinaryOperation<SIMD::Int>(
				                operation, value.UInt(i), mask, 1,
				                [](auto a, auto b) { return a * b; }));
				break;

			case spv::OpGroupNonUniformFMul:
				dst.move(i, BinaryOperation<SIMD::Float>(
				                operation, value.UInt(i), mask, 1.0f,
				                [](auto a, auto b) { return a * b; }));
				break;

			case spv::OpGroupNonUniformBitwiseAnd:
				dst.move(i, BinaryOperation<SIMD::UInt>(
				                operation, value.UInt(i), mask, ~0u,
				                [](auto a, auto b) { return a & b; }));
				break;

			case spv::OpGroupNonUniformBitwiseOr:
				dst.move(i, BinaryOperation<SIMD::UInt>(
				                operation, value.UInt(i), mask, 0,
				                [](auto a, auto b) { return a | b; }));
				break;

			case spv::OpGroupNonUniformBitwiseXor:
				dst.move(i, BinaryOperation<SIMD::UInt>(
				                operation, value.UInt(i), mask, 0,
				                [](auto a, auto b) { return a ^ b; }));
				break;

			case spv::OpGroupNonUniformSMin:
				dst.move(i, BinaryOperation<SIMD::Int>(
				                operation, value.UInt(i), mask, INT32_MAX,
				                [](auto a, auto b) { return Min(a, b); }));
				break;

			case spv::OpGroupNonUniformUMin:
				dst.move(i, BinaryOperation<SIMD::UInt>(
				                operation, value.UInt(i), mask, ~0u,
				                [](auto a, auto b) { return Min(a, b); }));
				break;

			case spv::OpGroupNonUniformFMin:
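				// NMin/NMax return the non-NaN operand when exactly one input is NaN,
				// so NaN inputs do not poison the reduction.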
				dst.move(i, BinaryOperation<SIMD::Float>(
				                operation, value.UInt(i), mask, SIMD::Float::infinity(),
				                [](auto a, auto b) { return NMin(a, b); }));
				break;

			case spv::OpGroupNonUniformSMax:
				dst.move(i, BinaryOperation<SIMD::Int>(
				                operation, value.UInt(i), mask, INT32_MIN,
				                [](auto a, auto b) { return Max(a, b); }));
				break;

			case spv::OpGroupNonUniformUMax:
				dst.move(i, BinaryOperation<SIMD::UInt>(
				                operation, value.UInt(i), mask, 0,
				                [](auto a, auto b) { return Max(a, b); }));
				break;

			case spv::OpGroupNonUniformFMax:
				dst.move(i, BinaryOperation<SIMD::Float>(
				                operation, value.UInt(i), mask, -SIMD::Float::infinity(),
				                [](auto a, auto b) { return NMax(a, b); }));
				break;

			case spv::OpGroupNonUniformLogicalAnd:
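				// The logical variants first normalize each operand to a 0/~0 boolean
				// with CmpNEQ before combining.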
				dst.move(i, BinaryOperation<SIMD::UInt>(
				                operation, value.UInt(i), mask, ~0u,
				                [](auto a, auto b) {
					                SIMD::UInt zero = SIMD::UInt(0);
					                return CmpNEQ(a, zero) & CmpNEQ(b, zero);
				                }));
				break;

			case spv::OpGroupNonUniformLogicalOr:
				dst.move(i, BinaryOperation<SIMD::UInt>(
				                operation, value.UInt(i), mask, 0,
				                [](auto a, auto b) {
					                SIMD::UInt zero = SIMD::UInt(0);
					                return CmpNEQ(a, zero) | CmpNEQ(b, zero);
				                }));
				break;

			case spv::OpGroupNonUniformLogicalXor:
				dst.move(i, BinaryOperation<SIMD::UInt>(
				                operation, value.UInt(i), mask, 0,
				                [](auto a, auto b) {
					                SIMD::UInt zero = SIMD::UInt(0);
					                return CmpNEQ(a, zero) ^ CmpNEQ(b, zero);
				                }));
				break;

			default:
				UNSUPPORTED("EmitGroupNonUniform op: %s", shader.OpcodeName(insn.opcode()));
			}
		}
		break;
	}
}

}  // namespace sw