//
// Copyright (C) 2018 Google, Inc.
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//
//    Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
//
//    Redistributions in binary form must reproduce the above
//    copyright notice, this list of conditions and the following
//    disclaimer in the documentation and/or other materials provided
//    with the distribution.
//
//    Neither the name of 3Dlabs Inc. Ltd. nor the names of its
//    contributors may be used to endorse or promote products derived
//    from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.

//
// Post-processing for SPIR-V IR, in internal form, not standard binary form.
//
38
39 #include <cassert>
40 #include <cstdlib>
41
42 #include <unordered_map>
43 #include <unordered_set>
44 #include <algorithm>
45
46 #include "SpvBuilder.h"
47 #include "spirv.hpp"
48
49 namespace spv {
50 #include "GLSL.std.450.h"
51 #include "GLSL.ext.KHR.h"
52 #include "GLSL.ext.EXT.h"
53 #include "GLSL.ext.AMD.h"
54 #include "GLSL.ext.NV.h"
55 #include "GLSL.ext.ARM.h"
56 #include "GLSL.ext.QCOM.h"
57 }
58
59 namespace spv {
60
61 // Hook to visit each operand type and result type of an instruction.
62 // Will be called multiple times for one instruction, once for each typed
63 // operand and the result.
void Builder::postProcessType(const Instruction& inst, Id typeId)
{
    // Characterize the type being questioned: its most basic type class
    // (e.g. OpTypeInt for a vector of ints), and for numeric scalars the
    // bit width.
    Id basicTypeOp = getMostBasicTypeClass(typeId);
    int width = 0;
    if (basicTypeOp == OpTypeFloat || basicTypeOp == OpTypeInt)
        width = getScalarTypeWidth(typeId);

    // Do opcode-specific checks
    switch (inst.getOpCode()) {
    case OpLoad:
    case OpStore:
        if (basicTypeOp == OpTypeStruct) {
            // Loading/storing a whole struct: any small-width member requires
            // the corresponding full arithmetic capability.
            if (containsType(typeId, OpTypeInt, 8))
                addCapability(CapabilityInt8);
            if (containsType(typeId, OpTypeInt, 16))
                addCapability(CapabilityInt16);
            if (containsType(typeId, OpTypeFloat, 16))
                addCapability(CapabilityFloat16);
        } else {
            StorageClass storageClass = getStorageClass(inst.getIdOperand(0));
            if (width == 8) {
                // 8-bit access in these storage classes is covered by the
                // 8-bit *storage* capabilities; any other storage class needs
                // full Int8 arithmetic support.
                switch (storageClass) {
                case StorageClassPhysicalStorageBufferEXT:
                case StorageClassUniform:
                case StorageClassStorageBuffer:
                case StorageClassPushConstant:
                    break;
                default:
                    addCapability(CapabilityInt8);
                    break;
                }
            } else if (width == 16) {
                // Same idea for 16-bit access; Input/Output are additionally
                // covered by the 16-bit storage capabilities.
                switch (storageClass) {
                case StorageClassPhysicalStorageBufferEXT:
                case StorageClassUniform:
                case StorageClassStorageBuffer:
                case StorageClassPushConstant:
                case StorageClassInput:
                case StorageClassOutput:
                    break;
                default:
                    if (basicTypeOp == OpTypeInt)
                        addCapability(CapabilityInt16);
                    if (basicTypeOp == OpTypeFloat)
                        addCapability(CapabilityFloat16);
                    break;
                }
            }
        }
        break;
    case OpCopyObject:
        // A pure copy does not by itself require arithmetic capabilities.
        break;
    case OpFConvert:
    case OpSConvert:
    case OpUConvert:
        // Look for any 8/16-bit storage capabilities. If there are none, assume that
        // the convert instruction requires the Float16/Int8/16 capability.
        if (containsType(typeId, OpTypeFloat, 16) || containsType(typeId, OpTypeInt, 16)) {
            bool foundStorage = false;
            for (auto it = capabilities.begin(); it != capabilities.end(); ++it) {
                spv::Capability cap = *it;
                if (cap == spv::CapabilityStorageInputOutput16 ||
                    cap == spv::CapabilityStoragePushConstant16 ||
                    cap == spv::CapabilityStorageUniformBufferBlock16 ||
                    cap == spv::CapabilityStorageUniform16) {
                    foundStorage = true;
                    break;
                }
            }
            if (!foundStorage) {
                if (containsType(typeId, OpTypeFloat, 16))
                    addCapability(CapabilityFloat16);
                if (containsType(typeId, OpTypeInt, 16))
                    addCapability(CapabilityInt16);
            }
        }
        if (containsType(typeId, OpTypeInt, 8)) {
            bool foundStorage = false;
            for (auto it = capabilities.begin(); it != capabilities.end(); ++it) {
                spv::Capability cap = *it;
                if (cap == spv::CapabilityStoragePushConstant8 ||
                    cap == spv::CapabilityUniformAndStorageBuffer8BitAccess ||
                    cap == spv::CapabilityStorageBuffer8BitAccess) {
                    foundStorage = true;
                    break;
                }
            }
            if (!foundStorage) {
                addCapability(CapabilityInt8);
            }
        }
        break;
    case OpExtInst:
        // Immediate operand 1 of OpExtInst is the instruction number within
        // the extended instruction set (GLSL.std.450 here).
        switch (inst.getImmediateOperand(1)) {
        case GLSLstd450Frexp:
        case GLSLstd450FrexpStruct:
            // Pre-SPIR-V-1.3, a 16-bit int result requires the AMD extension.
            if (getSpvVersion() < spv::Spv_1_3 && containsType(typeId, OpTypeInt, 16))
                addExtension(spv::E_SPV_AMD_gpu_shader_int16);
            break;
        case GLSLstd450InterpolateAtCentroid:
        case GLSLstd450InterpolateAtSample:
        case GLSLstd450InterpolateAtOffset:
            // Pre-SPIR-V-1.3, 16-bit float interpolation requires the AMD extension.
            if (getSpvVersion() < spv::Spv_1_3 && containsType(typeId, OpTypeFloat, 16))
                addExtension(spv::E_SPV_AMD_gpu_shader_half_float);
            break;
        default:
            break;
        }
        break;
    case OpAccessChain:
    case OpPtrAccessChain:
        // The pointer result of an access chain doesn't itself need a
        // small-int capability; only non-pointer (index) types do.
        if (isPointerType(typeId))
            break;
        if (basicTypeOp == OpTypeInt) {
            if (width == 16)
                addCapability(CapabilityInt16);
            else if (width == 8)
                addCapability(CapabilityInt8);
        }
        break;
    default:
        // Generic case: any small or large numeric width appearing as an
        // operand/result type requires the matching arithmetic capability.
        if (basicTypeOp == OpTypeInt) {
            if (width == 16)
                addCapability(CapabilityInt16);
            else if (width == 8)
                addCapability(CapabilityInt8);
            else if (width == 64)
                addCapability(CapabilityInt64);
        } else if (basicTypeOp == OpTypeFloat) {
            if (width == 16)
                addCapability(CapabilityFloat16);
            else if (width == 64)
                addCapability(CapabilityFloat64);
        }
        break;
    }
}

// Called for each instruction that resides in a block.
void Builder::postProcess(Instruction& inst)
{
    // Add capabilities based simply on the opcode.
    switch (inst.getOpCode()) {
    case OpExtInst:
        // Immediate operand 1 is the GLSL.std.450 instruction number.
        switch (inst.getImmediateOperand(1)) {
        case GLSLstd450InterpolateAtCentroid:
        case GLSLstd450InterpolateAtSample:
        case GLSLstd450InterpolateAtOffset:
            addCapability(CapabilityInterpolationFunction);
            break;
        default:
            break;
        }
        break;
    case OpDPdxFine:
    case OpDPdyFine:
    case OpFwidthFine:
    case OpDPdxCoarse:
    case OpDPdyCoarse:
    case OpFwidthCoarse:
        addCapability(CapabilityDerivativeControl);
        break;

    case OpImageQueryLod:
    case OpImageQuerySize:
    case OpImageQuerySizeLod:
    case OpImageQuerySamples:
    case OpImageQueryLevels:
        addCapability(CapabilityImageQuery);
        break;

    case OpGroupNonUniformPartitionNV:
        addExtension(E_SPV_NV_shader_subgroup_partitioned);
        addCapability(CapabilityGroupNonUniformPartitionedNV);
        break;

    case OpLoad:
    case OpStore:
        {
            // For any load/store to a PhysicalStorageBufferEXT, walk the accesschain
            // index list to compute the misalignment. The pre-existing alignment value
            // (set via Builder::AccessChain::alignment) only accounts for the base of
            // the reference type and any scalar component selection in the accesschain,
            // and this function computes the rest from the SPIR-V Offset decorations.
            Instruction *accessChain = module.getInstruction(inst.getIdOperand(0));
            if (accessChain->getOpCode() == OpAccessChain) {
                Instruction *base = module.getInstruction(accessChain->getIdOperand(0));
                // Get the type of the base of the access chain. It must be a pointer type.
                Id typeId = base->getTypeId();
                Instruction *type = module.getInstruction(typeId);
                assert(type->getOpCode() == OpTypePointer);
                // Only PhysicalStorageBufferEXT pointers carry explicit Aligned
                // memory operands that need adjusting; skip everything else.
                if (type->getImmediateOperand(0) != StorageClassPhysicalStorageBufferEXT) {
                    break;
                }
                // Get the pointee type.
                typeId = type->getIdOperand(1);
                type = module.getInstruction(typeId);
                // Walk the index list for the access chain. For each index, find any
                // misalignment that can apply when accessing the member/element via
                // Offset/ArrayStride/MatrixStride decorations, and bitwise OR them all
                // together.
                int alignment = 0;
                for (int i = 1; i < accessChain->getNumOperands(); ++i) {
                    Instruction *idx = module.getInstruction(accessChain->getIdOperand(i));
                    if (type->getOpCode() == OpTypeStruct) {
                        // Struct member selection must be a compile-time constant.
                        assert(idx->getOpCode() == OpConstant);
                        unsigned int c = idx->getImmediateOperand(0);

                        // OR in the member's Offset (and MatrixStride, if any),
                        // which contribute to the worst-case misalignment.
                        const auto function = [&](const std::unique_ptr<Instruction>& decoration) {
                            if (decoration.get()->getOpCode() == OpMemberDecorate &&
                                decoration.get()->getIdOperand(0) == typeId &&
                                decoration.get()->getImmediateOperand(1) == c &&
                                (decoration.get()->getImmediateOperand(2) == DecorationOffset ||
                                 decoration.get()->getImmediateOperand(2) == DecorationMatrixStride)) {
                                alignment |= decoration.get()->getImmediateOperand(3);
                            }
                        };
                        std::for_each(decorations.begin(), decorations.end(), function);
                        // get the next member type
                        typeId = type->getIdOperand(c);
                        type = module.getInstruction(typeId);
                    } else if (type->getOpCode() == OpTypeArray ||
                               type->getOpCode() == OpTypeRuntimeArray) {
                        // Array element access: OR in the ArrayStride decoration.
                        const auto function = [&](const std::unique_ptr<Instruction>& decoration) {
                            if (decoration.get()->getOpCode() == OpDecorate &&
                                decoration.get()->getIdOperand(0) == typeId &&
                                decoration.get()->getImmediateOperand(1) == DecorationArrayStride) {
                                alignment |= decoration.get()->getImmediateOperand(2);
                            }
                        };
                        std::for_each(decorations.begin(), decorations.end(), function);
                        // Get the element type
                        typeId = type->getIdOperand(0);
                        type = module.getInstruction(typeId);
                    } else {
                        // Once we get to any non-aggregate type, we're done.
                        break;
                    }
                }
                assert(inst.getNumOperands() >= 3);
                // The memory-access mask follows the value operand for OpStore
                // (operand 2) and the pointer operand for OpLoad (operand 1).
                unsigned int memoryAccess = inst.getImmediateOperand((inst.getOpCode() == OpStore) ? 2 : 1);
                assert(memoryAccess & MemoryAccessAlignedMask);
                static_cast<void>(memoryAccess);
                // Compute the index of the alignment operand.
                int alignmentIdx = 2;
                if (inst.getOpCode() == OpStore)
                    alignmentIdx++;
                // Merge new and old (mis)alignment
                alignment |= inst.getImmediateOperand(alignmentIdx);
                // Pick the LSB: the lowest set bit is the largest power-of-two
                // alignment guaranteed by all contributing offsets/strides.
                alignment = alignment & ~(alignment & (alignment-1));
                // update the Aligned operand
                inst.setImmediateOperand(alignmentIdx, alignment);
            }
            break;
        }

    default:
        break;
    }

    // Checks based on type
    if (inst.getTypeId() != NoType)
        postProcessType(inst, inst.getTypeId());
    for (int op = 0; op < inst.getNumOperands(); ++op) {
        if (inst.isIdOperand(op)) {
            // In blocks, these are always result ids, but we are relying on
            // getTypeId() to return NoType for things like OpLabel.
            if (getTypeId(inst.getIdOperand(op)) != NoType)
                postProcessType(inst, getTypeId(inst.getIdOperand(op)));
        }
    }
}

// See the declaration comment in the header.
postProcessCFG()340 void Builder::postProcessCFG()
341 {
342 // reachableBlocks is the set of blockss reached via control flow, or which are
343 // unreachable continue targert or unreachable merge.
344 std::unordered_set<const Block*> reachableBlocks;
345 std::unordered_map<Block*, Block*> headerForUnreachableContinue;
346 std::unordered_set<Block*> unreachableMerges;
347 std::unordered_set<Id> unreachableDefinitions;
348 // Collect IDs defined in unreachable blocks. For each function, label the
349 // reachable blocks first. Then for each unreachable block, collect the
350 // result IDs of the instructions in it.
351 for (auto fi = module.getFunctions().cbegin(); fi != module.getFunctions().cend(); fi++) {
352 Function* f = *fi;
353 Block* entry = f->getEntryBlock();
354 inReadableOrder(entry,
355 [&reachableBlocks, &unreachableMerges, &headerForUnreachableContinue]
356 (Block* b, ReachReason why, Block* header) {
357 reachableBlocks.insert(b);
358 if (why == ReachDeadContinue) headerForUnreachableContinue[b] = header;
359 if (why == ReachDeadMerge) unreachableMerges.insert(b);
360 });
361 for (auto bi = f->getBlocks().cbegin(); bi != f->getBlocks().cend(); bi++) {
362 Block* b = *bi;
363 if (unreachableMerges.count(b) != 0 || headerForUnreachableContinue.count(b) != 0) {
364 auto ii = b->getInstructions().cbegin();
365 ++ii; // Keep potential decorations on the label.
366 for (; ii != b->getInstructions().cend(); ++ii)
367 unreachableDefinitions.insert(ii->get()->getResultId());
368 } else if (reachableBlocks.count(b) == 0) {
369 // The normal case for unreachable code. All definitions are considered dead.
370 for (auto ii = b->getInstructions().cbegin(); ii != b->getInstructions().cend(); ++ii)
371 unreachableDefinitions.insert(ii->get()->getResultId());
372 }
373 }
374 }
375
376 // Modify unreachable merge blocks and unreachable continue targets.
377 // Delete their contents.
378 for (auto mergeIter = unreachableMerges.begin(); mergeIter != unreachableMerges.end(); ++mergeIter) {
379 (*mergeIter)->rewriteAsCanonicalUnreachableMerge();
380 }
381 for (auto continueIter = headerForUnreachableContinue.begin();
382 continueIter != headerForUnreachableContinue.end();
383 ++continueIter) {
384 Block* continue_target = continueIter->first;
385 Block* header = continueIter->second;
386 continue_target->rewriteAsCanonicalUnreachableContinue(header);
387 }
388
389 // Remove unneeded decorations, for unreachable instructions
390 for (auto decorationIter = decorations.begin(); decorationIter != decorations.end();) {
391 Id decorationId = (*decorationIter)->getIdOperand(0);
392 if (unreachableDefinitions.count(decorationId) != 0) {
393 decorationIter = decorations.erase(decorationIter);
394 } else {
395 ++decorationIter;
396 }
397 }
398 }

// See the declaration comment in the header.
postProcessFeatures()401 void Builder::postProcessFeatures() {
402 // Add per-instruction capabilities, extensions, etc.,
403
404 // Look for any 8/16 bit type in physical storage buffer class, and set the
405 // appropriate capability. This happens in createSpvVariable for other storage
406 // classes, but there isn't always a variable for physical storage buffer.
407 for (int t = 0; t < (int)groupedTypes[OpTypePointer].size(); ++t) {
408 Instruction* type = groupedTypes[OpTypePointer][t];
409 if (type->getImmediateOperand(0) == (unsigned)StorageClassPhysicalStorageBufferEXT) {
410 if (containsType(type->getIdOperand(1), OpTypeInt, 8)) {
411 addIncorporatedExtension(spv::E_SPV_KHR_8bit_storage, spv::Spv_1_5);
412 addCapability(spv::CapabilityStorageBuffer8BitAccess);
413 }
414 if (containsType(type->getIdOperand(1), OpTypeInt, 16) ||
415 containsType(type->getIdOperand(1), OpTypeFloat, 16)) {
416 addIncorporatedExtension(spv::E_SPV_KHR_16bit_storage, spv::Spv_1_3);
417 addCapability(spv::CapabilityStorageBuffer16BitAccess);
418 }
419 }
420 }
421
422 // process all block-contained instructions
423 for (auto fi = module.getFunctions().cbegin(); fi != module.getFunctions().cend(); fi++) {
424 Function* f = *fi;
425 for (auto bi = f->getBlocks().cbegin(); bi != f->getBlocks().cend(); bi++) {
426 Block* b = *bi;
427 for (auto ii = b->getInstructions().cbegin(); ii != b->getInstructions().cend(); ii++)
428 postProcess(*ii->get());
429
430 // For all local variables that contain pointers to PhysicalStorageBufferEXT, check whether
431 // there is an existing restrict/aliased decoration. If we don't find one, add Aliased as the
432 // default.
433 for (auto vi = b->getLocalVariables().cbegin(); vi != b->getLocalVariables().cend(); vi++) {
434 const Instruction& inst = *vi->get();
435 Id resultId = inst.getResultId();
436 if (containsPhysicalStorageBufferOrArray(getDerefTypeId(resultId))) {
437 bool foundDecoration = false;
438 const auto function = [&](const std::unique_ptr<Instruction>& decoration) {
439 if (decoration.get()->getIdOperand(0) == resultId &&
440 decoration.get()->getOpCode() == OpDecorate &&
441 (decoration.get()->getImmediateOperand(1) == spv::DecorationAliasedPointerEXT ||
442 decoration.get()->getImmediateOperand(1) == spv::DecorationRestrictPointerEXT)) {
443 foundDecoration = true;
444 }
445 };
446 std::for_each(decorations.begin(), decorations.end(), function);
447 if (!foundDecoration) {
448 addDecoration(resultId, spv::DecorationAliasedPointerEXT);
449 }
450 }
451 }
452 }
453 }
454
455 // If any Vulkan memory model-specific functionality is used, update the
456 // OpMemoryModel to match.
457 if (capabilities.find(spv::CapabilityVulkanMemoryModelKHR) != capabilities.end()) {
458 memoryModel = spv::MemoryModelVulkanKHR;
459 addIncorporatedExtension(spv::E_SPV_KHR_vulkan_memory_model, spv::Spv_1_5);
460 }
461
462 // Add Aliased decoration if there's more than one Workgroup Block variable.
463 if (capabilities.find(spv::CapabilityWorkgroupMemoryExplicitLayoutKHR) != capabilities.end()) {
464 assert(entryPoints.size() == 1);
465 auto &ep = entryPoints[0];
466
467 std::vector<Id> workgroup_variables;
468 for (int i = 0; i < (int)ep->getNumOperands(); i++) {
469 if (!ep->isIdOperand(i))
470 continue;
471
472 const Id id = ep->getIdOperand(i);
473 const Instruction *instr = module.getInstruction(id);
474 if (instr->getOpCode() != spv::OpVariable)
475 continue;
476
477 if (instr->getImmediateOperand(0) == spv::StorageClassWorkgroup)
478 workgroup_variables.push_back(id);
479 }
480
481 if (workgroup_variables.size() > 1) {
482 for (size_t i = 0; i < workgroup_variables.size(); i++)
483 addDecoration(workgroup_variables[i], spv::DecorationAliased);
484 }
485 }
486 }

// SPIR-V requires that any instruction consuming the result of an OpSampledImage
// be in the same block as the OpSampledImage instruction. This pass finds
// uses of OpSampledImage where that is not the case and duplicates the
// OpSampledImage to be immediately before the instruction that consumes it.
// The old OpSampledImage is left in place, potentially with no users.
postProcessSamplers()493 void Builder::postProcessSamplers()
494 {
495 // first, find all OpSampledImage instructions and store them in a map.
496 std::map<Id, Instruction*> sampledImageInstrs;
497 for (auto f: module.getFunctions()) {
498 for (auto b: f->getBlocks()) {
499 for (auto &i: b->getInstructions()) {
500 if (i->getOpCode() == spv::OpSampledImage) {
501 sampledImageInstrs[i->getResultId()] = i.get();
502 }
503 }
504 }
505 }
506 // next find all uses of the given ids and rewrite them if needed.
507 for (auto f: module.getFunctions()) {
508 for (auto b: f->getBlocks()) {
509 auto &instrs = b->getInstructions();
510 for (size_t idx = 0; idx < instrs.size(); idx++) {
511 Instruction *i = instrs[idx].get();
512 for (int opnum = 0; opnum < i->getNumOperands(); opnum++) {
513 // Is this operand of the current instruction the result of an OpSampledImage?
514 if (i->isIdOperand(opnum) &&
515 sampledImageInstrs.count(i->getIdOperand(opnum)))
516 {
517 Instruction *opSampImg = sampledImageInstrs[i->getIdOperand(opnum)];
518 if (i->getBlock() != opSampImg->getBlock()) {
519 Instruction *newInstr = new Instruction(getUniqueId(),
520 opSampImg->getTypeId(),
521 spv::OpSampledImage);
522 newInstr->addIdOperand(opSampImg->getIdOperand(0));
523 newInstr->addIdOperand(opSampImg->getIdOperand(1));
524 newInstr->setBlock(b);
525
526 // rewrite the user of the OpSampledImage to use the new instruction.
527 i->setIdOperand(opnum, newInstr->getResultId());
528 // insert the new OpSampledImage right before the current instruction.
529 instrs.insert(instrs.begin() + idx,
530 std::unique_ptr<Instruction>(newInstr));
531 idx++;
532 }
533 }
534 }
535 }
536 }
537 }
538 }

// See the declaration comment in the header.
postProcess(bool compileOnly)541 void Builder::postProcess(bool compileOnly)
542 {
543 // postProcessCFG needs an entrypoint to determine what is reachable, but if we are not creating an "executable" shader, we don't have an entrypoint
544 if (!compileOnly)
545 postProcessCFG();
546
547 postProcessFeatures();
548 postProcessSamplers();
549 }
550
551 } // end spv namespace
552