xref: /aosp_15_r20/external/angle/third_party/glslang/src/SPIRV/SpvPostProcess.cpp (revision 8975f5c5ed3d1c378011245431ada316dfb6f244)
1 //
2 // Copyright (C) 2018 Google, Inc.
3 //
4 // All rights reserved.
5 //
6 // Redistribution and use in source and binary forms, with or without
7 // modification, are permitted provided that the following conditions
8 // are met:
9 //
10 //    Redistributions of source code must retain the above copyright
11 //    notice, this list of conditions and the following disclaimer.
12 //
13 //    Redistributions in binary form must reproduce the above
14 //    copyright notice, this list of conditions and the following
15 //    disclaimer in the documentation and/or other materials provided
16 //    with the distribution.
17 //
18 //    Neither the name of 3Dlabs Inc. Ltd. nor the names of its
19 //    contributors may be used to endorse or promote products derived
20 //    from this software without specific prior written permission.
21 //
22 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
25 // FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
26 // COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
27 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
28 // BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
29 // LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
30 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
32 // ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33 // POSSIBILITY OF SUCH DAMAGE.
34 
35 //
36 // Post-processing for SPIR-V IR, in internal form, not standard binary form.
37 //
38 
#include <cassert>
#include <cstdlib>

#include <algorithm>
#include <map>
#include <unordered_map>
#include <unordered_set>
45 
46 #include "SpvBuilder.h"
47 #include "spirv.hpp"
48 
49 namespace spv {
50     #include "GLSL.std.450.h"
51     #include "GLSL.ext.KHR.h"
52     #include "GLSL.ext.EXT.h"
53     #include "GLSL.ext.AMD.h"
54     #include "GLSL.ext.NV.h"
55     #include "GLSL.ext.ARM.h"
56     #include "GLSL.ext.QCOM.h"
57 }
58 
59 namespace spv {
60 
// Hook to visit each operand type and result type of an instruction.
// Will be called multiple times for one instruction, once for each typed
// operand and the result.
void Builder::postProcessType(const Instruction& inst, Id typeId)
{
    // Characterize the type being questioned: find the most basic type class
    // (e.g. OpTypeInt/OpTypeFloat/OpTypeStruct) and, for scalars, the bit width.
    Id basicTypeOp = getMostBasicTypeClass(typeId);
    int width = 0;
    if (basicTypeOp == OpTypeFloat || basicTypeOp == OpTypeInt)
        width = getScalarTypeWidth(typeId);

    // Do opcode-specific checks
    switch (inst.getOpCode()) {
    case OpLoad:
    case OpStore:
        if (basicTypeOp == OpTypeStruct) {
            // Whole-struct load/store: require the full small-bit-width
            // capabilities for any 8/16-bit member nested anywhere inside.
            if (containsType(typeId, OpTypeInt, 8))
                addCapability(CapabilityInt8);
            if (containsType(typeId, OpTypeInt, 16))
                addCapability(CapabilityInt16);
            if (containsType(typeId, OpTypeFloat, 16))
                addCapability(CapabilityFloat16);
        } else {
            // Scalar/vector access: whether the general Int8/Int16/Float16
            // capability is needed depends on the pointer's storage class.
            StorageClass storageClass = getStorageClass(inst.getIdOperand(0));
            if (width == 8) {
                // These storage classes are presumed covered by the dedicated
                // 8-bit storage capabilities (see the StorageBuffer8BitAccess
                // family checked in the convert-op case below), so no general
                // Int8 capability is added for them.
                switch (storageClass) {
                case StorageClassPhysicalStorageBufferEXT:
                case StorageClassUniform:
                case StorageClassStorageBuffer:
                case StorageClassPushConstant:
                    break;
                default:
                    addCapability(CapabilityInt8);
                    break;
                }
            } else if (width == 16) {
                // Likewise for 16-bit access: these classes (including I/O
                // interfaces) are presumed covered by the 16-bit storage
                // capabilities; anything else needs the full capability.
                switch (storageClass) {
                case StorageClassPhysicalStorageBufferEXT:
                case StorageClassUniform:
                case StorageClassStorageBuffer:
                case StorageClassPushConstant:
                case StorageClassInput:
                case StorageClassOutput:
                    break;
                default:
                    if (basicTypeOp == OpTypeInt)
                        addCapability(CapabilityInt16);
                    if (basicTypeOp == OpTypeFloat)
                        addCapability(CapabilityFloat16);
                    break;
                }
            }
        }
        break;
    case OpCopyObject:
        // Copies never need extra capabilities beyond what the source required.
        break;
    case OpFConvert:
    case OpSConvert:
    case OpUConvert:
        // Look for any 8/16-bit storage capabilities. If there are none, assume that
        // the convert instruction requires the Float16/Int8/16 capability.
        if (containsType(typeId, OpTypeFloat, 16) || containsType(typeId, OpTypeInt, 16)) {
            bool foundStorage = false;
            for (auto it = capabilities.begin(); it != capabilities.end(); ++it) {
                spv::Capability cap = *it;
                if (cap == spv::CapabilityStorageInputOutput16 ||
                    cap == spv::CapabilityStoragePushConstant16 ||
                    cap == spv::CapabilityStorageUniformBufferBlock16 ||
                    cap == spv::CapabilityStorageUniform16) {
                    foundStorage = true;
                    break;
                }
            }
            if (!foundStorage) {
                if (containsType(typeId, OpTypeFloat, 16))
                    addCapability(CapabilityFloat16);
                if (containsType(typeId, OpTypeInt, 16))
                    addCapability(CapabilityInt16);
            }
        }
        if (containsType(typeId, OpTypeInt, 8)) {
            bool foundStorage = false;
            for (auto it = capabilities.begin(); it != capabilities.end(); ++it) {
                spv::Capability cap = *it;
                if (cap == spv::CapabilityStoragePushConstant8 ||
                    cap == spv::CapabilityUniformAndStorageBuffer8BitAccess ||
                    cap == spv::CapabilityStorageBuffer8BitAccess) {
                    foundStorage = true;
                    break;
                }
            }
            if (!foundStorage) {
                addCapability(CapabilityInt8);
            }
        }
        break;
    case OpExtInst:
        // Operand 1 is the instruction number within the extended-instruction
        // set (operand 0 is the set id); here it is matched against GLSL.std.450.
        switch (inst.getImmediateOperand(1)) {
        case GLSLstd450Frexp:
        case GLSLstd450FrexpStruct:
            // Pre-SPIR-V-1.3, 16-bit integer exponents need the AMD extension.
            if (getSpvVersion() < spv::Spv_1_3 && containsType(typeId, OpTypeInt, 16))
                addExtension(spv::E_SPV_AMD_gpu_shader_int16);
            break;
        case GLSLstd450InterpolateAtCentroid:
        case GLSLstd450InterpolateAtSample:
        case GLSLstd450InterpolateAtOffset:
            // Pre-SPIR-V-1.3, half-float interpolation needs the AMD extension.
            if (getSpvVersion() < spv::Spv_1_3 && containsType(typeId, OpTypeFloat, 16))
                addExtension(spv::E_SPV_AMD_gpu_shader_half_float);
            break;
        default:
            break;
        }
        break;
    case OpAccessChain:
    case OpPtrAccessChain:
        // Pointer results of access chains don't themselves require width
        // capabilities; only non-pointer (index) integer types are checked.
        if (isPointerType(typeId))
            break;
        if (basicTypeOp == OpTypeInt) {
            if (width == 16)
                addCapability(CapabilityInt16);
            else if (width == 8)
                addCapability(CapabilityInt8);
        }
        break;
    default:
        // Any other opcode touching a narrow/wide scalar type requires the
        // corresponding full arithmetic capability.
        if (basicTypeOp == OpTypeInt) {
            if (width == 16)
                addCapability(CapabilityInt16);
            else if (width == 8)
                addCapability(CapabilityInt8);
            else if (width == 64)
                addCapability(CapabilityInt64);
        } else if (basicTypeOp == OpTypeFloat) {
            if (width == 16)
                addCapability(CapabilityFloat16);
            else if (width == 64)
                addCapability(CapabilityFloat64);
        }
        break;
    }
}
202 
// Called for each instruction that resides in a block.
// Adds capabilities/extensions implied by the opcode, patches the Aligned
// memory-access operand of PhysicalStorageBufferEXT loads/stores, and then
// runs the per-type checks (postProcessType) on the result and operand types.
void Builder::postProcess(Instruction& inst)
{
    // Add capabilities based simply on the opcode.
    switch (inst.getOpCode()) {
    case OpExtInst:
        // Operand 1 is the instruction number within the extended set.
        switch (inst.getImmediateOperand(1)) {
        case GLSLstd450InterpolateAtCentroid:
        case GLSLstd450InterpolateAtSample:
        case GLSLstd450InterpolateAtOffset:
            addCapability(CapabilityInterpolationFunction);
            break;
        default:
            break;
        }
        break;
    case OpDPdxFine:
    case OpDPdyFine:
    case OpFwidthFine:
    case OpDPdxCoarse:
    case OpDPdyCoarse:
    case OpFwidthCoarse:
        addCapability(CapabilityDerivativeControl);
        break;

    case OpImageQueryLod:
    case OpImageQuerySize:
    case OpImageQuerySizeLod:
    case OpImageQuerySamples:
    case OpImageQueryLevels:
        addCapability(CapabilityImageQuery);
        break;

    case OpGroupNonUniformPartitionNV:
        addExtension(E_SPV_NV_shader_subgroup_partitioned);
        addCapability(CapabilityGroupNonUniformPartitionedNV);
        break;

    case OpLoad:
    case OpStore:
        {
            // For any load/store to a PhysicalStorageBufferEXT, walk the accesschain
            // index list to compute the misalignment. The pre-existing alignment value
            // (set via Builder::AccessChain::alignment) only accounts for the base of
            // the reference type and any scalar component selection in the accesschain,
            // and this function computes the rest from the SPIR-V Offset decorations.
            Instruction *accessChain = module.getInstruction(inst.getIdOperand(0));
            if (accessChain->getOpCode() == OpAccessChain) {
                Instruction *base = module.getInstruction(accessChain->getIdOperand(0));
                // Get the type of the base of the access chain. It must be a pointer type.
                Id typeId = base->getTypeId();
                Instruction *type = module.getInstruction(typeId);
                assert(type->getOpCode() == OpTypePointer);
                // Only PhysicalStorageBufferEXT pointers carry an Aligned
                // operand that needs fixing up; all others are done here.
                if (type->getImmediateOperand(0) != StorageClassPhysicalStorageBufferEXT) {
                    break;
                }
                // Get the pointee type.
                typeId = type->getIdOperand(1);
                type = module.getInstruction(typeId);
                // Walk the index list for the access chain. For each index, find any
                // misalignment that can apply when accessing the member/element via
                // Offset/ArrayStride/MatrixStride decorations, and bitwise OR them all
                // together.
                int alignment = 0;
                // Operand 0 of the access chain is its base; indexes start at 1.
                for (int i = 1; i < accessChain->getNumOperands(); ++i) {
                    Instruction *idx = module.getInstruction(accessChain->getIdOperand(i));
                    if (type->getOpCode() == OpTypeStruct) {
                        // Struct member selection must be a compile-time constant.
                        assert(idx->getOpCode() == OpConstant);
                        unsigned int c = idx->getImmediateOperand(0);

                        // Accumulate this member's Offset (and MatrixStride,
                        // if decorated) into the running misalignment.
                        const auto function = [&](const std::unique_ptr<Instruction>& decoration) {
                            if (decoration.get()->getOpCode() == OpMemberDecorate &&
                                decoration.get()->getIdOperand(0) == typeId &&
                                decoration.get()->getImmediateOperand(1) == c &&
                                (decoration.get()->getImmediateOperand(2) == DecorationOffset ||
                                 decoration.get()->getImmediateOperand(2) == DecorationMatrixStride)) {
                                alignment |= decoration.get()->getImmediateOperand(3);
                            }
                        };
                        std::for_each(decorations.begin(), decorations.end(), function);
                        // get the next member type
                        typeId = type->getIdOperand(c);
                        type = module.getInstruction(typeId);
                    } else if (type->getOpCode() == OpTypeArray ||
                               type->getOpCode() == OpTypeRuntimeArray) {
                        // Array element access contributes the ArrayStride.
                        const auto function = [&](const std::unique_ptr<Instruction>& decoration) {
                            if (decoration.get()->getOpCode() == OpDecorate &&
                                decoration.get()->getIdOperand(0) == typeId &&
                                decoration.get()->getImmediateOperand(1) == DecorationArrayStride) {
                                alignment |= decoration.get()->getImmediateOperand(2);
                            }
                        };
                        std::for_each(decorations.begin(), decorations.end(), function);
                        // Get the element type
                        typeId = type->getIdOperand(0);
                        type = module.getInstruction(typeId);
                    } else {
                        // Once we get to any non-aggregate type, we're done.
                        break;
                    }
                }
                assert(inst.getNumOperands() >= 3);
                // The MemoryAccess mask is operand 1 of OpLoad (ptr, access)
                // and operand 2 of OpStore (ptr, object, access). It is read
                // only to assert Aligned is present; the cast silences the
                // unused-variable warning in NDEBUG builds.
                unsigned int memoryAccess = inst.getImmediateOperand((inst.getOpCode() == OpStore) ? 2 : 1);
                assert(memoryAccess & MemoryAccessAlignedMask);
                static_cast<void>(memoryAccess);
                // Compute the index of the alignment operand.
                int alignmentIdx = 2;
                if (inst.getOpCode() == OpStore)
                    alignmentIdx++;
                // Merge new and old (mis)alignment
                alignment |= inst.getImmediateOperand(alignmentIdx);
                // Pick the LSB (the lowest set bit is the guaranteed alignment)
                alignment = alignment & ~(alignment & (alignment-1));
                // update the Aligned operand
                inst.setImmediateOperand(alignmentIdx, alignment);
            }
            break;
        }

    default:
        break;
    }

    // Checks based on type: visit the result type (if any) and the type of
    // every id operand.
    if (inst.getTypeId() != NoType)
        postProcessType(inst, inst.getTypeId());
    for (int op = 0; op < inst.getNumOperands(); ++op) {
        if (inst.isIdOperand(op)) {
            // In blocks, these are always result ids, but we are relying on
            // getTypeId() to return NoType for things like OpLabel.
            if (getTypeId(inst.getIdOperand(op)) != NoType)
                postProcessType(inst, getTypeId(inst.getIdOperand(op)));
        }
    }
}
338 
// comment in header
// Labels reachable blocks, canonicalizes unreachable merge blocks and
// continue targets, and drops decorations that refer to ids defined only in
// unreachable code.
void Builder::postProcessCFG()
{
    // reachableBlocks is the set of blocks reached via control flow, or which are
    // unreachable continue targets or unreachable merges.
    std::unordered_set<const Block*> reachableBlocks;
    std::unordered_map<Block*, Block*> headerForUnreachableContinue;
    std::unordered_set<Block*> unreachableMerges;
    std::unordered_set<Id> unreachableDefinitions;
    // Collect IDs defined in unreachable blocks. For each function, label the
    // reachable blocks first. Then for each unreachable block, collect the
    // result IDs of the instructions in it.
    for (auto fi = module.getFunctions().cbegin(); fi != module.getFunctions().cend(); fi++) {
        Function* f = *fi;
        Block* entry = f->getEntryBlock();
        inReadableOrder(entry,
            [&reachableBlocks, &unreachableMerges, &headerForUnreachableContinue]
            (Block* b, ReachReason why, Block* header) {
               reachableBlocks.insert(b);
               if (why == ReachDeadContinue) headerForUnreachableContinue[b] = header;
               if (why == ReachDeadMerge) unreachableMerges.insert(b);
            });
        for (auto bi = f->getBlocks().cbegin(); bi != f->getBlocks().cend(); bi++) {
            Block* b = *bi;
            if (unreachableMerges.count(b) != 0 || headerForUnreachableContinue.count(b) != 0) {
                // Dead merge/continue blocks will be rewritten below, but their
                // label survives, so keep any decorations attached to it.
                auto ii = b->getInstructions().cbegin();
                ++ii; // Keep potential decorations on the label.
                for (; ii != b->getInstructions().cend(); ++ii)
                    unreachableDefinitions.insert(ii->get()->getResultId());
            } else if (reachableBlocks.count(b) == 0) {
                // The normal case for unreachable code.  All definitions are considered dead.
                for (auto ii = b->getInstructions().cbegin(); ii != b->getInstructions().cend(); ++ii)
                    unreachableDefinitions.insert(ii->get()->getResultId());
            }
        }
    }

    // Modify unreachable merge blocks and unreachable continue targets.
    // Delete their contents.
    for (auto mergeIter = unreachableMerges.begin(); mergeIter != unreachableMerges.end(); ++mergeIter) {
        (*mergeIter)->rewriteAsCanonicalUnreachableMerge();
    }
    for (auto continueIter = headerForUnreachableContinue.begin();
         continueIter != headerForUnreachableContinue.end();
         ++continueIter) {
        Block* continue_target = continueIter->first;
        Block* header = continueIter->second;
        continue_target->rewriteAsCanonicalUnreachableContinue(header);
    }

    // Remove unneeded decorations, for unreachable instructions
    for (auto decorationIter = decorations.begin(); decorationIter != decorations.end();) {
        Id decorationId = (*decorationIter)->getIdOperand(0);
        if (unreachableDefinitions.count(decorationId) != 0) {
            decorationIter = decorations.erase(decorationIter);
        } else {
            ++decorationIter;
        }
    }
}
399 
400 // comment in header
postProcessFeatures()401 void Builder::postProcessFeatures() {
402     // Add per-instruction capabilities, extensions, etc.,
403 
404     // Look for any 8/16 bit type in physical storage buffer class, and set the
405     // appropriate capability. This happens in createSpvVariable for other storage
406     // classes, but there isn't always a variable for physical storage buffer.
407     for (int t = 0; t < (int)groupedTypes[OpTypePointer].size(); ++t) {
408         Instruction* type = groupedTypes[OpTypePointer][t];
409         if (type->getImmediateOperand(0) == (unsigned)StorageClassPhysicalStorageBufferEXT) {
410             if (containsType(type->getIdOperand(1), OpTypeInt, 8)) {
411                 addIncorporatedExtension(spv::E_SPV_KHR_8bit_storage, spv::Spv_1_5);
412                 addCapability(spv::CapabilityStorageBuffer8BitAccess);
413             }
414             if (containsType(type->getIdOperand(1), OpTypeInt, 16) ||
415                 containsType(type->getIdOperand(1), OpTypeFloat, 16)) {
416                 addIncorporatedExtension(spv::E_SPV_KHR_16bit_storage, spv::Spv_1_3);
417                 addCapability(spv::CapabilityStorageBuffer16BitAccess);
418             }
419         }
420     }
421 
422     // process all block-contained instructions
423     for (auto fi = module.getFunctions().cbegin(); fi != module.getFunctions().cend(); fi++) {
424         Function* f = *fi;
425         for (auto bi = f->getBlocks().cbegin(); bi != f->getBlocks().cend(); bi++) {
426             Block* b = *bi;
427             for (auto ii = b->getInstructions().cbegin(); ii != b->getInstructions().cend(); ii++)
428                 postProcess(*ii->get());
429 
430             // For all local variables that contain pointers to PhysicalStorageBufferEXT, check whether
431             // there is an existing restrict/aliased decoration. If we don't find one, add Aliased as the
432             // default.
433             for (auto vi = b->getLocalVariables().cbegin(); vi != b->getLocalVariables().cend(); vi++) {
434                 const Instruction& inst = *vi->get();
435                 Id resultId = inst.getResultId();
436                 if (containsPhysicalStorageBufferOrArray(getDerefTypeId(resultId))) {
437                     bool foundDecoration = false;
438                     const auto function = [&](const std::unique_ptr<Instruction>& decoration) {
439                         if (decoration.get()->getIdOperand(0) == resultId &&
440                             decoration.get()->getOpCode() == OpDecorate &&
441                             (decoration.get()->getImmediateOperand(1) == spv::DecorationAliasedPointerEXT ||
442                              decoration.get()->getImmediateOperand(1) == spv::DecorationRestrictPointerEXT)) {
443                             foundDecoration = true;
444                         }
445                     };
446                     std::for_each(decorations.begin(), decorations.end(), function);
447                     if (!foundDecoration) {
448                         addDecoration(resultId, spv::DecorationAliasedPointerEXT);
449                     }
450                 }
451             }
452         }
453     }
454 
455     // If any Vulkan memory model-specific functionality is used, update the
456     // OpMemoryModel to match.
457     if (capabilities.find(spv::CapabilityVulkanMemoryModelKHR) != capabilities.end()) {
458         memoryModel = spv::MemoryModelVulkanKHR;
459         addIncorporatedExtension(spv::E_SPV_KHR_vulkan_memory_model, spv::Spv_1_5);
460     }
461 
462     // Add Aliased decoration if there's more than one Workgroup Block variable.
463     if (capabilities.find(spv::CapabilityWorkgroupMemoryExplicitLayoutKHR) != capabilities.end()) {
464         assert(entryPoints.size() == 1);
465         auto &ep = entryPoints[0];
466 
467         std::vector<Id> workgroup_variables;
468         for (int i = 0; i < (int)ep->getNumOperands(); i++) {
469             if (!ep->isIdOperand(i))
470                 continue;
471 
472             const Id id = ep->getIdOperand(i);
473             const Instruction *instr = module.getInstruction(id);
474             if (instr->getOpCode() != spv::OpVariable)
475                 continue;
476 
477             if (instr->getImmediateOperand(0) == spv::StorageClassWorkgroup)
478                 workgroup_variables.push_back(id);
479         }
480 
481         if (workgroup_variables.size() > 1) {
482             for (size_t i = 0; i < workgroup_variables.size(); i++)
483                 addDecoration(workgroup_variables[i], spv::DecorationAliased);
484         }
485     }
486 }
487 
488 // SPIR-V requires that any instruction consuming the result of an OpSampledImage
489 // be in the same block as the OpSampledImage instruction. This pass goes finds
490 // uses of OpSampledImage where that is not the case and duplicates the
491 // OpSampledImage to be immediately before the instruction that consumes it.
492 // The old OpSampledImage is left in place, potentially with no users.
postProcessSamplers()493 void Builder::postProcessSamplers()
494 {
495     // first, find all OpSampledImage instructions and store them in a map.
496     std::map<Id, Instruction*> sampledImageInstrs;
497     for (auto f: module.getFunctions()) {
498 	for (auto b: f->getBlocks()) {
499 	    for (auto &i: b->getInstructions()) {
500 		if (i->getOpCode() == spv::OpSampledImage) {
501 		    sampledImageInstrs[i->getResultId()] = i.get();
502 		}
503 	    }
504 	}
505     }
506     // next find all uses of the given ids and rewrite them if needed.
507     for (auto f: module.getFunctions()) {
508 	for (auto b: f->getBlocks()) {
509             auto &instrs = b->getInstructions();
510             for (size_t idx = 0; idx < instrs.size(); idx++) {
511                 Instruction *i = instrs[idx].get();
512                 for (int opnum = 0; opnum < i->getNumOperands(); opnum++) {
513                     // Is this operand of the current instruction the result of an OpSampledImage?
514                     if (i->isIdOperand(opnum) &&
515                         sampledImageInstrs.count(i->getIdOperand(opnum)))
516                     {
517                         Instruction *opSampImg = sampledImageInstrs[i->getIdOperand(opnum)];
518                         if (i->getBlock() != opSampImg->getBlock()) {
519                             Instruction *newInstr = new Instruction(getUniqueId(),
520                                                                     opSampImg->getTypeId(),
521                                                                     spv::OpSampledImage);
522                             newInstr->addIdOperand(opSampImg->getIdOperand(0));
523                             newInstr->addIdOperand(opSampImg->getIdOperand(1));
524                             newInstr->setBlock(b);
525 
526                             // rewrite the user of the OpSampledImage to use the new instruction.
527                             i->setIdOperand(opnum, newInstr->getResultId());
528                             // insert the new OpSampledImage right before the current instruction.
529                             instrs.insert(instrs.begin() + idx,
530                                     std::unique_ptr<Instruction>(newInstr));
531                             idx++;
532                         }
533                     }
534                 }
535             }
536 	}
537     }
538 }
539 
540 // comment in header
postProcess(bool compileOnly)541 void Builder::postProcess(bool compileOnly)
542 {
543     // postProcessCFG needs an entrypoint to determine what is reachable, but if we are not creating an "executable" shader, we don't have an entrypoint
544     if (!compileOnly)
545         postProcessCFG();
546 
547     postProcessFeatures();
548     postProcessSamplers();
549 }
550 
551 } // end spv namespace
552