1 // Copyright 2018 The SwiftShader Authors. All Rights Reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #ifndef sw_SpirvShader_hpp 16 #define sw_SpirvShader_hpp 17 18 #include "SamplerCore.hpp" 19 #include "ShaderCore.hpp" 20 #include "SpirvBinary.hpp" 21 #include "SpirvID.hpp" 22 #include "Device/Config.hpp" 23 #include "Device/Sampler.hpp" 24 #include "System/Debug.hpp" 25 #include "System/Math.hpp" 26 #include "System/Types.hpp" 27 #include "Vulkan/VkConfig.hpp" 28 #include "Vulkan/VkDescriptorSet.hpp" 29 30 #define SPV_ENABLE_UTILITY_CODE 31 #include <spirv/unified1/spirv.hpp> 32 33 #include <array> 34 #include <atomic> 35 #include <cstdint> 36 #include <cstring> 37 #include <deque> 38 #include <functional> 39 #include <memory> 40 #include <string> 41 #include <type_traits> 42 #include <unordered_map> 43 #include <unordered_set> 44 #include <vector> 45 46 #undef Yield // b/127920555 47 48 namespace vk { 49 50 class Device; 51 class PipelineLayout; 52 class ImageView; 53 class Sampler; 54 class RenderPass; 55 struct Attachments; 56 struct SampledImageDescriptor; 57 struct SamplerState; 58 59 } // namespace vk 60 61 namespace sw { 62 63 // Forward declarations. 
64 class SpirvRoutine; 65 66 // Incrementally constructed complex bundle of rvalues 67 // Effectively a restricted vector, supporting only: 68 // - allocation to a (runtime-known) fixed component count 69 // - in-place construction of elements 70 // - const operator[] 71 class Intermediate 72 { 73 public: Intermediate(uint32_t componentCount)74 Intermediate(uint32_t componentCount) 75 : componentCount(componentCount) 76 , scalar(new rr::Value *[componentCount]) 77 { 78 for(auto i = 0u; i < componentCount; i++) { scalar[i] = nullptr; } 79 } 80 ~Intermediate()81 ~Intermediate() 82 { 83 delete[] scalar; 84 } 85 86 // TypeHint is used as a hint for rr::PrintValue::Ty<sw::Intermediate> to 87 // decide the format used to print the intermediate data. 88 enum class TypeHint 89 { 90 Float, 91 Int, 92 UInt 93 }; 94 move(uint32_t i,RValue<SIMD::Float> && scalar)95 void move(uint32_t i, RValue<SIMD::Float> &&scalar) { emplace(i, scalar.value(), TypeHint::Float); } move(uint32_t i,RValue<SIMD::Int> && scalar)96 void move(uint32_t i, RValue<SIMD::Int> &&scalar) { emplace(i, scalar.value(), TypeHint::Int); } move(uint32_t i,RValue<SIMD::UInt> && scalar)97 void move(uint32_t i, RValue<SIMD::UInt> &&scalar) { emplace(i, scalar.value(), TypeHint::UInt); } 98 move(uint32_t i,const RValue<SIMD::Float> & scalar)99 void move(uint32_t i, const RValue<SIMD::Float> &scalar) { emplace(i, scalar.value(), TypeHint::Float); } move(uint32_t i,const RValue<SIMD::Int> & scalar)100 void move(uint32_t i, const RValue<SIMD::Int> &scalar) { emplace(i, scalar.value(), TypeHint::Int); } move(uint32_t i,const RValue<SIMD::UInt> & scalar)101 void move(uint32_t i, const RValue<SIMD::UInt> &scalar) { emplace(i, scalar.value(), TypeHint::UInt); } 102 103 // Value retrieval functions. 
Float(uint32_t i) const104 RValue<SIMD::Float> Float(uint32_t i) const 105 { 106 ASSERT(i < componentCount); 107 ASSERT(scalar[i] != nullptr); 108 RR_PRINT_ONLY(typeHint = TypeHint::Float;) 109 return As<SIMD::Float>(scalar[i]); // TODO(b/128539387): RValue<SIMD::Float>(scalar) 110 } 111 Int(uint32_t i) const112 RValue<SIMD::Int> Int(uint32_t i) const 113 { 114 ASSERT(i < componentCount); 115 ASSERT(scalar[i] != nullptr); 116 RR_PRINT_ONLY(typeHint = TypeHint::Int;) 117 return As<SIMD::Int>(scalar[i]); // TODO(b/128539387): RValue<SIMD::Int>(scalar) 118 } 119 UInt(uint32_t i) const120 RValue<SIMD::UInt> UInt(uint32_t i) const 121 { 122 ASSERT(i < componentCount); 123 ASSERT(scalar[i] != nullptr); 124 RR_PRINT_ONLY(typeHint = TypeHint::UInt;) 125 return As<SIMD::UInt>(scalar[i]); // TODO(b/128539387): RValue<SIMD::UInt>(scalar) 126 } 127 128 // No copy/move construction or assignment 129 Intermediate(const Intermediate &) = delete; 130 Intermediate(Intermediate &&) = delete; 131 Intermediate &operator=(const Intermediate &) = delete; 132 Intermediate &operator=(Intermediate &&) = delete; 133 134 const uint32_t componentCount; 135 136 private: emplace(uint32_t i,rr::Value * value,TypeHint type)137 void emplace(uint32_t i, rr::Value *value, TypeHint type) 138 { 139 ASSERT(i < componentCount); 140 ASSERT(scalar[i] == nullptr); 141 scalar[i] = value; 142 RR_PRINT_ONLY(typeHint = type;) 143 } 144 145 rr::Value **const scalar; 146 147 #ifdef ENABLE_RR_PRINT 148 friend struct rr::PrintValue::Ty<sw::Intermediate>; 149 mutable TypeHint typeHint = TypeHint::Float; 150 #endif // ENABLE_RR_PRINT 151 }; 152 153 // The Spirv class parses a SPIR-V binary and provides utilities for retrieving 154 // information about instructions, objects, types, etc. 
155 class Spirv 156 { 157 public: 158 Spirv(VkShaderStageFlagBits stage, 159 const char *entryPointName, 160 const SpirvBinary &insns); 161 162 ~Spirv(); 163 164 SpirvBinary insns; 165 166 class Type; 167 class Object; 168 169 // Pseudo-iterator over SPIR-V instructions, designed to support range-based-for. 170 class InsnIterator 171 { 172 public: 173 InsnIterator() = default; 174 InsnIterator(const InsnIterator &other) = default; 175 InsnIterator &operator=(const InsnIterator &other) = default; 176 InsnIterator(SpirvBinary::const_iterator iter)177 explicit InsnIterator(SpirvBinary::const_iterator iter) 178 : iter{ iter } 179 { 180 } 181 opcode() const182 spv::Op opcode() const 183 { 184 return static_cast<spv::Op>(*iter & spv::OpCodeMask); 185 } 186 wordCount() const187 uint32_t wordCount() const 188 { 189 return *iter >> spv::WordCountShift; 190 } 191 word(uint32_t n) const192 uint32_t word(uint32_t n) const 193 { 194 ASSERT(n < wordCount()); 195 return iter[n]; 196 } 197 data() const198 const uint32_t *data() const 199 { 200 return &iter[0]; 201 } 202 string(uint32_t n) const203 const char *string(uint32_t n) const 204 { 205 return reinterpret_cast<const char *>(&iter[n]); 206 } 207 208 // Returns the number of whole-words that a string literal starting at 209 // word n consumes. If the end of the intruction is reached before the 210 // null-terminator is found, then the function DABORT()s and 0 is 211 // returned. stringSizeInWords(uint32_t n) const212 uint32_t stringSizeInWords(uint32_t n) const 213 { 214 uint32_t c = wordCount(); 215 for(uint32_t i = n; n < c; i++) 216 { 217 const char *s = string(i); 218 // SPIR-V spec 2.2.1. Instructions: 219 // A string is interpreted as a nul-terminated stream of 220 // characters. The character set is Unicode in the UTF-8 221 // encoding scheme. The UTF-8 octets (8-bit bytes) are packed 222 // four per word, following the little-endian convention (i.e., 223 // the first octet is in the lowest-order 8 bits of the word). 
224 // The final word contains the string's nul-termination 225 // character (0), and all contents past the end of the string in 226 // the final word are padded with 0. 227 if(s[3] == 0) 228 { 229 return 1 + i - n; 230 } 231 } 232 DABORT("SPIR-V string literal was not null-terminated"); 233 return 0; 234 } 235 hasResultAndType() const236 bool hasResultAndType() const 237 { 238 bool hasResult = false, hasResultType = false; 239 spv::HasResultAndType(opcode(), &hasResult, &hasResultType); 240 241 return hasResultType; 242 } 243 resultTypeId() const244 SpirvID<Type> resultTypeId() const 245 { 246 ASSERT(hasResultAndType()); 247 return word(1); 248 } 249 resultId() const250 SpirvID<Object> resultId() const 251 { 252 ASSERT(hasResultAndType()); 253 return word(2); 254 } 255 distanceFrom(const InsnIterator & other) const256 uint32_t distanceFrom(const InsnIterator &other) const 257 { 258 return static_cast<uint32_t>(iter - other.iter); 259 } 260 operator ==(const InsnIterator & other) const261 bool operator==(const InsnIterator &other) const 262 { 263 return iter == other.iter; 264 } 265 operator !=(const InsnIterator & other) const266 bool operator!=(const InsnIterator &other) const 267 { 268 return iter != other.iter; 269 } 270 operator *() const271 InsnIterator operator*() const 272 { 273 return *this; 274 } 275 operator ++()276 InsnIterator &operator++() 277 { 278 iter += wordCount(); 279 return *this; 280 } 281 operator ++(int)282 InsnIterator const operator++(int) 283 { 284 InsnIterator ret{ *this }; 285 iter += wordCount(); 286 return ret; 287 } 288 289 private: 290 SpirvBinary::const_iterator iter; 291 }; 292 293 // Range-based-for interface begin() const294 InsnIterator begin() const 295 { 296 // Skip over the header words 297 return InsnIterator{ insns.cbegin() + 5 }; 298 } 299 end() const300 InsnIterator end() const 301 { 302 return InsnIterator{ insns.cend() }; 303 } 304 305 // A range of contiguous instruction words. 
	struct Span
	{
		// NOTE(review): holds a reference to insn — the Span must not outlive
		// the InsnIterator it was constructed from.
		Span(const InsnIterator &insn, uint32_t offset, uint32_t size)
		    : insn(insn)
		    , offset(offset)
		    , wordCount(size)
		{}

		// Returns the index'th word of the range (relative to offset).
		uint32_t operator[](uint32_t index) const
		{
			ASSERT(index < wordCount);
			return insn.word(offset + index);
		}

		// Number of words in the range.
		uint32_t size() const
		{
			return wordCount;
		}

	private:
		const InsnIterator &insn;
		const uint32_t offset;
		const uint32_t wordCount;
	};

	// Parsed representation of an OpTypeX instruction.
	class Type
	{
	public:
		using ID = SpirvID<Type>;

		spv::Op opcode() const { return definition.opcode(); }

		InsnIterator definition;
		spv::StorageClass storageClass = static_cast<spv::StorageClass>(-1);  // -1 = not a pointer type
		uint32_t componentCount = 0;
		bool isBuiltInBlock = false;

		// Inner element type for pointers, arrays, vectors and matrices.
		ID element;
	};

	// Parsed representation of a result-producing instruction.
	class Object
	{
	public:
		using ID = SpirvID<Object>;

		spv::Op opcode() const { return definition.opcode(); }
		Type::ID typeId() const { return definition.resultTypeId(); }
		Object::ID id() const { return definition.resultId(); }

		bool isConstantZero() const;

		InsnIterator definition;
		std::vector<uint32_t> constantValue;  // populated only when kind == Kind::Constant

		enum class Kind
		{
			// Invalid default kind.
			// If we get left with an object in this state, the module was
			// broken.
			Unknown,

			// TODO: Better document this kind.
			// A shader interface variable pointer.
			// Pointer with uniform address across all lanes.
			// Pointer held by SpirvRoutine::pointers
			InterfaceVariable,

			// Constant value held by Object::constantValue.
			Constant,

			// Value held by SpirvRoutine::intermediates.
			Intermediate,

			// Pointer held by SpirvRoutine::pointers
			Pointer,

			// Combination of an image pointer and a sampler ID
			SampledImage,

			// A pointer to a vk::DescriptorSet*.
			// Pointer held by SpirvRoutine::pointers.
			DescriptorSet,
		};

		Kind kind = Kind::Unknown;
	};

	// Block is an interval of SPIR-V instructions, starting with the
	// opening OpLabel, and ending with a termination instruction.
	class Block
	{
	public:
		using ID = SpirvID<Block>;
		using Set = std::unordered_set<ID>;

		// Edge represents the graph edge between two blocks.
		struct Edge
		{
			ID from;
			ID to;

			bool operator==(const Edge &other) const { return from == other.from && to == other.to; }

			struct Hash
			{
				std::size_t operator()(const Edge &edge) const noexcept
				{
					return std::hash<uint32_t>()(edge.from.value() * 31 + edge.to.value());
				}
			};
		};

		Block() = default;
		Block(const Block &other) = default;
		Block &operator=(const Block &other) = default;
		explicit Block(InsnIterator begin, InsnIterator end);

		/* range-based-for interface */
		inline InsnIterator begin() const { return begin_; }
		inline InsnIterator end() const { return end_; }

		// Classification of the block's terminator (and any preceding
		// structured-control-flow merge instruction).
		enum Kind
		{
			Simple,                        // OpBranch or other simple terminator.
			StructuredBranchConditional,   // OpSelectionMerge + OpBranchConditional
			UnstructuredBranchConditional, // OpBranchConditional
			StructuredSwitch,              // OpSelectionMerge + OpSwitch
			UnstructuredSwitch,            // OpSwitch
			Loop,                          // OpLoopMerge + [OpBranchConditional | OpBranch]
		};

		Kind kind = Simple;
		InsnIterator mergeInstruction;   // Structured control flow merge instruction.
		InsnIterator branchInstruction;  // Branch instruction.
		ID mergeBlock;                   // Structured flow merge block.
		ID continueTarget;               // Loop continue block.
		Set ins;                         // Blocks that branch into this block.
		Set outs;                        // Blocks that this block branches to.
		bool isLoopMerge = false;

	private:
		InsnIterator begin_;
		InsnIterator end_;
	};

	// A single SPIR-V function: its entry block plus the CFG of its blocks.
	class Function
	{
	public:
		using ID = SpirvID<Function>;

		// Walks all reachable the blocks starting from id adding them to
		// reachable.
		void TraverseReachableBlocks(Block::ID id, Block::Set &reachable) const;

		// AssignBlockFields() performs the following for all reachable blocks:
		// * Assigns Block::ins with the identifiers of all blocks that contain
		//   this block in their Block::outs.
		// * Sets Block::isLoopMerge to true if the block is the merge of a
		//   another loop block.
		void AssignBlockFields();

		// ForeachBlockDependency calls f with each dependency of the given
		// block. A dependency is an incoming block that is not a loop-back
		// edge.
		void ForeachBlockDependency(Block::ID blockId, std::function<void(Block::ID)> f) const;

		// ExistsPath returns true if there's a direct or indirect flow from
		// the 'from' block to the 'to' block that does not pass through
		// notPassingThrough.
		bool ExistsPath(Block::ID from, Block::ID to, Block::ID notPassingThrough) const;

		const Block &getBlock(Block::ID id) const
		{
			auto it = blocks.find(id);
			ASSERT_MSG(it != blocks.end(), "Unknown block %d", id.value());
			return it->second;
		}

		Block::ID entry;          // function entry point block.
		HandleMap<Block> blocks;  // blocks belonging to this function.
		Type::ID type;            // type of the function.
		Type::ID result;          // return type.
	};

	using String = std::string;
	using StringID = SpirvID<std::string>;

	// An extended-instruction-set import (OpExtInstImport).
	class Extension
	{
	public:
		using ID = SpirvID<Extension>;

		enum Name
		{
			Unknown,
			GLSLstd450,
			OpenCLDebugInfo100,
			NonSemanticInfo,
		};

		Name name;
	};

	struct TypeOrObject
	{};

	// TypeOrObjectID is an identifier that represents a Type or an Object,
	// and supports implicit casting to and from Type::ID or Object::ID.
	class TypeOrObjectID : public SpirvID<TypeOrObject>
	{
	public:
		using Hash = std::hash<SpirvID<TypeOrObject>>;

		inline TypeOrObjectID(uint32_t id)
		    : SpirvID(id)
		{}
		inline TypeOrObjectID(Type::ID id)
		    : SpirvID(id.value())
		{}
		inline TypeOrObjectID(Object::ID id)
		    : SpirvID(id.value())
		{}
		inline operator Type::ID() const { return Type::ID(value()); }
		inline operator Object::ID() const { return Object::ID(value()); }
	};

	// This method is for retrieving an ID that uniquely identifies the
	// shader entry point represented by this object.
	// Packs the entry-point ID into the high 32 bits and the binary's own
	// identifier into the low 32 bits.
	uint64_t getIdentifier() const
	{
		return ((uint64_t)entryPoint.value() << 32) | insns.getIdentifier();
	}

	// Execution modes declared via OpExecutionMode / OpExecutionModeId.
	struct ExecutionModes
	{
		bool EarlyFragmentTests : 1;
		bool DepthReplacing : 1;
		bool DepthGreater : 1;
		bool DepthLess : 1;
		bool DepthUnchanged : 1;
		bool StencilRefReplacing : 1;

		// Compute workgroup dimensions
		// When useWorkgroupSizeId is true these are object IDs; otherwise
		// they hold literal sizes. TODO confirm against ProcessExecutionMode.
		Object::ID WorkgroupSizeX = 1;
		Object::ID WorkgroupSizeY = 1;
		Object::ID WorkgroupSizeZ = 1;
		bool useWorkgroupSizeId = false;
	};

	const ExecutionModes &getExecutionModes() const
	{
		return executionModes;
	}

	// Summary facts gathered by the analysis pass over the whole module.
	struct Analysis
	{
		bool ContainsDiscard : 1;  // OpKill, OpTerminateInvocation, or OpDemoteToHelperInvocation
		bool ContainsControlBarriers : 1;
		bool NeedsCentroid : 1;
		bool ContainsSampleQualifier : 1;
		bool ContainsImageWrite : 1;
	};

	const Analysis &getAnalysis() const { return analysis; }
	bool containsImageWrite() const { return analysis.ContainsImageWrite; }

	// True when the shader can alter coverage: it either discards or writes
	// the SampleMask built-in output.
	bool coverageModified() const
	{
		return analysis.ContainsDiscard ||
		       (outputBuiltins.find(spv::BuiltInSampleMask) != outputBuiltins.end());
	}

	// SPIR-V capabilities declared (via OpCapability) by this module.
	struct Capabilities
	{
		bool Matrix : 1;
		bool Shader : 1;
		bool StorageImageMultisample : 1;
		bool ClipDistance : 1;
		bool CullDistance : 1;
		bool ImageCubeArray : 1;
		bool SampleRateShading : 1;
		bool InputAttachment : 1;
		bool Sampled1D : 1;
		bool Image1D : 1;
		bool SampledBuffer : 1;
		bool SampledCubeArray : 1;
		bool ImageBuffer : 1;
		bool ImageMSArray : 1;
		bool StorageImageExtendedFormats : 1;
		bool ImageQuery : 1;
		bool DerivativeControl : 1;
		bool DotProductInputAll : 1;
		bool DotProductInput4x8Bit : 1;
		bool DotProductInput4x8BitPacked : 1;
		bool DotProduct : 1;
		bool InterpolationFunction : 1;
		bool StorageImageWriteWithoutFormat : 1;
		bool GroupNonUniform : 1;
		bool GroupNonUniformVote : 1;
		bool GroupNonUniformBallot : 1;
		bool GroupNonUniformShuffle : 1;
		bool GroupNonUniformShuffleRelative : 1;
		bool GroupNonUniformArithmetic : 1;
		bool GroupNonUniformQuad : 1;
		bool DeviceGroup : 1;
		bool MultiView : 1;
		bool SignedZeroInfNanPreserve : 1;
		bool DemoteToHelperInvocation : 1;
		bool StencilExportEXT : 1;
		bool VulkanMemoryModel : 1;
		bool VulkanMemoryModelDeviceScope : 1;
		bool ShaderNonUniform : 1;
		bool RuntimeDescriptorArray : 1;
		bool StorageBufferArrayNonUniformIndexing : 1;
		bool StorageTexelBufferArrayNonUniformIndexing : 1;
		bool StorageTexelBufferArrayDynamicIndexing : 1;
		bool UniformTexelBufferArrayNonUniformIndexing : 1;
		bool UniformTexelBufferArrayDynamicIndexing : 1;
		bool UniformBufferArrayNonUniformIndex : 1;
		bool SampledImageArrayNonUniformIndexing : 1;
		bool StorageImageArrayNonUniformIndexing : 1;
		bool PhysicalStorageBufferAddresses : 1;
	};

	const Capabilities &getUsedCapabilities() const
	{
		return capabilities;
	}

	// getNumOutputClipDistances() returns the number of ClipDistances
	// outputted by this shader (0 when the capability or built-in is absent).
	unsigned int getNumOutputClipDistances() const
	{
		if(getUsedCapabilities().ClipDistance)
		{
			auto it = outputBuiltins.find(spv::BuiltInClipDistance);
			if(it != outputBuiltins.end())
			{
				return it->second.SizeInComponents;
			}
		}
		return 0;
	}

	// getNumOutputCullDistances() returns the number of CullDistances
	// outputted by this shader.
getNumOutputCullDistances() const653 unsigned int getNumOutputCullDistances() const 654 { 655 if(getUsedCapabilities().CullDistance) 656 { 657 auto it = outputBuiltins.find(spv::BuiltInCullDistance); 658 if(it != outputBuiltins.end()) 659 { 660 return it->second.SizeInComponents; 661 } 662 } 663 return 0; 664 } 665 666 enum AttribType : unsigned char 667 { 668 ATTRIBTYPE_FLOAT, 669 ATTRIBTYPE_INT, 670 ATTRIBTYPE_UINT, 671 ATTRIBTYPE_UNUSED, 672 673 ATTRIBTYPE_LAST = ATTRIBTYPE_UINT 674 }; 675 hasBuiltinInput(spv::BuiltIn b) const676 bool hasBuiltinInput(spv::BuiltIn b) const 677 { 678 return inputBuiltins.find(b) != inputBuiltins.end(); 679 } 680 hasBuiltinOutput(spv::BuiltIn b) const681 bool hasBuiltinOutput(spv::BuiltIn b) const 682 { 683 return outputBuiltins.find(b) != outputBuiltins.end(); 684 } 685 686 struct Decorations 687 { 688 int32_t Location = -1; 689 int32_t Component = 0; 690 spv::BuiltIn BuiltIn = static_cast<spv::BuiltIn>(-1); 691 int32_t Offset = -1; 692 int32_t ArrayStride = -1; 693 int32_t MatrixStride = 1; 694 695 bool HasLocation : 1; 696 bool HasComponent : 1; 697 bool HasBuiltIn : 1; 698 bool HasOffset : 1; 699 bool HasArrayStride : 1; 700 bool HasMatrixStride : 1; 701 bool HasRowMajor : 1; // whether RowMajor bit is valid. 702 703 bool Flat : 1; 704 bool Centroid : 1; 705 bool NoPerspective : 1; 706 bool Block : 1; 707 bool BufferBlock : 1; 708 bool RelaxedPrecision : 1; 709 bool RowMajor : 1; // RowMajor if true; ColMajor if false 710 bool InsideMatrix : 1; // pseudo-decoration for whether we're inside a matrix. 
711 bool NonUniform : 1; 712 Decorationssw::Spirv::Decorations713 Decorations() 714 : Location{ -1 } 715 , Component{ 0 } 716 , BuiltIn{ static_cast<spv::BuiltIn>(-1) } 717 , Offset{ -1 } 718 , ArrayStride{ -1 } 719 , MatrixStride{ -1 } 720 , HasLocation{ false } 721 , HasComponent{ false } 722 , HasBuiltIn{ false } 723 , HasOffset{ false } 724 , HasArrayStride{ false } 725 , HasMatrixStride{ false } 726 , HasRowMajor{ false } 727 , Flat{ false } 728 , Centroid{ false } 729 , NoPerspective{ false } 730 , Block{ false } 731 , BufferBlock{ false } 732 , RelaxedPrecision{ false } 733 , RowMajor{ false } 734 , InsideMatrix{ false } 735 , NonUniform{ false } 736 { 737 } 738 739 Decorations(const Decorations &) = default; 740 Decorations& operator= (const Decorations &) = default; 741 742 void Apply(const Decorations &src); 743 744 void Apply(spv::Decoration decoration, uint32_t arg); 745 }; 746 747 std::unordered_map<TypeOrObjectID, Decorations, TypeOrObjectID::Hash> decorations; 748 std::unordered_map<Type::ID, std::vector<Decorations>> memberDecorations; 749 750 struct DescriptorDecorations 751 { 752 int32_t DescriptorSet = -1; 753 int32_t Binding = -1; 754 int32_t InputAttachmentIndex = -1; 755 756 void Apply(const DescriptorDecorations &src); 757 }; 758 759 std::unordered_map<Object::ID, DescriptorDecorations> descriptorDecorations; 760 761 struct InterfaceComponent 762 { 763 AttribType Type; 764 765 union 766 { 767 struct 768 { 769 bool Flat : 1; 770 bool Centroid : 1; 771 bool NoPerspective : 1; 772 }; 773 774 uint8_t DecorationBits; 775 }; 776 InterfaceComponentsw::Spirv::InterfaceComponent777 InterfaceComponent() 778 : Type{ ATTRIBTYPE_UNUSED } 779 , DecorationBits{ 0 } 780 { 781 } 782 }; 783 784 struct BuiltinMapping 785 { 786 Object::ID Id; 787 uint32_t FirstComponent; 788 uint32_t SizeInComponents; 789 }; 790 791 struct WorkgroupMemory 792 { 793 // allocates a new variable of size bytes with the given identifier. 
		inline void allocate(Object::ID id, uint32_t size)
		{
			uint32_t offset = totalSize;
			auto it = offsets.emplace(id, offset);
			ASSERT_MSG(it.second, "WorkgroupMemory already has an allocation for object %d", int(id.value()));
			totalSize += size;
		}
		// returns the byte offset of the variable with the given identifier.
		inline uint32_t offsetOf(Object::ID id) const
		{
			auto it = offsets.find(id);
			ASSERT_MSG(it != offsets.end(), "WorkgroupMemory has no allocation for object %d", int(id.value()));
			return it->second;
		}
		// returns the total allocated size in bytes.
		inline uint32_t size() const { return totalSize; }

	private:
		uint32_t totalSize = 0;                            // in bytes
		std::unordered_map<Object::ID, uint32_t> offsets;  // in bytes
	};

	// Flattened scalar components of the shader's input/output interfaces.
	std::vector<InterfaceComponent> inputs;
	std::vector<InterfaceComponent> outputs;

	uint32_t getWorkgroupSizeX() const;
	uint32_t getWorkgroupSizeY() const;
	uint32_t getWorkgroupSizeZ() const;

	// spv::BuiltIn is an enum; hash it via its underlying integer type.
	using BuiltInHash = std::hash<std::underlying_type<spv::BuiltIn>::type>;
	std::unordered_map<spv::BuiltIn, BuiltinMapping, BuiltInHash> inputBuiltins;
	std::unordered_map<spv::BuiltIn, BuiltinMapping, BuiltInHash> outputBuiltins;
	WorkgroupMemory workgroupMemory;

	Function::ID entryPoint;
	spv::ExecutionModel executionModel = spv::ExecutionModelMax;  // Invalid prior to OpEntryPoint parsing.
	ExecutionModes executionModes = {};
	Capabilities capabilities = {};
	spv::AddressingModel addressingModel = spv::AddressingModelLogical;
	spv::MemoryModel memoryModel = spv::MemoryModelSimple;
	HandleMap<Extension> extensionsByID;
	std::unordered_set<uint32_t> extensionsImported;

	Analysis analysis = {};

	HandleMap<Type> types;
	HandleMap<Object> defs;

	// TODO(b/247020580): Encapsulate
public:
	HandleMap<Function> functions;
	std::unordered_map<StringID, String> strings;

	// DeclareType creates a Type for the given OpTypeX instruction, storing
	// it into the types map. It is called from the analysis pass (constructor).
	void DeclareType(InsnIterator insn);

	void ProcessExecutionMode(InsnIterator it);

	uint32_t ComputeTypeSize(InsnIterator insn);
	Decorations GetDecorationsForId(TypeOrObjectID id) const;
	void ApplyDecorationsForId(Decorations *d, TypeOrObjectID id) const;
	void ApplyDecorationsForIdMember(Decorations *d, Type::ID id, uint32_t member) const;
	void ApplyDecorationsForAccessChain(Decorations *d, DescriptorDecorations *dd, Object::ID baseId, const Span &indexIds) const;

	// Creates an Object for the instruction's result in 'defs'.
	void DefineResult(const InsnIterator &insn);

	using InterfaceVisitor = std::function<void(Decorations const, AttribType)>;

	void VisitInterface(Object::ID id, const InterfaceVisitor &v) const;

	int VisitInterfaceInner(Type::ID id, Decorations d, const InterfaceVisitor &v) const;

	// MemoryElement describes a scalar element within a structure, and is
	// used by the callback function of VisitMemoryObject().
	struct MemoryElement
	{
		uint32_t index;    // index of the scalar element
		uint32_t offset;   // offset (in bytes) from the base of the object
		const Type &type;  // element type
	};

	using MemoryVisitor = std::function<void(const MemoryElement &)>;

	// VisitMemoryObject() walks a type tree in an explicitly laid out
	// storage class, calling the MemoryVisitor for each scalar element
	// within the object.
	void VisitMemoryObject(Object::ID id, bool resultIsPointer, const MemoryVisitor &v) const;

	// VisitMemoryObjectInner() is internally called by VisitMemoryObject()
	void VisitMemoryObjectInner(Type::ID id, Decorations d, uint32_t &index, uint32_t offset, bool resultIsPointer, const MemoryVisitor &v) const;

	Object &CreateConstant(InsnIterator it);

	void ProcessInterfaceVariable(Object &object);

	// Looks up a type by ID; asserts if the ID is unknown.
	const Type &getType(Type::ID id) const
	{
		auto it = types.find(id);
		ASSERT_MSG(it != types.end(), "Unknown type %d", id.value());
		return it->second;
	}

	const Type &getType(const Object &object) const
	{
		return getType(object.typeId());
	}

	// Looks up an object by ID; asserts if the ID is unknown.
	const Object &getObject(Object::ID id) const
	{
		auto it = defs.find(id);
		ASSERT_MSG(it != defs.end(), "Unknown object %d", id.value());
		return it->second;
	}

	const Type &getObjectType(Object::ID id) const
	{
		return getType(getObject(id));
	}

	const Function &getFunction(Function::ID id) const
	{
		auto it = functions.find(id);
		ASSERT_MSG(it != functions.end(), "Unknown function %d", id.value());
		return it->second;
	}

	const String &getString(StringID id) const
	{
		auto it = strings.find(id);
		ASSERT_MSG(it != strings.end(), "Unknown string %d", id.value());
		return it->second;
	}

	const Extension &getExtension(Extension::ID id) const
	{
		auto it = extensionsByID.find(id);
		ASSERT_MSG(it != extensionsByID.end(), "Unknown extension %d", id.value());
		return it->second;
	}

	// Returns the *component* offset in the literal for the given access chain.
	uint32_t WalkLiteralAccessChain(Type::ID id, const Span &indexes) const;

	uint32_t GetConstScalarInt(Object::ID id) const;
	void EvalSpecConstantOp(InsnIterator insn);
	void EvalSpecConstantUnaryOp(InsnIterator insn);
	void EvalSpecConstantBinaryOp(InsnIterator insn);

	// Fragment input interpolation functions
	uint32_t GetNumInputComponents(int32_t location) const;
	uint32_t GetPackedInterpolant(int32_t location) const;

	// WriteCFGGraphVizDotFile() writes a graphviz dot file of the shader's
	// control flow to the given file path.
	void WriteCFGGraphVizDotFile(const char *path) const;

	// OpcodeName() returns the name of the opcode op.
	static const char *OpcodeName(spv::Op opcode);
	static std::memory_order MemoryOrder(spv::MemorySemanticsMask memorySemantics);

	// IsStatement() returns true if the given opcode actually performs
	// work (as opposed to declaring a type, defining a function start / end,
	// etc).
	static bool IsStatement(spv::Op opcode);

	// HasTypeAndResult() returns true if the given opcode's instruction
	// has a result type ID and result ID, i.e. defines an Object.
	static bool HasTypeAndResult(spv::Op opcode);

	// Returns 0 when invalid.
	static VkShaderStageFlagBits executionModelToStage(spv::ExecutionModel model);

	static bool StoresInHelperInvocationsHaveNoEffect(spv::StorageClass storageClass);
	static bool IsExplicitLayout(spv::StorageClass storageClass);
	static bool IsTerminator(spv::Op opcode);
};

// The SpirvShader class holds a parsed SPIR-V shader but also the pipeline
// state which affects code emission when passing it to SpirvEmitter.
class SpirvShader : public Spirv
{
public:
	SpirvShader(VkShaderStageFlagBits stage,
	            const char *entryPointName,
	            const SpirvBinary &insns,
	            const vk::RenderPass *renderPass,
	            uint32_t subpassIndex,
	            const VkRenderingInputAttachmentIndexInfoKHR *inputAttachmentMapping,
	            bool robustBufferAccess);

	~SpirvShader();

	// TODO(b/247020580): Move to SpirvRoutine
	void emitProlog(SpirvRoutine *routine) const;
	void emit(SpirvRoutine *routine, const RValue<SIMD::Int> &activeLaneMask, const RValue<SIMD::Int> &storesAndAtomicsMask, const vk::DescriptorSet::Bindings &descriptorSets, const vk::Attachments *attachments = nullptr, unsigned int multiSampleCount = 0) const;
	void emitEpilog(SpirvRoutine *routine) const;

	bool getRobustBufferAccess() const { return robustBufferAccess; }
	OutOfBoundsBehavior getOutOfBoundsBehavior(Object::ID pointerId, const vk::PipelineLayout *pipelineLayout) const;

	vk::Format getInputAttachmentFormat(const vk::Attachments &attachments, int32_t index) const;

private:
	const bool robustBufferAccess;

	// When reading from an input attachment, its format is needed. When the fragment shader
	// pipeline library is created, the formats are available with render pass objects, but not
	// with dynamic rendering. Instead, with dynamic rendering the formats are provided to the
	// fragment output interface pipeline library.
	//
	// This class is instantiated by the fragment shader pipeline library. With dynamic
	// rendering, the mapping from input attachment indices to render pass attachments are
	// stored here at that point. Later, when the formats are needed, the information is taken
	// out of the information provided to the fragment output interface pipeline library.
	//
	// In the following, `inputIndexToColorIndex` maps from an input attachment index decoration
	// in the shader to the attachment index (not the remapped location).
	//
	// The depthInputIndex and stencilInputIndex values are only valid for dynamic rendering and
	// indicate what input attachment index is supposed to map to each. They are optional, as
	// the shader no longer has to decorate depth and stencil input attachments with
	// an InputAttachmentIndex decoration.
	//
	// Note: If SpirvEmitter::EmitImageRead were to take the format from the bound descriptor,
	// none of the following would be necessary. With the current implementation, read-only
	// input attachments cannot be supported with dynamic rendering because they don't map to
	// any attachment.
	const bool isUsedWithDynamicRendering;
	std::unordered_map<uint32_t, uint32_t> inputIndexToColorIndex;
	int32_t depthInputIndex = -1;    // -1 = no depth input attachment mapping provided
	int32_t stencilInputIndex = -1;  // -1 = no stencil input attachment mapping provided

	// With render passes objects, all formats are derived early from
	// VkSubpassDescription::pInputAttachments.
	std::vector<vk::Format> inputAttachmentFormats;
};

// The SpirvEmitter class translates the parsed SPIR-V shader into Reactor code.
class SpirvEmitter
{
	// Shorthand aliases for types declared on the Spirv class.
	using Type = Spirv::Type;
	using Object = Spirv::Object;
	using Block = Spirv::Block;
	using InsnIterator = Spirv::InsnIterator;
	using Decorations = Spirv::Decorations;
	using Span = Spirv::Span;

public:
	// Translates the given entry point of the shader into Reactor code,
	// emitted into the provided routine.
	static void emit(const SpirvShader &shader,
	                 SpirvRoutine *routine,
	                 Spirv::Function::ID entryPoint,
	                 RValue<SIMD::Int> activeLaneMask,
	                 RValue<SIMD::Int> storesAndAtomicsMask,
	                 const vk::Attachments *attachments,
	                 const vk::DescriptorSet::Bindings &descriptorSets,
	                 unsigned int multiSampleCount);

	// Helper for calling rr::Yield with result cast to an rr::Int.
	enum class YieldResult
	{
		ControlBarrier = 0,
	};

private:
	SpirvEmitter(const SpirvShader &shader,
	             SpirvRoutine *routine,
	             Spirv::Function::ID entryPoint,
	             RValue<SIMD::Int> activeLaneMask,
	             RValue<SIMD::Int> storesAndAtomicsMask,
	             const vk::Attachments *attachments,
	             const vk::DescriptorSet::Bindings &descriptorSets,
	             unsigned int multiSampleCount);

	// Returns the mask describing the active lanes as updated by dynamic
	// control flow. Active lanes include helper invocations, used for
	// calculating fragment derivatives, which must not perform memory
	// stores or atomic writes.
	//
	// Use activeStoresAndAtomicsMask() to consider both control flow and
	// lanes which are permitted to perform memory stores and atomic
	// operations
	RValue<SIMD::Int> activeLaneMask() const
	{
		ASSERT(activeLaneMaskValue != nullptr);
		return RValue<SIMD::Int>(activeLaneMaskValue);
	}

	// Returns the immutable lane mask that describes which lanes are
	// permitted to perform memory stores and atomic operations.
	// Note that unlike activeStoresAndAtomicsMask() this mask *does not*
	// consider lanes that have been made inactive due to control flow.
	RValue<SIMD::Int> storesAndAtomicsMask() const
	{
		ASSERT(storesAndAtomicsMaskValue != nullptr);
		return RValue<SIMD::Int>(storesAndAtomicsMaskValue);
	}

	// Returns a lane mask that describes which lanes are permitted to
	// perform memory stores and atomic operations, considering lanes that
	// may have been made inactive due to control flow.
	RValue<SIMD::Int> activeStoresAndAtomicsMask() const
	{
		return activeLaneMask() & storesAndAtomicsMask();
	}

	// Add a new active lane mask edge from the current block to out.
	// The edge mask value will be (mask AND activeLaneMaskValue).
	// If multiple active lane masks are added for the same edge, then
	// they will be ORed together.
	void addOutputActiveLaneMaskEdge(Block::ID out, RValue<SIMD::Int> mask);

	// Add a new active lane mask for the edge from -> to.
	// If multiple active lane masks are added for the same edge, then
	// they will be ORed together.
	void addActiveLaneMaskEdge(Block::ID from, Block::ID to, RValue<SIMD::Int> mask);

	// OpImageSample variants
	enum Variant : uint32_t
	{
		None,  // No Dref or Proj. Also used by OpImageFetch and OpImageQueryLod.
		Dref,
		Proj,
		ProjDref,
		VARIANT_LAST = ProjDref
	};

	// Compact representation of image instruction state that is passed to the
	// trampoline function for retrieving/generating the corresponding sampling routine.
	struct ImageInstructionSignature
	{
		ImageInstructionSignature(Variant variant, SamplerMethod samplerMethod)
		{
			this->variant = variant;
			this->samplerMethod = samplerMethod;
		}

		// Unmarshal from raw 32-bit data
		explicit ImageInstructionSignature(uint32_t signature)
		    : signature(signature)
		{}

		SamplerFunction getSamplerFunction() const
		{
			return { samplerMethod, offset != 0, sample != 0 };
		}

		bool isDref() const
		{
			return (variant == Dref) || (variant == ProjDref);
		}

		bool isProj() const
		{
			return (variant == Proj) || (variant == ProjDref);
		}

		bool hasLod() const
		{
			return samplerMethod == Lod || samplerMethod == Fetch;  // We always pass a Lod operand for Fetch operations.
		}

		bool hasGrad() const
		{
			return samplerMethod == Grad;
		}

		// All state is packed into a single 32-bit word, aliased by `signature`.
		union
		{
			struct
			{
				Variant variant : BITS(VARIANT_LAST);
				SamplerMethod samplerMethod : BITS(SAMPLER_METHOD_LAST);
				uint32_t gatherComponent : 2;
				uint32_t dim : BITS(spv::DimSubpassData);  // spv::Dim
				uint32_t arrayed : 1;
				uint32_t imageFormat : BITS(spv::ImageFormatR64i);  // spv::ImageFormat

				// Parameters are passed to the sampling routine in this order:
				uint32_t coordinates : 3;  // 1-4 (does not contain projection component)
				/* uint32_t dref : 1; */       // Indicated by Variant::ProjDref|Dref
				/* uint32_t lodOrBias : 1; */  // Indicated by SamplerMethod::Lod|Bias|Fetch
				uint32_t grad : 2;    // 0-3 components (for each of dx / dy)
				uint32_t offset : 2;  // 0-3 components
				uint32_t sample : 1;  // 0-1 scalar integer
			};

			uint32_t signature = 0;
		};
	};

	// This gets stored as a literal in the generated code, so it should be compact.
	static_assert(sizeof(ImageInstructionSignature) == sizeof(uint32_t), "ImageInstructionSignature must be 32-bit");

	// The signature plus the decoded operand IDs of a single image instruction.
	// NOTE(review): operand IDs default to 0, which appears to denote an absent
	// operand — confirm against the ImageInstruction constructor.
	struct ImageInstruction : public ImageInstructionSignature
	{
		ImageInstruction(InsnIterator insn, const Spirv &shader, const SpirvEmitter &state);

		const uint32_t position;

		Type::ID resultTypeId = 0;
		Object::ID resultId = 0;
		Object::ID imageId = 0;
		Object::ID samplerId = 0;
		Object::ID coordinateId = 0;
		Object::ID texelId = 0;
		Object::ID drefId = 0;
		Object::ID lodOrBiasId = 0;
		Object::ID gradDxId = 0;
		Object::ID gradDyId = 0;
		Object::ID offsetId = 0;
		Object::ID sampleId = 0;

	private:
		static ImageInstructionSignature parseVariantAndMethod(InsnIterator insn);
		static uint32_t getImageOperandsIndex(InsnIterator insn);
		static uint32_t getImageOperandsMask(InsnIterator insn);
	};

	// A SIMD::Pointer to an image, extended with the ID of the sampler to use.
	class SampledImagePointer : public SIMD::Pointer
	{
	public:
		SampledImagePointer(SIMD::Pointer image, Object::ID sampler)
		    : SIMD::Pointer(image)
		    , samplerId(sampler)
		{}
		Object::ID samplerId;
	};

	// Generic wrapper over either per-lane intermediate value, or a constant.
	// Constants are transparently widened to per-lane values in operator[].
	// This is appropriate in most cases -- if we're not going to do something
	// significantly different based on whether the value is uniform across lanes.
	class Operand
	{
	public:
		Operand(const Spirv &shader, const SpirvEmitter &state, Object::ID objectId);
		Operand(const Intermediate &value);

		RValue<SIMD::Float> Float(uint32_t i) const
		{
			ASSERT(i < componentCount);

			if(intermediate)
			{
				return intermediate->Float(i);
			}

			// Constructing a constant SIMD::Float is not guaranteed to preserve the data's exact
			// bit pattern, but SPIR-V provides 32-bit words representing "the bit pattern for the constant".
			// Thus we must first construct an integer constant, and bitcast to float.
			return As<SIMD::Float>(SIMD::UInt(constant[i]));
		}

		RValue<SIMD::Int> Int(uint32_t i) const
		{
			ASSERT(i < componentCount);

			if(intermediate)
			{
				return intermediate->Int(i);
			}

			return SIMD::Int(constant[i]);
		}

		RValue<SIMD::UInt> UInt(uint32_t i) const
		{
			ASSERT(i < componentCount);

			if(intermediate)
			{
				return intermediate->UInt(i);
			}

			return SIMD::UInt(constant[i]);
		}

		const SIMD::Pointer &Pointer() const
		{
			ASSERT(intermediate == nullptr);

			return *pointer;
		}

		bool isPointer() const
		{
			return (pointer != nullptr);
		}

		const SampledImagePointer &SampledImage() const
		{
			ASSERT(intermediate == nullptr);

			return *sampledImage;
		}

		bool isSampledImage() const
		{
			return (sampledImage != nullptr);
		}

	private:
		RR_PRINT_ONLY(friend struct rr::PrintValue::Ty<Operand>;)

		// Delegate constructor
		Operand(const SpirvEmitter &state, const Object &object);

		// Backing representations; which one is non-null depends on the
		// kind of the wrapped object.
		const uint32_t *constant = nullptr;
		const Intermediate *intermediate = nullptr;
		const SIMD::Pointer *pointer = nullptr;
		const SampledImagePointer *sampledImage = nullptr;

	public:
		const uint32_t componentCount;
	};

	RR_PRINT_ONLY(friend struct rr::PrintValue::Ty<Operand>;)

	// Creates the Intermediate holding componentCount scalars for the given
	// result id. Asserts if the id already has an intermediate.
	Intermediate &createIntermediate(Object::ID id, uint32_t componentCount)
	{
		auto it = intermediates.emplace(std::piecewise_construct,
		                                std::forward_as_tuple(id),
		                                std::forward_as_tuple(componentCount));
		ASSERT_MSG(it.second, "Intermediate %d created twice", id.value());
		return it.first->second;
	}

	const Intermediate &getIntermediate(Object::ID id) const
	{
		auto it = intermediates.find(id);
		ASSERT_MSG(it != intermediates.end(), "Unknown intermediate %d", id.value());
		return it->second;
	}

	void createPointer(Object::ID id, SIMD::Pointer ptr)
	{
		bool added = pointers.emplace(id, ptr).second;
		ASSERT_MSG(added, "Pointer %d created twice", id.value());
	}

	const SIMD::Pointer &getPointer(Object::ID id) const
	{
		auto it = pointers.find(id);
		ASSERT_MSG(it != pointers.end(), "Unknown pointer %d", id.value());
		return it->second;
	}

	void createSampledImage(Object::ID id, SampledImagePointer ptr)
	{
		bool added = sampledImages.emplace(id, ptr).second;
		ASSERT_MSG(added, "Sampled image %d created twice", id.value());
	}

	const SampledImagePointer &getSampledImage(Object::ID id) const
	{
		auto it = sampledImages.find(id);
		ASSERT_MSG(it != sampledImages.end(), "Unknown sampled image %d", id.value());
		return it->second;
	}

	bool isSampledImage(Object::ID id) const
	{
		return
		sampledImages.find(id) != sampledImages.end();
	}

	// Returns the pointer for id, which may be either a sampled image or a
	// plain image pointer.
	const SIMD::Pointer &getImage(Object::ID id) const
	{
		return isSampledImage(id) ? getSampledImage(id) : getPointer(id);
	}

	// Per-opcode emit functions, each translating one SPIR-V instruction
	// into Reactor code.
	void EmitVariable(InsnIterator insn);
	void EmitLoad(InsnIterator insn);
	void EmitStore(InsnIterator insn);
	void EmitAccessChain(InsnIterator insn);
	void EmitCompositeConstruct(InsnIterator insn);
	void EmitCompositeInsert(InsnIterator insn);
	void EmitCompositeExtract(InsnIterator insn);
	void EmitVectorShuffle(InsnIterator insn);
	void EmitVectorTimesScalar(InsnIterator insn);
	void EmitMatrixTimesVector(InsnIterator insn);
	void EmitVectorTimesMatrix(InsnIterator insn);
	void EmitMatrixTimesMatrix(InsnIterator insn);
	void EmitOuterProduct(InsnIterator insn);
	void EmitTranspose(InsnIterator insn);
	void EmitVectorExtractDynamic(InsnIterator insn);
	void EmitVectorInsertDynamic(InsnIterator insn);
	void EmitUnaryOp(InsnIterator insn);
	void EmitBinaryOp(InsnIterator insn);
	void EmitDot(InsnIterator insn);
	void EmitSelect(InsnIterator insn);
	void EmitExtendedInstruction(InsnIterator insn);
	void EmitExtGLSLstd450(InsnIterator insn);
	void EmitAny(InsnIterator insn);
	void EmitAll(InsnIterator insn);
	void EmitBranch(InsnIterator insn);
	void EmitBranchConditional(InsnIterator insn);
	void EmitSwitch(InsnIterator insn);
	void EmitUnreachable(InsnIterator insn);
	void EmitReturn(InsnIterator insn);
	void EmitTerminateInvocation(InsnIterator insn);
	void EmitDemoteToHelperInvocation(InsnIterator insn);
	void EmitIsHelperInvocation(InsnIterator insn);
	void EmitFunctionCall(InsnIterator insn);
	void EmitPhi(InsnIterator insn);
	void EmitImageSample(const ImageInstruction &instruction);
	void EmitImageQuerySizeLod(InsnIterator insn);
	void EmitImageQuerySize(InsnIterator insn);
	void EmitImageQueryLevels(InsnIterator insn);
	void EmitImageQuerySamples(InsnIterator insn);
	void EmitImageRead(const ImageInstruction &instruction);
	void EmitImageWrite(const ImageInstruction &instruction);
	void EmitImageTexelPointer(const ImageInstruction &instruction);
	void EmitAtomicOp(InsnIterator insn);
	void EmitAtomicCompareExchange(InsnIterator insn);
	void EmitSampledImage(InsnIterator insn);
	void EmitImage(InsnIterator insn);
	void EmitCopyObject(InsnIterator insn);
	void EmitCopyMemory(InsnIterator insn);
	void EmitControlBarrier(InsnIterator insn);
	void EmitMemoryBarrier(InsnIterator insn);
	void EmitGroupNonUniform(InsnIterator insn);
	void EmitArrayLength(InsnIterator insn);
	void EmitBitcastPointer(Object::ID resultID, Operand &src);

	enum InterpolationType
	{
		Centroid,
		AtSample,
		AtOffset,
	};
	SIMD::Float EmitInterpolate(const SIMD::Pointer &ptr, int32_t location, Object::ID paramId,
	                            uint32_t component, InterpolationType type) const;

	SIMD::Pointer WalkExplicitLayoutAccessChain(Object::ID id, Object::ID elementId, const Span &indexIds, bool nonUniform) const;
	SIMD::Pointer WalkAccessChain(Object::ID id, Object::ID elementId, const Span &indexIds, bool nonUniform) const;

	// Returns true if data in the given storage class is word-interleaved
	// by each SIMD vector lane, otherwise data is stored linearly.
	//
	// Each lane addresses a single word, picked by a base pointer and an
	// integer offset.
	//
	// A word is currently 32 bits (single float, int32_t, uint32_t).
	// A lane is a single element of a SIMD vector register.
	//
	// Storage interleaved by lane - (IsStorageInterleavedByLane() == true):
	// ---------------------------------------------------------------------
	//
	// Address = PtrBase + sizeof(Word) * (SIMD::Width * LaneOffset + LaneIndex)
	//
	// Assuming SIMD::Width == 4:
	//
	//                |  Lane[0]  |  Lane[1]  |  Lane[2]  |  Lane[3]
	// ===============+===========+===========+===========+==========
	//  LaneOffset=0: |  Word[0]  |  Word[1]  |  Word[2]  |  Word[3]
	// ---------------+-----------+-----------+-----------+----------
	//  LaneOffset=1: |  Word[4]  |  Word[5]  |  Word[6]  |  Word[7]
	// ---------------+-----------+-----------+-----------+----------
	//  LaneOffset=2: |  Word[8]  |  Word[9]  |  Word[a]  |  Word[b]
	// ---------------+-----------+-----------+-----------+----------
	//  LaneOffset=3: |  Word[c]  |  Word[d]  |  Word[e]  |  Word[f]
	//
	//
	// Linear storage - (IsStorageInterleavedByLane() == false):
	// ---------------------------------------------------------
	//
	// Address = PtrBase + sizeof(Word) * LaneOffset
	//
	//                |  Lane[0]  |  Lane[1]  |  Lane[2]  |  Lane[3]
	// ===============+===========+===========+===========+==========
	//  LaneOffset=0: |  Word[0]  |  Word[0]  |  Word[0]  |  Word[0]
	// ---------------+-----------+-----------+-----------+----------
	//  LaneOffset=1: |  Word[1]  |  Word[1]  |  Word[1]  |  Word[1]
	// ---------------+-----------+-----------+-----------+----------
	//  LaneOffset=2: |  Word[2]  |  Word[2]  |  Word[2]  |  Word[2]
	// ---------------+-----------+-----------+-----------+----------
	//  LaneOffset=3: |  Word[3]  |  Word[3]  |  Word[3]  |  Word[3]
	//

	static bool IsStorageInterleavedByLane(spv::StorageClass storageClass);
	static SIMD::Pointer GetElementPointer(SIMD::Pointer structure, uint32_t offset, spv::StorageClass storageClass);

	// Returns a SIMD::Pointer to the underlying data for the given pointer
	// object.
	// Handles objects of the following kinds:
	//  - DescriptorSet
	//  - Pointer
	//  - InterfaceVariable
	// Calling GetPointerToData with objects of any other kind will assert.
	SIMD::Pointer GetPointerToData(Object::ID id, SIMD::Int arrayIndex, bool nonUniform) const;
	void OffsetToElement(SIMD::Pointer &ptr, Object::ID elementId, int32_t arrayStride) const;

	/* image instructions */

	// Emits code to sample an image, regardless of whether any SIMD lanes are active.
	void EmitImageSampleUnconditional(Array<SIMD::Float> &out, const ImageInstruction &instruction) const;

	Pointer<Byte> getSamplerDescriptor(Pointer<Byte> imageDescriptor, const ImageInstruction &instruction) const;
	Pointer<Byte> getSamplerDescriptor(Pointer<Byte> imageDescriptor, const ImageInstruction &instruction, int laneIdx) const;
	Pointer<Byte> lookupSamplerFunction(Pointer<Byte> imageDescriptor, Pointer<Byte> samplerDescriptor, const ImageInstruction &instruction) const;
	void callSamplerFunction(Pointer<Byte> samplerFunction, Array<SIMD::Float> &out, Pointer<Byte> imageDescriptor, const ImageInstruction &instruction) const;

	void GetImageDimensions(const Type &resultTy, Object::ID imageId, Object::ID lodId, Intermediate &dst) const;

	// Intermediate state shared by the texel addressing helpers below.
	struct TexelAddressData
	{
		bool isArrayed;
		spv::Dim dim;
		int dims, texelSize;
		SIMD::Int u, v, w, ptrOffset;
	};
	static TexelAddressData setupTexelAddressData(SIMD::Int rowPitch, SIMD::Int slicePitch, SIMD::Int samplePitch, ImageInstructionSignature instruction, SIMD::Int coordinate[], SIMD::Int sample, vk::Format imageFormat, const SpirvRoutine *routine);
	static SIMD::Pointer GetNonUniformTexelAddress(ImageInstructionSignature instruction, SIMD::Pointer descriptor, SIMD::Int coordinate[], SIMD::Int sample, vk::Format imageFormat, OutOfBoundsBehavior outOfBoundsBehavior, SIMD::Int activeLaneMask, const SpirvRoutine *routine);
	static SIMD::Pointer GetTexelAddress(ImageInstructionSignature instruction, Pointer<Byte> descriptor, SIMD::Int coordinate[], SIMD::Int sample, vk::Format imageFormat, OutOfBoundsBehavior outOfBoundsBehavior, const SpirvRoutine *routine);
	static void WriteImage(ImageInstructionSignature instruction, Pointer<Byte> descriptor, const Pointer<SIMD::Int> &coord, const Pointer<SIMD::Int> &texelAndMask, vk::Format imageFormat);

	/* control flow */

	// Lookup the active lane mask for the edge from -> to.
	// If from is unreachable, then a mask of all zeros is returned.
	// Asserts if from is reachable and the edge does not exist.
	RValue<SIMD::Int> GetActiveLaneMaskEdge(Block::ID from, Block::ID to) const;

	// Updates the current active lane mask.
	void SetActiveLaneMask(RValue<SIMD::Int> mask);
	void SetStoresAndAtomicsMask(RValue<SIMD::Int> mask);

	// Emit all the unvisited blocks (except for ignore) in DFS order,
	// starting with id.
	void EmitBlocks(Block::ID id, Block::ID ignore = 0);
	void EmitNonLoop();
	void EmitLoop();

	void EmitInstructions(InsnIterator begin, InsnIterator end);
	void EmitInstruction(InsnIterator insn);

	// Helper for implementing OpStore, which doesn't take an InsnIterator so it
	// can also store independent operands.
	void Store(Object::ID pointerId, const Operand &value, bool atomic, std::memory_order memoryOrder) const;

	// LoadPhi loads the phi values from the alloca storage and places the
	// load values into the intermediate with the phi's result id.
	void LoadPhi(InsnIterator insn);

	// StorePhi updates the phi's alloca storage value using the incoming
	// values from blocks that are both in the OpPhi instruction and in
	// filter.
	void StorePhi(Block::ID blockID, InsnIterator insn, const std::unordered_set<Block::ID> &filter);

	// Emits a rr::Fence for the given MemorySemanticsMask.
	void Fence(spv::MemorySemanticsMask semantics) const;

	void Yield(YieldResult res) const;

	// Helper as we often need to take dot products as part of doing other things.
	static SIMD::Float FDot(unsigned numComponents, const Operand &x, const Operand &y);
	static SIMD::Int SDot(unsigned numComponents, const Operand &x, const Operand &y, const Operand *accum);
	static SIMD::UInt UDot(unsigned numComponents, const Operand &x, const Operand &y, const Operand *accum);
	static SIMD::Int SUDot(unsigned numComponents, const Operand &x, const Operand &y, const Operand *accum);
	static SIMD::Int AddSat(RValue<SIMD::Int> a, RValue<SIMD::Int> b);
	static SIMD::UInt AddSat(RValue<SIMD::UInt> a, RValue<SIMD::UInt> b);

	// Signature of the generated sampling routines, retrieved/built by
	// getImageSampler() from a compact 32-bit signature.
	using ImageSampler = void(void *texture, void *uvsIn, void *texelOut, void *constants);
	static ImageSampler *getImageSampler(const vk::Device *device, uint32_t signature, uint32_t samplerId, uint32_t imageViewId);
	static std::shared_ptr<rr::Routine> emitSamplerRoutine(ImageInstructionSignature instruction, const Sampler &samplerState);
	static std::shared_ptr<rr::Routine> emitWriteRoutine(ImageInstructionSignature instruction, const Sampler &samplerState);

	// TODO(b/129523279): Eliminate conversion and use vk::Sampler members directly.
	static sw::FilterType convertFilterMode(const vk::SamplerState *samplerState, VkImageViewType imageViewType, SamplerMethod samplerMethod);
	static sw::MipmapType convertMipmapMode(const vk::SamplerState *samplerState);
	static sw::AddressingMode convertAddressingMode(int coordinateIndex, const vk::SamplerState *samplerState, VkImageViewType imageViewType);

	const SpirvShader &shader;
	SpirvRoutine *const routine;                     // The current routine being built.
	Spirv::Function::ID function;                    // The current function being built.
	Block::ID block;                                 // The current block being built.
	rr::Value *activeLaneMaskValue = nullptr;        // The current active lane mask.
	rr::Value *storesAndAtomicsMaskValue = nullptr;  // The current atomics mask.
	Spirv::Block::Set visited;                       // Blocks already built.
	std::unordered_map<Block::Edge, RValue<SIMD::Int>, Block::Edge::Hash> edgeActiveLaneMasks;
	std::deque<Block::ID> *pending;

	const vk::Attachments *attachments;
	const vk::DescriptorSet::Bindings &descriptorSets;

	std::unordered_map<Object::ID, Intermediate> intermediates;
	std::unordered_map<Object::ID, std::vector<SIMD::Float>> phis;
	std::unordered_map<Object::ID, SIMD::Pointer> pointers;
	std::unordered_map<Object::ID, SampledImagePointer> sampledImages;

	const unsigned int multiSampleCount;
};

// Per-invocation state passed to / populated by the emitted Reactor code.
class SpirvRoutine
{
	using Object = Spirv::Object;

public:
	SpirvRoutine(const vk::PipelineLayout *pipelineLayout);

	using Variable = Array<SIMD::Float>;

	// Single-entry 'inline' sampler routine cache.
	struct SamplerCache
	{
		Pointer<Byte> imageDescriptor = nullptr;
		Int samplerId;

		Pointer<Byte> function;
	};

	enum Interpolation
	{
		Perspective = 0,
		Linear,
		Flat,
	};

	struct InterpolationData
	{
		Pointer<Byte> primitive;
		SIMD::Float x;
		SIMD::Float y;
		SIMD::Float rhw;
		SIMD::Float xCentroid;
		SIMD::Float yCentroid;
		SIMD::Float rhwCentroid;
	};

	const vk::PipelineLayout *const pipelineLayout;

	std::unordered_map<Object::ID, Variable> variables;
	std::unordered_map<uint32_t, SamplerCache> samplerCache;  // Indexed by the instruction position, in words.
1628 SIMD::Float inputs[MAX_INTERFACE_COMPONENTS]; 1629 Interpolation inputsInterpolation[MAX_INTERFACE_COMPONENTS]; 1630 SIMD::Float outputs[MAX_INTERFACE_COMPONENTS]; 1631 InterpolationData interpolationData; 1632 1633 Pointer<Byte> device; 1634 Pointer<Byte> workgroupMemory; 1635 Pointer<Pointer<Byte>> descriptorSets; 1636 Pointer<Int> descriptorDynamicOffsets; 1637 Pointer<Byte> pushConstants; 1638 Pointer<Byte> constants; 1639 Int discardMask = 0; 1640 1641 // Shader invocation state. 1642 // Not all of these variables are used for every type of shader, and some 1643 // are only used when debugging. See b/146486064 for more information. 1644 // Give careful consideration to the runtime performance loss before adding 1645 // more state here. 1646 std::array<SIMD::Int, 2> windowSpacePosition; // TODO(b/236162233): SIMD::Int2 1647 Int layer; // slice offset into input attachments for multiview, even if the shader doesn't use ViewIndex 1648 Int instanceID; 1649 SIMD::Int vertexIndex; 1650 std::array<SIMD::Float, 4> fragCoord; // TODO(b/236162233): SIMD::Float4 1651 std::array<SIMD::Float, 2> pointCoord; // TODO(b/236162233): SIMD::Float2 1652 SIMD::Int helperInvocation; 1653 Int4 numWorkgroups; 1654 Int4 workgroupID; 1655 Int4 workgroupSize; 1656 Int subgroupsPerWorkgroup; 1657 Int invocationsPerSubgroup; 1658 Int subgroupIndex; 1659 SIMD::Int localInvocationIndex; 1660 std::array<SIMD::Int, 3> localInvocationID; // TODO(b/236162233): SIMD::Int3 1661 std::array<SIMD::Int, 3> globalInvocationID; // TODO(b/236162233): SIMD::Int3 1662 createVariable(Object::ID id,uint32_t componentCount)1663 void createVariable(Object::ID id, uint32_t componentCount) 1664 { 1665 bool added = variables.emplace(id, Variable(componentCount)).second; 1666 ASSERT_MSG(added, "Variable %d created twice", id.value()); 1667 } 1668 getVariable(Object::ID id)1669 Variable &getVariable(Object::ID id) 1670 { 1671 auto it = variables.find(id); 1672 ASSERT_MSG(it != variables.end(), "Unknown 
variables %d", id.value()); 1673 return it->second; 1674 } 1675 1676 // setImmutableInputBuiltins() sets all the immutable input builtins, 1677 // common for all shader types. 1678 void setImmutableInputBuiltins(const SpirvShader *shader); 1679 1680 static SIMD::Float interpolateAtXY(const SIMD::Float &x, const SIMD::Float &y, const SIMD::Float &rhw, Pointer<Byte> planeEquation, Interpolation interpolation); 1681 1682 // setInputBuiltin() calls f() with the builtin and value if the shader 1683 // uses the input builtin, otherwise the call is a no-op. 1684 // F is a function with the signature: 1685 // void(const Spirv::BuiltinMapping& builtin, Array<SIMD::Float>& value) 1686 template<typename F> setInputBuiltin(const SpirvShader * shader,spv::BuiltIn id,F && f)1687 inline void setInputBuiltin(const SpirvShader *shader, spv::BuiltIn id, F &&f) 1688 { 1689 auto it = shader->inputBuiltins.find(id); 1690 if(it != shader->inputBuiltins.end()) 1691 { 1692 const auto &builtin = it->second; 1693 f(builtin, getVariable(builtin.Id)); 1694 } 1695 } 1696 }; 1697 1698 } // namespace sw 1699 1700 #endif // sw_SpirvShader_hpp 1701