1 // Copyright 2019 The SwiftShader Authors. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "SpirvShader.hpp"
16
17 #include "System/Types.hpp"
18
19 #include "Vulkan/VkDescriptorSetLayout.hpp"
20 #include "Vulkan/VkPipelineLayout.hpp"
21
22 #include <spirv/unified1/spirv.hpp>
23
24 namespace sw {
25
SpirvFormatToVulkanFormat(spv::ImageFormat format)26 static vk::Format SpirvFormatToVulkanFormat(spv::ImageFormat format)
27 {
28 switch(format)
29 {
30 case spv::ImageFormatUnknown: return VK_FORMAT_UNDEFINED;
31 case spv::ImageFormatRgba32f: return VK_FORMAT_R32G32B32A32_SFLOAT;
32 case spv::ImageFormatRgba16f: return VK_FORMAT_R16G16B16A16_SFLOAT;
33 case spv::ImageFormatR32f: return VK_FORMAT_R32_SFLOAT;
34 case spv::ImageFormatRgba8: return VK_FORMAT_R8G8B8A8_UNORM;
35 case spv::ImageFormatRgba8Snorm: return VK_FORMAT_R8G8B8A8_SNORM;
36 case spv::ImageFormatRg32f: return VK_FORMAT_R32G32_SFLOAT;
37 case spv::ImageFormatRg16f: return VK_FORMAT_R16G16_SFLOAT;
38 case spv::ImageFormatR11fG11fB10f: return VK_FORMAT_B10G11R11_UFLOAT_PACK32;
39 case spv::ImageFormatR16f: return VK_FORMAT_R16_SFLOAT;
40 case spv::ImageFormatRgba16: return VK_FORMAT_R16G16B16A16_UNORM;
41 case spv::ImageFormatRgb10A2: return VK_FORMAT_A2B10G10R10_UNORM_PACK32;
42 case spv::ImageFormatRg16: return VK_FORMAT_R16G16_UNORM;
43 case spv::ImageFormatRg8: return VK_FORMAT_R8G8_UNORM;
44 case spv::ImageFormatR16: return VK_FORMAT_R16_UNORM;
45 case spv::ImageFormatR8: return VK_FORMAT_R8_UNORM;
46 case spv::ImageFormatRgba16Snorm: return VK_FORMAT_R16G16B16A16_SNORM;
47 case spv::ImageFormatRg16Snorm: return VK_FORMAT_R16G16_SNORM;
48 case spv::ImageFormatRg8Snorm: return VK_FORMAT_R8G8_SNORM;
49 case spv::ImageFormatR16Snorm: return VK_FORMAT_R16_SNORM;
50 case spv::ImageFormatR8Snorm: return VK_FORMAT_R8_SNORM;
51 case spv::ImageFormatRgba32i: return VK_FORMAT_R32G32B32A32_SINT;
52 case spv::ImageFormatRgba16i: return VK_FORMAT_R16G16B16A16_SINT;
53 case spv::ImageFormatRgba8i: return VK_FORMAT_R8G8B8A8_SINT;
54 case spv::ImageFormatR32i: return VK_FORMAT_R32_SINT;
55 case spv::ImageFormatRg32i: return VK_FORMAT_R32G32_SINT;
56 case spv::ImageFormatRg16i: return VK_FORMAT_R16G16_SINT;
57 case spv::ImageFormatRg8i: return VK_FORMAT_R8G8_SINT;
58 case spv::ImageFormatR16i: return VK_FORMAT_R16_SINT;
59 case spv::ImageFormatR8i: return VK_FORMAT_R8_SINT;
60 case spv::ImageFormatRgba32ui: return VK_FORMAT_R32G32B32A32_UINT;
61 case spv::ImageFormatRgba16ui: return VK_FORMAT_R16G16B16A16_UINT;
62 case spv::ImageFormatRgba8ui: return VK_FORMAT_R8G8B8A8_UINT;
63 case spv::ImageFormatR32ui: return VK_FORMAT_R32_UINT;
64 case spv::ImageFormatRgb10a2ui: return VK_FORMAT_A2B10G10R10_UINT_PACK32;
65 case spv::ImageFormatRg32ui: return VK_FORMAT_R32G32_UINT;
66 case spv::ImageFormatRg16ui: return VK_FORMAT_R16G16_UINT;
67 case spv::ImageFormatRg8ui: return VK_FORMAT_R8G8_UINT;
68 case spv::ImageFormatR16ui: return VK_FORMAT_R16_UINT;
69 case spv::ImageFormatR8ui: return VK_FORMAT_R8_UINT;
70
71 default:
72 UNSUPPORTED("SPIR-V ImageFormat %u", format);
73 return VK_FORMAT_UNDEFINED;
74 }
75 }
76
// Decodes a SPIR-V image instruction into a normalized description: the ids of
// the image, sampler, coordinate, texel and optional operands, plus the image's
// dimensionality, arrayedness and format. The sampling variant/method signature
// is determined first by parseVariantAndMethod(), which this constructor's
// member-initializer list delegates to.
SpirvEmitter::ImageInstruction::ImageInstruction(InsnIterator insn, const Spirv &shader, const SpirvEmitter &state)
    : ImageInstructionSignature(parseVariantAndMethod(insn))
    , position(insn.distanceFrom(shader.begin()))
{
	// OpImageWrite produces no result, so its operands start at word(1).
	if(samplerMethod == Write)
	{
		imageId = insn.word(1);
		coordinateId = insn.word(2);
		texelId = insn.word(3);
	}
	else
	{
		resultTypeId = insn.resultTypeId();  // word(1)
		resultId = insn.resultId();          // word(2)

		if(samplerMethod == Fetch || samplerMethod == Read || samplerMethod == TexelPointer)  // Samplerless
		{
			imageId = insn.word(3);
		}
		else
		{
			// sampledImageId is either the result of an OpSampledImage instruction or
			// an externally combined sampler and image.
			Object::ID sampledImageId = insn.word(3);

			if(state.isSampledImage(sampledImageId))  // Result of an OpSampledImage instruction
			{
				// Unpack the separate image and sampler ids recorded for OpSampledImage.
				const SampledImagePointer &sampledImage = state.getSampledImage(sampledImageId);
				imageId = shader.getObject(sampledImageId).definition.word(3);
				samplerId = sampledImage.samplerId;
			}
			else  // Combined image/sampler
			{
				imageId = sampledImageId;
				samplerId = sampledImageId;
			}
		}

		coordinateId = insn.word(4);
	}

	// `imageId` can represent either a Sampled Image, a samplerless Image, or a pointer to an Image.
	// To get to the OpTypeImage operands, traverse the OpTypeSampledImage or OpTypePointer.
	const Type &imageObjectType = shader.getObjectType(imageId);
	const Type &imageReferenceType = (imageObjectType.opcode() == spv::OpTypeSampledImage)
	                                     ? shader.getType(imageObjectType.definition.word(2))
	                                     : imageObjectType;
	const Type &imageType = ((imageReferenceType.opcode() == spv::OpTypePointer)
	                             ? shader.getType(imageReferenceType.element)
	                             : imageReferenceType);

	ASSERT(imageType.opcode() == spv::OpTypeImage);
	dim = imageType.definition.word(3);          // OpTypeImage 'Dim' operand
	arrayed = imageType.definition.word(5);      // OpTypeImage 'Arrayed' operand
	imageFormat = imageType.definition.word(8);  // OpTypeImage 'Image Format' operand

	const Object &coordinateObject = shader.getObject(coordinateId);
	const Type &coordinateType = shader.getType(coordinateObject);
	// Projective forms carry an extra q component which is not a coordinate proper.
	coordinates = coordinateType.componentCount - (isProj() ? 1 : 0);

	if(samplerMethod == TexelPointer)
	{
		// OpImageTexelPointer carries its Sample operand directly at word(5).
		sampleId = insn.word(5);
		sample = !shader.getObject(sampleId).isConstantZero();
	}

	if(isDref())
	{
		// Depth-comparison reference value follows the coordinate.
		drefId = insn.word(5);
	}

	if(samplerMethod == Gather)
	{
		// Non-Dref gathers select a component via a constant operand at word(5);
		// Dref gathers always gather the depth comparison result (component 0).
		gatherComponent = !isDref() ? shader.getObject(insn.word(5)).constantValue[0] : 0;
	}

	uint32_t operandsIndex = getImageOperandsIndex(insn);
	uint32_t imageOperands = (operandsIndex != 0) ? insn.word(operandsIndex) : 0;  // The mask which indicates which operands are provided.

	operandsIndex += 1;  // Advance to the first actual operand <id> location.

	// Optional image operands appear in the order of their mask bits, so each
	// branch below consumes its word(s), advances operandsIndex, and clears its
	// bit so any unhandled remainder can be detected at the end.
	if(imageOperands & spv::ImageOperandsBiasMask)
	{
		ASSERT(samplerMethod == Bias);
		lodOrBiasId = insn.word(operandsIndex);
		operandsIndex += 1;
		imageOperands &= ~spv::ImageOperandsBiasMask;
	}

	if(imageOperands & spv::ImageOperandsLodMask)
	{
		ASSERT(samplerMethod == Lod || samplerMethod == Fetch);
		lodOrBiasId = insn.word(operandsIndex);
		operandsIndex += 1;
		imageOperands &= ~spv::ImageOperandsLodMask;
	}

	if(imageOperands & spv::ImageOperandsGradMask)
	{
		ASSERT(samplerMethod == Grad);
		gradDxId = insn.word(operandsIndex + 0);
		gradDyId = insn.word(operandsIndex + 1);
		operandsIndex += 2;
		imageOperands &= ~spv::ImageOperandsGradMask;

		grad = shader.getObjectType(gradDxId).componentCount;
	}

	if(imageOperands & spv::ImageOperandsConstOffsetMask)
	{
		offsetId = insn.word(operandsIndex);
		operandsIndex += 1;
		imageOperands &= ~spv::ImageOperandsConstOffsetMask;

		offset = shader.getObjectType(offsetId).componentCount;
	}

	if(imageOperands & spv::ImageOperandsSampleMask)
	{
		ASSERT(samplerMethod == Fetch || samplerMethod == Read || samplerMethod == Write);
		sampleId = insn.word(operandsIndex);
		operandsIndex += 1;
		imageOperands &= ~spv::ImageOperandsSampleMask;

		sample = !shader.getObject(sampleId).isConstantZero();
	}

	// TODO(b/174475384)
	if(imageOperands & spv::ImageOperandsZeroExtendMask)
	{
		ASSERT(samplerMethod == Read || samplerMethod == Write);
		imageOperands &= ~spv::ImageOperandsZeroExtendMask;
	}
	else if(imageOperands & spv::ImageOperandsSignExtendMask)
	{
		ASSERT(samplerMethod == Read || samplerMethod == Write);
		imageOperands &= ~spv::ImageOperandsSignExtendMask;
	}

	[[maybe_unused]] spv::Scope scope = spv::ScopeCrossDevice;  // "Whilst the CrossDevice scope is defined in SPIR-V, it is disallowed in Vulkan."

	if(imageOperands & spv::ImageOperandsMakeTexelAvailableMask)
	{
		scope = static_cast<spv::Scope>(insn.word(operandsIndex));
		operandsIndex += 1;
		imageOperands &= ~spv::ImageOperandsMakeTexelAvailableMask;
	}

	if(imageOperands & spv::ImageOperandsMakeTexelVisibleMask)
	{
		scope = static_cast<spv::Scope>(insn.word(operandsIndex));
		operandsIndex += 1;
		imageOperands &= ~spv::ImageOperandsMakeTexelVisibleMask;
	}

	if(imageOperands & spv::ImageOperandsNonPrivateTexelMask)
	{
		// No operand word; the mask bit alone conveys the access property.
		imageOperands &= ~spv::ImageOperandsNonPrivateTexelMask;
	}

	if(imageOperands & spv::ImageOperandsVolatileTexelMask)
	{
		UNIMPLEMENTED("b/176819536");
		imageOperands &= ~spv::ImageOperandsVolatileTexelMask;
	}

	if(imageOperands & spv::ImageOperandsNontemporalMask)
	{
		// Hints that the accessed texels are not likely
		// to be accessed again in the near future.
		imageOperands &= ~spv::ImageOperandsNontemporalMask;
	}

	// There should be no remaining image operands.
	if(imageOperands != 0)
	{
		UNSUPPORTED("Image operands 0x%08X", imageOperands);
	}
}
256
parseVariantAndMethod(InsnIterator insn)257 SpirvEmitter::ImageInstructionSignature SpirvEmitter::ImageInstruction::parseVariantAndMethod(InsnIterator insn)
258 {
259 uint32_t imageOperands = getImageOperandsMask(insn);
260 bool bias = imageOperands & spv::ImageOperandsBiasMask;
261 bool grad = imageOperands & spv::ImageOperandsGradMask;
262
263 switch(insn.opcode())
264 {
265 case spv::OpImageSampleImplicitLod: return { None, bias ? Bias : Implicit };
266 case spv::OpImageSampleExplicitLod: return { None, grad ? Grad : Lod };
267 case spv::OpImageSampleDrefImplicitLod: return { Dref, bias ? Bias : Implicit };
268 case spv::OpImageSampleDrefExplicitLod: return { Dref, grad ? Grad : Lod };
269 case spv::OpImageSampleProjImplicitLod: return { Proj, bias ? Bias : Implicit };
270 case spv::OpImageSampleProjExplicitLod: return { Proj, grad ? Grad : Lod };
271 case spv::OpImageSampleProjDrefImplicitLod: return { ProjDref, bias ? Bias : Implicit };
272 case spv::OpImageSampleProjDrefExplicitLod: return { ProjDref, grad ? Grad : Lod };
273 case spv::OpImageGather: return { None, Gather };
274 case spv::OpImageDrefGather: return { Dref, Gather };
275 case spv::OpImageFetch: return { None, Fetch };
276 case spv::OpImageQueryLod: return { None, Query };
277 case spv::OpImageRead: return { None, Read };
278 case spv::OpImageWrite: return { None, Write };
279 case spv::OpImageTexelPointer: return { None, TexelPointer };
280
281 default:
282 ASSERT(false);
283 return { None, Implicit };
284 }
285 }
286
287 // Returns the instruction word index at which the Image Operands mask is located, or 0 if not present.
getImageOperandsIndex(InsnIterator insn)288 uint32_t SpirvEmitter::ImageInstruction::getImageOperandsIndex(InsnIterator insn)
289 {
290 switch(insn.opcode())
291 {
292 case spv::OpImageSampleImplicitLod:
293 case spv::OpImageSampleProjImplicitLod:
294 return insn.wordCount() > 5 ? 5 : 0; // Optional
295 case spv::OpImageSampleExplicitLod:
296 case spv::OpImageSampleProjExplicitLod:
297 return 5; // "Either Lod or Grad image operands must be present."
298 case spv::OpImageSampleDrefImplicitLod:
299 case spv::OpImageSampleProjDrefImplicitLod:
300 return insn.wordCount() > 6 ? 6 : 0; // Optional
301 case spv::OpImageSampleDrefExplicitLod:
302 case spv::OpImageSampleProjDrefExplicitLod:
303 return 6; // "Either Lod or Grad image operands must be present."
304 case spv::OpImageGather:
305 case spv::OpImageDrefGather:
306 return insn.wordCount() > 6 ? 6 : 0; // Optional
307 case spv::OpImageFetch:
308 return insn.wordCount() > 5 ? 5 : 0; // Optional
309 case spv::OpImageQueryLod:
310 ASSERT(insn.wordCount() == 5);
311 return 0; // No image operands.
312 case spv::OpImageRead:
313 return insn.wordCount() > 5 ? 5 : 0; // Optional
314 case spv::OpImageWrite:
315 return insn.wordCount() > 4 ? 4 : 0; // Optional
316 case spv::OpImageTexelPointer:
317 ASSERT(insn.wordCount() == 6);
318 return 0; // No image operands.
319
320 default:
321 ASSERT(false);
322 return 0;
323 }
324 }
325
getImageOperandsMask(InsnIterator insn)326 uint32_t SpirvEmitter::ImageInstruction::getImageOperandsMask(InsnIterator insn)
327 {
328 uint32_t operandsIndex = getImageOperandsIndex(insn);
329 return (operandsIndex != 0) ? insn.word(operandsIndex) : 0;
330 }
331
EmitImageSample(const ImageInstruction & instruction)332 void SpirvEmitter::EmitImageSample(const ImageInstruction &instruction)
333 {
334 auto &resultType = shader.getType(instruction.resultTypeId);
335 auto &result = createIntermediate(instruction.resultId, resultType.componentCount);
336 Array<SIMD::Float> out(4);
337
338 // TODO(b/153380916): When we're in a code path that is always executed,
339 // i.e. post-dominators of the entry block, we don't have to dynamically
340 // check whether any lanes are active, and can elide the jump.
341 If(AnyTrue(activeLaneMask()))
342 {
343 EmitImageSampleUnconditional(out, instruction);
344 }
345
346 for(auto i = 0u; i < resultType.componentCount; i++) { result.move(i, out[i]); }
347 }
348
// Performs the image sample without checking the active lane mask first.
// Descriptors decorated NonUniform may differ per lane, so that path resolves
// the descriptor, sampler function and sample result one lane at a time;
// otherwise a single uniform descriptor is used for all lanes.
void SpirvEmitter::EmitImageSampleUnconditional(Array<SIMD::Float> &out, const ImageInstruction &instruction) const
{
	auto decorations = shader.GetDecorationsForId(instruction.imageId);

	if(decorations.NonUniform)
	{
		SIMD::Int activeLaneMask = this->activeLaneMask();
		SIMD::Pointer imagePointer = getImage(instruction.imageId);
		// PerLane output
		for(int laneIdx = 0; laneIdx < SIMD::Width; laneIdx++)
		{
			// Per-lane scratch output; only this lane's components get copied to `out`.
			Array<SIMD::Float> laneOut(out.getArraySize());
			// Skip inactive lanes: their descriptor pointer may not be valid.
			If(Extract(activeLaneMask, laneIdx) != 0)
			{
				Pointer<Byte> imageDescriptor = imagePointer.getPointerForLane(laneIdx);  // vk::SampledImageDescriptor*
				Pointer<Byte> samplerDescriptor = getSamplerDescriptor(imageDescriptor, instruction, laneIdx);

				Pointer<Byte> samplerFunction = lookupSamplerFunction(imageDescriptor, samplerDescriptor, instruction);

				callSamplerFunction(samplerFunction, laneOut, imageDescriptor, instruction);
			}

			// Merge this lane's result components into the combined output.
			for(int outIdx = 0; outIdx < out.getArraySize(); outIdx++)
			{
				out[outIdx] = Insert(out[outIdx], Extract(laneOut[outIdx], laneIdx), laneIdx);
			}
		}
	}
	else
	{
		// Uniform descriptor: one lookup and one sampler call covers all lanes.
		Pointer<Byte> imageDescriptor = getImage(instruction.imageId).getUniformPointer();  // vk::SampledImageDescriptor*
		Pointer<Byte> samplerDescriptor = getSamplerDescriptor(imageDescriptor, instruction);

		Pointer<Byte> samplerFunction = lookupSamplerFunction(imageDescriptor, samplerDescriptor, instruction);

		callSamplerFunction(samplerFunction, out, imageDescriptor, instruction);
	}
}
387
getSamplerDescriptor(Pointer<Byte> imageDescriptor,const ImageInstruction & instruction) const388 Pointer<Byte> SpirvEmitter::getSamplerDescriptor(Pointer<Byte> imageDescriptor, const ImageInstruction &instruction) const
389 {
390 return ((instruction.samplerId == instruction.imageId) || (instruction.samplerId == 0)) ? imageDescriptor : getImage(instruction.samplerId).getUniformPointer();
391 }
392
getSamplerDescriptor(Pointer<Byte> imageDescriptor,const ImageInstruction & instruction,int laneIdx) const393 Pointer<Byte> SpirvEmitter::getSamplerDescriptor(Pointer<Byte> imageDescriptor, const ImageInstruction &instruction, int laneIdx) const
394 {
395 return ((instruction.samplerId == instruction.imageId) || (instruction.samplerId == 0)) ? imageDescriptor : getImage(instruction.samplerId).getPointerForLane(laneIdx);
396 }
397
// Obtains the specialized sampling routine for this instruction, using a
// per-instruction-position cache keyed on (image descriptor, sampler id) to
// avoid repeated getImageSampler calls when the same descriptors are reused.
Pointer<Byte> SpirvEmitter::lookupSamplerFunction(Pointer<Byte> imageDescriptor, Pointer<Byte> samplerDescriptor, const ImageInstruction &instruction) const
{
	// Samplerless instructions (samplerId == 0) use a zero sampler id for the lookup.
	Int samplerId = (instruction.samplerId != 0) ? *Pointer<rr::Int>(samplerDescriptor + OFFSET(vk::SampledImageDescriptor, samplerId)) : Int(0);

	auto &cache = routine->samplerCache.at(instruction.position);
	Bool cacheHit = (cache.imageDescriptor == imageDescriptor) && (cache.samplerId == samplerId);  // TODO(b/205566405): Skip sampler ID check for samplerless instructions.

	If(!cacheHit)
	{
		// Miss: resolve the sampler function for this image view / sampler pair
		// and refresh the cache entry.
		rr::Int imageViewId = *Pointer<rr::Int>(imageDescriptor + OFFSET(vk::ImageDescriptor, imageViewId));
		cache.function = Call(getImageSampler, routine->device, instruction.signature, samplerId, imageViewId);
		cache.imageDescriptor = imageDescriptor;
		cache.samplerId = samplerId;
	}

	return cache.function;
}
415
callSamplerFunction(Pointer<Byte> samplerFunction,Array<SIMD::Float> & out,Pointer<Byte> imageDescriptor,const ImageInstruction & instruction) const416 void SpirvEmitter::callSamplerFunction(Pointer<Byte> samplerFunction, Array<SIMD::Float> &out, Pointer<Byte> imageDescriptor, const ImageInstruction &instruction) const
417 {
418 Array<SIMD::Float> in(16); // Maximum 16 input parameter components.
419
420 auto coordinate = Operand(shader, *this, instruction.coordinateId);
421
422 uint32_t i = 0;
423 for(; i < instruction.coordinates; i++)
424 {
425 if(instruction.isProj())
426 {
427 in[i] = coordinate.Float(i) / coordinate.Float(instruction.coordinates); // TODO(b/129523279): Optimize using reciprocal.
428 }
429 else
430 {
431 in[i] = coordinate.Float(i);
432 }
433 }
434
435 if(instruction.isDref())
436 {
437 auto drefValue = Operand(shader, *this, instruction.drefId);
438
439 if(instruction.isProj())
440 {
441 in[i] = drefValue.Float(0) / coordinate.Float(instruction.coordinates); // TODO(b/129523279): Optimize using reciprocal.
442 }
443 else
444 {
445 in[i] = drefValue.Float(0);
446 }
447
448 i++;
449 }
450
451 if(instruction.lodOrBiasId != 0)
452 {
453 auto lodValue = Operand(shader, *this, instruction.lodOrBiasId);
454 in[i] = lodValue.Float(0);
455 i++;
456 }
457 else if(instruction.gradDxId != 0)
458 {
459 auto dxValue = Operand(shader, *this, instruction.gradDxId);
460 auto dyValue = Operand(shader, *this, instruction.gradDyId);
461 ASSERT(dxValue.componentCount == dxValue.componentCount);
462
463 for(uint32_t j = 0; j < dxValue.componentCount; j++, i++)
464 {
465 in[i] = dxValue.Float(j);
466 }
467
468 for(uint32_t j = 0; j < dxValue.componentCount; j++, i++)
469 {
470 in[i] = dyValue.Float(j);
471 }
472 }
473 else if(instruction.samplerMethod == Fetch)
474 {
475 // The instruction didn't provide a lod operand, but the sampler's Fetch
476 // function requires one to be present. If no lod is supplied, the default
477 // is zero.
478 in[i] = As<SIMD::Float>(SIMD::Int(0));
479 i++;
480 }
481
482 if(instruction.offsetId != 0)
483 {
484 auto offsetValue = Operand(shader, *this, instruction.offsetId);
485
486 for(uint32_t j = 0; j < offsetValue.componentCount; j++, i++)
487 {
488 in[i] = As<SIMD::Float>(offsetValue.Int(j)); // Integer values, but transfered as float.
489 }
490 }
491
492 if(instruction.sample)
493 {
494 auto sampleValue = Operand(shader, *this, instruction.sampleId);
495 in[i] = As<SIMD::Float>(sampleValue.Int(0));
496 }
497
498 Pointer<Byte> texture = imageDescriptor + OFFSET(vk::SampledImageDescriptor, texture); // sw::Texture*
499
500 Call<ImageSampler>(samplerFunction, texture, &in, &out, routine->constants);
501 }
502
EmitImageQuerySizeLod(InsnIterator insn)503 void SpirvEmitter::EmitImageQuerySizeLod(InsnIterator insn)
504 {
505 auto &resultTy = shader.getType(insn.resultTypeId());
506 auto imageId = Object::ID(insn.word(3));
507 auto lodId = Object::ID(insn.word(4));
508
509 auto &dst = createIntermediate(insn.resultId(), resultTy.componentCount);
510 GetImageDimensions(resultTy, imageId, lodId, dst);
511 }
512
EmitImageQuerySize(InsnIterator insn)513 void SpirvEmitter::EmitImageQuerySize(InsnIterator insn)
514 {
515 auto &resultTy = shader.getType(insn.resultTypeId());
516 auto imageId = Object::ID(insn.word(3));
517 auto lodId = Object::ID(0);
518
519 auto &dst = createIntermediate(insn.resultId(), resultTy.componentCount);
520 GetImageDimensions(resultTy, imageId, lodId, dst);
521 }
522
// Reads the bound image's width/height/depth from its descriptor and writes up
// to three dimensions (optionally mip-shifted by the LOD in `lodId`; 0 means no
// LOD operand) plus, for arrayed images, the layer count into `dst`.
void SpirvEmitter::GetImageDimensions(const Type &resultTy, Object::ID imageId, Object::ID lodId, Intermediate &dst) const
{
	auto &image = shader.getObject(imageId);
	auto &imageType = shader.getType(image);

	ASSERT(imageType.definition.opcode() == spv::OpTypeImage);
	bool isArrayed = imageType.definition.word(5) != 0;  // OpTypeImage 'Arrayed' operand
	// For arrayed images the last result component holds the layer count, not a dimension.
	uint32_t dimensions = resultTy.componentCount - (isArrayed ? 1 : 0);

	const Spirv::DescriptorDecorations &d = shader.descriptorDecorations.at(imageId);
	auto descriptorType = routine->pipelineLayout->getDescriptorType(d.DescriptorSet, d.Binding);

	Pointer<Byte> descriptor = getPointer(imageId).getUniformPointer();

	Int width;
	Int height;
	Int depth;

	// Storage and sampled images use different descriptor layouts, so the
	// field offsets differ even though the fields have the same names.
	switch(descriptorType)
	{
	case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
	case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
		width = *Pointer<Int>(descriptor + OFFSET(vk::StorageImageDescriptor, width));
		height = *Pointer<Int>(descriptor + OFFSET(vk::StorageImageDescriptor, height));
		depth = *Pointer<Int>(descriptor + OFFSET(vk::StorageImageDescriptor, depth));
		break;
	case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
	case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
	case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
		width = *Pointer<Int>(descriptor + OFFSET(vk::SampledImageDescriptor, width));
		height = *Pointer<Int>(descriptor + OFFSET(vk::SampledImageDescriptor, height));
		depth = *Pointer<Int>(descriptor + OFFSET(vk::SampledImageDescriptor, depth));
		break;
	default:
		UNREACHABLE("Image descriptorType: %d", int(descriptorType));
	}

	if(lodId != 0)
	{
		auto lodVal = Operand(shader, *this, lodId);
		ASSERT(lodVal.componentCount == 1);
		auto lod = lodVal.Int(0);
		auto one = SIMD::Int(1);

		// Each successive mip level halves a dimension, clamped to a minimum of 1.
		if(dimensions >= 1) dst.move(0, Max(SIMD::Int(width) >> lod, one));
		if(dimensions >= 2) dst.move(1, Max(SIMD::Int(height) >> lod, one));
		if(dimensions >= 3) dst.move(2, Max(SIMD::Int(depth) >> lod, one));
	}
	else
	{
		if(dimensions >= 1) dst.move(0, SIMD::Int(width));
		if(dimensions >= 2) dst.move(1, SIMD::Int(height));
		if(dimensions >= 3) dst.move(2, SIMD::Int(depth));
	}

	if(isArrayed)
	{
		// The descriptor's depth field doubles as the layer count for arrayed
		// images; it is not mip-shifted.
		dst.move(dimensions, SIMD::Int(depth));
	}
}
584
EmitImageQueryLevels(InsnIterator insn)585 void SpirvEmitter::EmitImageQueryLevels(InsnIterator insn)
586 {
587 auto &resultTy = shader.getType(insn.resultTypeId());
588 ASSERT(resultTy.componentCount == 1);
589 auto imageId = Object::ID(insn.word(3));
590
591 const Spirv::DescriptorDecorations &d = shader.descriptorDecorations.at(imageId);
592 auto descriptorType = routine->pipelineLayout->getDescriptorType(d.DescriptorSet, d.Binding);
593
594 Pointer<Byte> descriptor = getPointer(imageId).getUniformPointer();
595 Int mipLevels = 0;
596 switch(descriptorType)
597 {
598 case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
599 case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
600 case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
601 mipLevels = *Pointer<Int>(descriptor + OFFSET(vk::SampledImageDescriptor, mipLevels)); // uint32_t
602 break;
603 default:
604 UNREACHABLE("Image descriptorType: %d", int(descriptorType));
605 }
606
607 auto &dst = createIntermediate(insn.resultId(), 1);
608 dst.move(0, SIMD::Int(mipLevels));
609 }
610
EmitImageQuerySamples(InsnIterator insn)611 void SpirvEmitter::EmitImageQuerySamples(InsnIterator insn)
612 {
613 auto &resultTy = shader.getType(insn.resultTypeId());
614 ASSERT(resultTy.componentCount == 1);
615 auto imageId = Object::ID(insn.word(3));
616 auto imageTy = shader.getObjectType(imageId);
617 ASSERT(imageTy.definition.opcode() == spv::OpTypeImage);
618 ASSERT(imageTy.definition.word(3) == spv::Dim2D);
619 ASSERT(imageTy.definition.word(6 /* MS */) == 1);
620
621 const Spirv::DescriptorDecorations &d = shader.descriptorDecorations.at(imageId);
622 auto descriptorType = routine->pipelineLayout->getDescriptorType(d.DescriptorSet, d.Binding);
623
624 Pointer<Byte> descriptor = getPointer(imageId).getUniformPointer();
625 Int sampleCount = 0;
626 switch(descriptorType)
627 {
628 case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
629 sampleCount = *Pointer<Int>(descriptor + OFFSET(vk::StorageImageDescriptor, sampleCount)); // uint32_t
630 break;
631 case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
632 case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
633 case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
634 sampleCount = *Pointer<Int>(descriptor + OFFSET(vk::SampledImageDescriptor, sampleCount)); // uint32_t
635 break;
636 default:
637 UNREACHABLE("Image descriptorType: %d", int(descriptorType));
638 }
639
640 auto &dst = createIntermediate(insn.resultId(), 1);
641 dst.move(0, SIMD::Int(sampleCount));
642 }
643
// Computes the per-lane byte offset of the addressed texel from its
// coordinates, layer, and sample index, given the image's row/slice/sample
// pitches. Also returns the intermediate u/v/w coordinates and layout facts
// (dims, arrayed, texel size) so callers can perform bounds checks.
SpirvEmitter::TexelAddressData SpirvEmitter::setupTexelAddressData(SIMD::Int rowPitch, SIMD::Int slicePitch, SIMD::Int samplePitch, ImageInstructionSignature instruction, SIMD::Int coordinate[], SIMD::Int sample, vk::Format imageFormat, const SpirvRoutine *routine)
{
	TexelAddressData data;

	data.isArrayed = instruction.arrayed;
	data.dim = static_cast<spv::Dim>(instruction.dim);
	data.texelSize = imageFormat.bytes();
	// For arrayed images the last coordinate selects the layer, not a dimension.
	data.dims = instruction.coordinates - (data.isArrayed ? 1 : 0);

	data.u = coordinate[0];
	data.v = SIMD::Int(0);

	if(data.dims > 1)
	{
		data.v = coordinate[1];
	}

	// Input attachment (subpass data) coordinates are relative to the current
	// fragment's window-space position.
	if(data.dim == spv::DimSubpassData)
	{
		data.u += routine->windowSpacePosition[0];
		data.v += routine->windowSpacePosition[1];
	}

	// Accumulate the byte offset: u * texelSize, then v * rowPitch,
	// then (w + layer) * slicePitch, then sample * samplePitch.
	data.ptrOffset = data.u * SIMD::Int(data.texelSize);

	if(data.dims > 1)
	{
		data.ptrOffset += data.v * rowPitch;
	}

	data.w = 0;
	if((data.dims > 2) || data.isArrayed)
	{
		if(data.dims > 2)
		{
			data.w += coordinate[2];
		}

		if(data.isArrayed)
		{
			// The layer index follows the spatial coordinates.
			data.w += coordinate[data.dims];
		}

		data.ptrOffset += data.w * slicePitch;
	}

	if(data.dim == spv::DimSubpassData)
	{
		// Multiview input attachment access is to the layer corresponding to the current view
		data.ptrOffset += SIMD::Int(routine->layer) * slicePitch;
	}

	if(instruction.sample)
	{
		data.ptrOffset += sample * samplePitch;
	}

	return data;
}
703
// Computes a per-lane texel address from a non-uniform (per-lane) descriptor:
// loads the pitches per lane, builds the texel offset, and — for Nullify
// out-of-bounds behavior — redirects out-of-range lanes to a guaranteed
// out-of-bounds offset so their accesses get nullified. S8_UINT images use the
// separate stencil plane's pointer and pitches.
SIMD::Pointer SpirvEmitter::GetNonUniformTexelAddress(ImageInstructionSignature instruction, SIMD::Pointer descriptor, SIMD::Int coordinate[], SIMD::Int sample, vk::Format imageFormat, OutOfBoundsBehavior outOfBoundsBehavior, SIMD::Int activeLaneMask, const SpirvRoutine *routine)
{
	const bool useStencilAspect = (imageFormat == VK_FORMAT_S8_UINT);
	// Pitches are loaded per lane since each lane may reference a different image.
	auto rowPitch = (descriptor + (useStencilAspect
	                                   ? OFFSET(vk::StorageImageDescriptor, stencilRowPitchBytes)
	                                   : OFFSET(vk::StorageImageDescriptor, rowPitchBytes)))
	                    .Load<SIMD::Int>(outOfBoundsBehavior, activeLaneMask);
	auto slicePitch = (descriptor + (useStencilAspect
	                                     ? OFFSET(vk::StorageImageDescriptor, stencilSlicePitchBytes)
	                                     : OFFSET(vk::StorageImageDescriptor, slicePitchBytes)))
	                      .Load<SIMD::Int>(outOfBoundsBehavior, activeLaneMask);
	auto samplePitch = (descriptor + (useStencilAspect
	                                      ? OFFSET(vk::StorageImageDescriptor, stencilSamplePitchBytes)
	                                      : OFFSET(vk::StorageImageDescriptor, samplePitchBytes)))
	                       .Load<SIMD::Int>(outOfBoundsBehavior, activeLaneMask);

	auto texelData = setupTexelAddressData(rowPitch, slicePitch, samplePitch, instruction, coordinate, sample, imageFormat, routine);

	// If the out-of-bounds behavior is set to nullify, then each coordinate must be tested individually.
	// Other out-of-bounds behaviors work properly by just comparing the offset against the total size.
	if(outOfBoundsBehavior == OutOfBoundsBehavior::Nullify)
	{
		// Unsigned compares also catch negative coordinates (they wrap to huge values).
		SIMD::UInt width = (descriptor + OFFSET(vk::StorageImageDescriptor, width)).Load<SIMD::Int>(outOfBoundsBehavior, activeLaneMask);
		SIMD::Int oobMask = As<SIMD::Int>(CmpNLT(As<SIMD::UInt>(texelData.u), width));

		if(texelData.dims > 1)
		{
			SIMD::UInt height = As<SIMD::UInt>((descriptor + OFFSET(vk::StorageImageDescriptor, height)).Load<SIMD::Int>(outOfBoundsBehavior, activeLaneMask));
			oobMask |= As<SIMD::Int>(CmpNLT(As<SIMD::UInt>(texelData.v), height));
		}

		if((texelData.dims > 2) || texelData.isArrayed)
		{
			SIMD::UInt depth = As<SIMD::UInt>((descriptor + OFFSET(vk::StorageImageDescriptor, depth)).Load<SIMD::Int>(outOfBoundsBehavior, activeLaneMask));
			if(texelData.dim == spv::DimCube) { depth *= 6; }  // Cube images address 6 faces per layer.
			oobMask |= As<SIMD::Int>(CmpNLT(As<SIMD::UInt>(texelData.w), depth));
		}

		if(instruction.sample)
		{
			SIMD::UInt sampleCount = As<SIMD::UInt>((descriptor + OFFSET(vk::StorageImageDescriptor, sampleCount)).Load<SIMD::Int>(outOfBoundsBehavior, activeLaneMask));
			oobMask |= As<SIMD::Int>(CmpNLT(As<SIMD::UInt>(sample), sampleCount));
		}

		constexpr int32_t OOB_OFFSET = 0x7FFFFFFF - 16;  // SIMD pointer offsets are signed 32-bit, so this is the largest offset (for 16-byte texels).
		static_assert(OOB_OFFSET >= vk::MAX_MEMORY_ALLOCATION_SIZE, "the largest offset must be guaranteed to be out-of-bounds");

		texelData.ptrOffset = (texelData.ptrOffset & ~oobMask) | (oobMask & SIMD::Int(OOB_OFFSET));  // oob ? OOB_OFFSET : ptrOffset // TODO: IfThenElse()
	}

	// Gather each lane's image base pointer from its own descriptor.
	std::vector<Pointer<Byte>> imageBase(SIMD::Width);
	for(int i = 0; i < SIMD::Width; i++)
	{
		imageBase[i] = *Pointer<Pointer<Byte>>(descriptor.getPointerForLane(i) + (useStencilAspect
	                                                                                  ? OFFSET(vk::StorageImageDescriptor, stencilPtr)
	                                                                                  : OFFSET(vk::StorageImageDescriptor, ptr)));
	}

	return SIMD::Pointer(imageBase) + texelData.ptrOffset;
}
764
GetTexelAddress(ImageInstructionSignature instruction,Pointer<Byte> descriptor,SIMD::Int coordinate[],SIMD::Int sample,vk::Format imageFormat,OutOfBoundsBehavior outOfBoundsBehavior,const SpirvRoutine * routine)765 SIMD::Pointer SpirvEmitter::GetTexelAddress(ImageInstructionSignature instruction, Pointer<Byte> descriptor, SIMD::Int coordinate[], SIMD::Int sample, vk::Format imageFormat, OutOfBoundsBehavior outOfBoundsBehavior, const SpirvRoutine *routine)
766 {
767 const bool useStencilAspect = (imageFormat == VK_FORMAT_S8_UINT);
768 auto rowPitch = SIMD::Int(*Pointer<Int>(descriptor + (useStencilAspect
769 ? OFFSET(vk::StorageImageDescriptor, stencilRowPitchBytes)
770 : OFFSET(vk::StorageImageDescriptor, rowPitchBytes))));
771 auto slicePitch = SIMD::Int(
772 *Pointer<Int>(descriptor + (useStencilAspect
773 ? OFFSET(vk::StorageImageDescriptor, stencilSlicePitchBytes)
774 : OFFSET(vk::StorageImageDescriptor, slicePitchBytes))));
775 auto samplePitch = SIMD::Int(
776 *Pointer<Int>(descriptor + (useStencilAspect
777 ? OFFSET(vk::StorageImageDescriptor, stencilSamplePitchBytes)
778 : OFFSET(vk::StorageImageDescriptor, samplePitchBytes))));
779
780 auto texelData = setupTexelAddressData(rowPitch, slicePitch, samplePitch, instruction, coordinate, sample, imageFormat, routine);
781
782 // If the out-of-bounds behavior is set to nullify, then each coordinate must be tested individually.
783 // Other out-of-bounds behaviors work properly by just comparing the offset against the total size.
784 if(outOfBoundsBehavior == OutOfBoundsBehavior::Nullify)
785 {
786 SIMD::UInt width = *Pointer<UInt>(descriptor + OFFSET(vk::StorageImageDescriptor, width));
787 SIMD::Int oobMask = As<SIMD::Int>(CmpNLT(As<SIMD::UInt>(texelData.u), width));
788
789 if(texelData.dims > 1)
790 {
791 SIMD::UInt height = *Pointer<UInt>(descriptor + OFFSET(vk::StorageImageDescriptor, height));
792 oobMask |= As<SIMD::Int>(CmpNLT(As<SIMD::UInt>(texelData.v), height));
793 }
794
795 if((texelData.dims > 2) || texelData.isArrayed)
796 {
797 UInt depth = *Pointer<UInt>(descriptor + OFFSET(vk::StorageImageDescriptor, depth));
798 if(texelData.dim == spv::DimCube) { depth *= 6; }
799 oobMask |= As<SIMD::Int>(CmpNLT(As<SIMD::UInt>(texelData.w), SIMD::UInt(depth)));
800 }
801
802 if(instruction.sample)
803 {
804 SIMD::UInt sampleCount = *Pointer<UInt>(descriptor + OFFSET(vk::StorageImageDescriptor, sampleCount));
805 oobMask |= As<SIMD::Int>(CmpNLT(As<SIMD::UInt>(sample), sampleCount));
806 }
807
808 constexpr int32_t OOB_OFFSET = 0x7FFFFFFF - 16; // SIMD pointer offsets are signed 32-bit, so this is the largest offset (for 16-byte texels).
809 static_assert(OOB_OFFSET >= vk::MAX_MEMORY_ALLOCATION_SIZE, "the largest offset must be guaranteed to be out-of-bounds");
810
811 texelData.ptrOffset = (texelData.ptrOffset & ~oobMask) | (oobMask & SIMD::Int(OOB_OFFSET)); // oob ? OOB_OFFSET : ptrOffset // TODO: IfThenElse()
812 }
813
814 Pointer<Byte> imageBase = *Pointer<Pointer<Byte>>(descriptor + (useStencilAspect
815 ? OFFSET(vk::StorageImageDescriptor, stencilPtr)
816 : OFFSET(vk::StorageImageDescriptor, ptr)));
817
818 Int imageSizeInBytes = *Pointer<Int>(descriptor + OFFSET(vk::StorageImageDescriptor, sizeInBytes));
819
820 return SIMD::Pointer(imageBase, imageSizeInBytes, texelData.ptrOffset);
821 }
822
EmitImageRead(const ImageInstruction & instruction)823 void SpirvEmitter::EmitImageRead(const ImageInstruction &instruction)
824 {
825 auto &resultType = shader.getObjectType(instruction.resultId);
826 auto &image = shader.getObject(instruction.imageId);
827 auto &imageType = shader.getType(image);
828
829 ASSERT(imageType.definition.opcode() == spv::OpTypeImage);
830 auto dim = static_cast<spv::Dim>(instruction.dim);
831
832 auto coordinate = Operand(shader, *this, instruction.coordinateId);
833 const Spirv::DescriptorDecorations &d = shader.descriptorDecorations.at(instruction.imageId);
834
835 // For subpass data, format in the instruction is spv::ImageFormatUnknown. Get it from
836 // the renderpass data instead. In all other cases, we can use the format in the instruction.
837 ASSERT(dim != spv::DimSubpassData || attachments != nullptr);
838 vk::Format imageFormat = (dim == spv::DimSubpassData)
839 ? shader.getInputAttachmentFormat(*attachments, d.InputAttachmentIndex)
840 : SpirvFormatToVulkanFormat(static_cast<spv::ImageFormat>(instruction.imageFormat));
841
842 // Depth+Stencil image attachments select aspect based on the Sampled Type of the
843 // OpTypeImage. If float, then we want the depth aspect. If int, we want the stencil aspect.
844 bool useStencilAspect = (imageFormat == VK_FORMAT_D32_SFLOAT_S8_UINT &&
845 shader.getType(imageType.definition.word(2)).opcode() == spv::OpTypeInt);
846
847 if(useStencilAspect)
848 {
849 imageFormat = VK_FORMAT_S8_UINT;
850 }
851
852 auto &dst = createIntermediate(instruction.resultId, resultType.componentCount);
853 SIMD::Pointer ptr = getPointer(instruction.imageId);
854
855 SIMD::Int uvwa[4];
856 SIMD::Int sample;
857 const int texelSize = imageFormat.bytes();
858 // VK_EXT_image_robustness requires replacing out-of-bounds access with zero.
859 // TODO(b/162327166): Only perform bounds checks when VK_EXT_image_robustness is enabled.
860 auto robustness = OutOfBoundsBehavior::Nullify;
861
862 for(uint32_t i = 0; i < instruction.coordinates; i++)
863 {
864 uvwa[i] = coordinate.Int(i);
865 }
866 if(instruction.sample)
867 {
868 sample = Operand(shader, *this, instruction.sampleId).Int(0);
869 }
870
871 // Gather packed texel data. Texels larger than 4 bytes occupy multiple SIMD::Int elements.
872 // TODO(b/160531165): Provide gather abstractions for various element sizes.
873 SIMD::Int packed[4];
874
875 SIMD::Pointer texelPtr = ptr.isBasePlusOffset
876 ? GetTexelAddress(instruction, ptr.getUniformPointer(), uvwa, sample, imageFormat, robustness, routine)
877 : GetNonUniformTexelAddress(instruction, ptr, uvwa, sample, imageFormat, robustness, activeLaneMask(), routine);
878 if(texelSize == 4 || texelSize == 8 || texelSize == 16)
879 {
880 for(auto i = 0; i < texelSize / 4; i++)
881 {
882 packed[i] = texelPtr.Load<SIMD::Int>(robustness, activeLaneMask());
883 texelPtr += sizeof(float);
884 }
885 }
886 else if(texelSize == 2)
887 {
888 SIMD::Int mask = activeLaneMask() & texelPtr.isInBounds(2, robustness);
889
890 for(int i = 0; i < SIMD::Width; i++)
891 {
892 If(Extract(mask, i) != 0)
893 {
894 packed[0] = Insert(packed[0], Int(*Pointer<Short>(texelPtr.getPointerForLane(i))), i);
895 }
896 }
897 }
898 else if(texelSize == 1)
899 {
900 SIMD::Int mask = activeLaneMask() & texelPtr.isInBounds(1, robustness);
901 for(int i = 0; i < SIMD::Width; i++)
902 {
903 If(Extract(mask, i) != 0)
904 {
905 packed[0] = Insert(packed[0], Int(*Pointer<Byte>(texelPtr.getPointerForLane(i))), i);
906 }
907 }
908 }
909 else
910 UNREACHABLE("texelSize: %d", int(texelSize));
911
912 // Format support requirements here come from two sources:
913 // - Minimum required set of formats for loads from storage images
914 // - Any format supported as a color or depth/stencil attachment, for input attachments
915 switch(imageFormat)
916 {
917 case VK_FORMAT_R32G32B32A32_SFLOAT:
918 case VK_FORMAT_R32G32B32A32_SINT:
919 case VK_FORMAT_R32G32B32A32_UINT:
920 dst.move(0, packed[0]);
921 dst.move(1, packed[1]);
922 dst.move(2, packed[2]);
923 dst.move(3, packed[3]);
924 break;
925 case VK_FORMAT_R32_SINT:
926 case VK_FORMAT_R32_UINT:
927 dst.move(0, packed[0]);
928 // Fill remaining channels with 0,0,1 (of the correct type)
929 dst.move(1, SIMD::Int(0));
930 dst.move(2, SIMD::Int(0));
931 dst.move(3, SIMD::Int(1));
932 break;
933 case VK_FORMAT_R32_SFLOAT:
934 case VK_FORMAT_D32_SFLOAT:
935 case VK_FORMAT_D32_SFLOAT_S8_UINT:
936 dst.move(0, packed[0]);
937 // Fill remaining channels with 0,0,1 (of the correct type)
938 dst.move(1, SIMD::Float(0.0f));
939 dst.move(2, SIMD::Float(0.0f));
940 dst.move(3, SIMD::Float(1.0f));
941 break;
942 case VK_FORMAT_D16_UNORM:
943 dst.move(0, SIMD::Float(packed[0] & SIMD::Int(0xFFFF)) * SIMD::Float(1.0f / 0xFFFF));
944 dst.move(1, SIMD::Float(0.0f));
945 dst.move(2, SIMD::Float(0.0f));
946 dst.move(3, SIMD::Float(1.0f));
947 break;
948 case VK_FORMAT_R16G16B16A16_UNORM:
949 dst.move(0, SIMD::Float(packed[0] & SIMD::Int(0xFFFF)) * SIMD::Float(1.0f / 0xFFFF));
950 dst.move(1, SIMD::Float((packed[0] >> 16) & SIMD::Int(0xFFFF)) * SIMD::Float(1.0f / 0xFFFF));
951 dst.move(2, SIMD::Float(packed[1] & SIMD::Int(0xFFFF)) * SIMD::Float(1.0f / 0xFFFF));
952 dst.move(3, SIMD::Float((packed[1] >> 16) & SIMD::Int(0xFFFF)) * SIMD::Float(1.0f / 0xFFFF));
953 break;
954 case VK_FORMAT_R16G16B16A16_SNORM:
955 dst.move(0, Max(SIMD::Float((packed[0] << 16) & SIMD::Int(0xFFFF0000)) * SIMD::Float(1.0f / 0x7FFF0000), SIMD::Float(-1.0f)));
956 dst.move(1, Max(SIMD::Float(packed[0] & SIMD::Int(0xFFFF0000)) * SIMD::Float(1.0f / 0x7FFF0000), SIMD::Float(-1.0f)));
957 dst.move(2, Max(SIMD::Float((packed[1] << 16) & SIMD::Int(0xFFFF0000)) * SIMD::Float(1.0f / 0x7FFF0000), SIMD::Float(-1.0f)));
958 dst.move(3, Max(SIMD::Float(packed[1] & SIMD::Int(0xFFFF0000)) * SIMD::Float(1.0f / 0x7FFF0000), SIMD::Float(-1.0f)));
959 break;
960 case VK_FORMAT_R16G16B16A16_SINT:
961 dst.move(0, (packed[0] << 16) >> 16);
962 dst.move(1, packed[0] >> 16);
963 dst.move(2, (packed[1] << 16) >> 16);
964 dst.move(3, packed[1] >> 16);
965 break;
966 case VK_FORMAT_R16G16B16A16_UINT:
967 dst.move(0, packed[0] & SIMD::Int(0xFFFF));
968 dst.move(1, (packed[0] >> 16) & SIMD::Int(0xFFFF));
969 dst.move(2, packed[1] & SIMD::Int(0xFFFF));
970 dst.move(3, (packed[1] >> 16) & SIMD::Int(0xFFFF));
971 break;
972 case VK_FORMAT_R16G16B16A16_SFLOAT:
973 dst.move(0, halfToFloatBits(As<SIMD::UInt>(packed[0]) & SIMD::UInt(0x0000FFFF)));
974 dst.move(1, halfToFloatBits((As<SIMD::UInt>(packed[0]) & SIMD::UInt(0xFFFF0000)) >> 16));
975 dst.move(2, halfToFloatBits(As<SIMD::UInt>(packed[1]) & SIMD::UInt(0x0000FFFF)));
976 dst.move(3, halfToFloatBits((As<SIMD::UInt>(packed[1]) & SIMD::UInt(0xFFFF0000)) >> 16));
977 break;
978 case VK_FORMAT_R8G8B8A8_SNORM:
979 case VK_FORMAT_A8B8G8R8_SNORM_PACK32:
980 dst.move(0, Max(SIMD::Float((packed[0] << 24) & SIMD::Int(0xFF000000)) * SIMD::Float(1.0f / 0x7F000000), SIMD::Float(-1.0f)));
981 dst.move(1, Max(SIMD::Float((packed[0] << 16) & SIMD::Int(0xFF000000)) * SIMD::Float(1.0f / 0x7F000000), SIMD::Float(-1.0f)));
982 dst.move(2, Max(SIMD::Float((packed[0] << 8) & SIMD::Int(0xFF000000)) * SIMD::Float(1.0f / 0x7F000000), SIMD::Float(-1.0f)));
983 dst.move(3, Max(SIMD::Float((packed[0]) & SIMD::Int(0xFF000000)) * SIMD::Float(1.0f / 0x7F000000), SIMD::Float(-1.0f)));
984 break;
985 case VK_FORMAT_R8G8B8A8_UNORM:
986 case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
987 dst.move(0, SIMD::Float(packed[0] & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF));
988 dst.move(1, SIMD::Float((packed[0] >> 8) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF));
989 dst.move(2, SIMD::Float((packed[0] >> 16) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF));
990 dst.move(3, SIMD::Float((packed[0] >> 24) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF));
991 break;
992 case VK_FORMAT_R8G8B8A8_SRGB:
993 case VK_FORMAT_A8B8G8R8_SRGB_PACK32:
994 dst.move(0, sRGBtoLinear(SIMD::Float(packed[0] & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF)));
995 dst.move(1, sRGBtoLinear(SIMD::Float((packed[0] >> 8) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF)));
996 dst.move(2, sRGBtoLinear(SIMD::Float((packed[0] >> 16) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF)));
997 dst.move(3, SIMD::Float((packed[0] >> 24) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF));
998 break;
999 case VK_FORMAT_B8G8R8A8_UNORM:
1000 dst.move(0, SIMD::Float((packed[0] >> 16) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF));
1001 dst.move(1, SIMD::Float((packed[0] >> 8) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF));
1002 dst.move(2, SIMD::Float(packed[0] & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF));
1003 dst.move(3, SIMD::Float((packed[0] >> 24) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF));
1004 break;
1005 case VK_FORMAT_B8G8R8A8_SRGB:
1006 dst.move(0, sRGBtoLinear(SIMD::Float((packed[0] >> 16) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF)));
1007 dst.move(1, sRGBtoLinear(SIMD::Float((packed[0] >> 8) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF)));
1008 dst.move(2, sRGBtoLinear(SIMD::Float(packed[0] & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF)));
1009 dst.move(3, SIMD::Float((packed[0] >> 24) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF));
1010 break;
1011 case VK_FORMAT_R8G8B8A8_UINT:
1012 case VK_FORMAT_A8B8G8R8_UINT_PACK32:
1013 dst.move(0, As<SIMD::UInt>(packed[0]) & SIMD::UInt(0xFF));
1014 dst.move(1, (As<SIMD::UInt>(packed[0]) >> 8) & SIMD::UInt(0xFF));
1015 dst.move(2, (As<SIMD::UInt>(packed[0]) >> 16) & SIMD::UInt(0xFF));
1016 dst.move(3, (As<SIMD::UInt>(packed[0]) >> 24) & SIMD::UInt(0xFF));
1017 break;
1018 case VK_FORMAT_R8G8B8A8_SINT:
1019 case VK_FORMAT_A8B8G8R8_SINT_PACK32:
1020 dst.move(0, (packed[0] << 24) >> 24);
1021 dst.move(1, (packed[0] << 16) >> 24);
1022 dst.move(2, (packed[0] << 8) >> 24);
1023 dst.move(3, packed[0] >> 24);
1024 break;
1025 case VK_FORMAT_R8_UNORM:
1026 dst.move(0, SIMD::Float((packed[0] & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 0xFF));
1027 dst.move(1, SIMD::Float(0.0f));
1028 dst.move(2, SIMD::Float(0.0f));
1029 dst.move(3, SIMD::Float(1.0f));
1030 break;
1031 case VK_FORMAT_R8_SNORM:
1032 dst.move(0, Max(SIMD::Float((packed[0] << 24) & SIMD::Int(0xFF000000)) * SIMD::Float(1.0f / 0x7F000000), SIMD::Float(-1.0f)));
1033 dst.move(1, SIMD::Float(0.0f));
1034 dst.move(2, SIMD::Float(0.0f));
1035 dst.move(3, SIMD::Float(1.0f));
1036 break;
1037 case VK_FORMAT_R8_UINT:
1038 case VK_FORMAT_S8_UINT:
1039 dst.move(0, As<SIMD::UInt>(packed[0]) & SIMD::UInt(0xFF));
1040 dst.move(1, SIMD::UInt(0));
1041 dst.move(2, SIMD::UInt(0));
1042 dst.move(3, SIMD::UInt(1));
1043 break;
1044 case VK_FORMAT_R8_SINT:
1045 dst.move(0, (packed[0] << 24) >> 24);
1046 dst.move(1, SIMD::Int(0));
1047 dst.move(2, SIMD::Int(0));
1048 dst.move(3, SIMD::Int(1));
1049 break;
1050 case VK_FORMAT_R8G8_UNORM:
1051 dst.move(0, SIMD::Float(packed[0] & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF));
1052 dst.move(1, SIMD::Float((packed[0] >> 8) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF));
1053 dst.move(2, SIMD::Float(0.0f));
1054 dst.move(3, SIMD::Float(1.0f));
1055 break;
1056 case VK_FORMAT_R8G8_SNORM:
1057 dst.move(0, Max(SIMD::Float((packed[0] << 24) & SIMD::Int(0xFF000000)) * SIMD::Float(1.0f / 0x7F000000), SIMD::Float(-1.0f)));
1058 dst.move(1, Max(SIMD::Float((packed[0] << 16) & SIMD::Int(0xFF000000)) * SIMD::Float(1.0f / 0x7F000000), SIMD::Float(-1.0f)));
1059 dst.move(2, SIMD::Float(0.0f));
1060 dst.move(3, SIMD::Float(1.0f));
1061 break;
1062 case VK_FORMAT_R8G8_UINT:
1063 dst.move(0, As<SIMD::UInt>(packed[0]) & SIMD::UInt(0xFF));
1064 dst.move(1, (As<SIMD::UInt>(packed[0]) >> 8) & SIMD::UInt(0xFF));
1065 dst.move(2, SIMD::UInt(0));
1066 dst.move(3, SIMD::UInt(1));
1067 break;
1068 case VK_FORMAT_R8G8_SINT:
1069 dst.move(0, (packed[0] << 24) >> 24);
1070 dst.move(1, (packed[0] << 16) >> 24);
1071 dst.move(2, SIMD::Int(0));
1072 dst.move(3, SIMD::Int(1));
1073 break;
1074 case VK_FORMAT_R16_SFLOAT:
1075 dst.move(0, halfToFloatBits(As<SIMD::UInt>(packed[0]) & SIMD::UInt(0x0000FFFF)));
1076 dst.move(1, SIMD::Float(0.0f));
1077 dst.move(2, SIMD::Float(0.0f));
1078 dst.move(3, SIMD::Float(1.0f));
1079 break;
1080 case VK_FORMAT_R16_UNORM:
1081 dst.move(0, SIMD::Float(packed[0] & SIMD::Int(0xFFFF)) * SIMD::Float(1.0f / 0xFFFF));
1082 dst.move(1, SIMD::Float(0.0f));
1083 dst.move(2, SIMD::Float(0.0f));
1084 dst.move(3, SIMD::Float(1.0f));
1085 break;
1086 case VK_FORMAT_R16_SNORM:
1087 dst.move(0, Max(SIMD::Float((packed[0] << 16) & SIMD::Int(0xFFFF0000)) * SIMD::Float(1.0f / 0x7FFF0000), SIMD::Float(-1.0f)));
1088 dst.move(1, SIMD::Float(0.0f));
1089 dst.move(2, SIMD::Float(0.0f));
1090 dst.move(3, SIMD::Float(1.0f));
1091 break;
1092 case VK_FORMAT_R16_UINT:
1093 dst.move(0, packed[0] & SIMD::Int(0xFFFF));
1094 dst.move(1, SIMD::UInt(0));
1095 dst.move(2, SIMD::UInt(0));
1096 dst.move(3, SIMD::UInt(1));
1097 break;
1098 case VK_FORMAT_R16_SINT:
1099 dst.move(0, (packed[0] << 16) >> 16);
1100 dst.move(1, SIMD::Int(0));
1101 dst.move(2, SIMD::Int(0));
1102 dst.move(3, SIMD::Int(1));
1103 break;
1104 case VK_FORMAT_R16G16_SFLOAT:
1105 dst.move(0, halfToFloatBits(As<SIMD::UInt>(packed[0]) & SIMD::UInt(0x0000FFFF)));
1106 dst.move(1, halfToFloatBits((As<SIMD::UInt>(packed[0]) & SIMD::UInt(0xFFFF0000)) >> 16));
1107 dst.move(2, SIMD::Float(0.0f));
1108 dst.move(3, SIMD::Float(1.0f));
1109 break;
1110 case VK_FORMAT_R16G16_UNORM:
1111 dst.move(0, SIMD::Float(packed[0] & SIMD::Int(0xFFFF)) * SIMD::Float(1.0f / 0xFFFF));
1112 dst.move(1, SIMD::Float(As<SIMD::UInt>(packed[0]) >> 16) * SIMD::Float(1.0f / 0xFFFF));
1113 dst.move(2, SIMD::Float(0.0f));
1114 dst.move(3, SIMD::Float(1.0f));
1115 break;
1116 case VK_FORMAT_R16G16_SNORM:
1117 dst.move(0, Max(SIMD::Float((packed[0] << 16) & SIMD::Int(0xFFFF0000)) * SIMD::Float(1.0f / 0x7FFF0000), SIMD::Float(-1.0f)));
1118 dst.move(1, Max(SIMD::Float(packed[0] & SIMD::Int(0xFFFF0000)) * SIMD::Float(1.0f / 0x7FFF0000), SIMD::Float(-1.0f)));
1119 dst.move(2, SIMD::Float(0.0f));
1120 dst.move(3, SIMD::Float(1.0f));
1121 break;
1122 case VK_FORMAT_R16G16_UINT:
1123 dst.move(0, packed[0] & SIMD::Int(0xFFFF));
1124 dst.move(1, (packed[0] >> 16) & SIMD::Int(0xFFFF));
1125 dst.move(2, SIMD::UInt(0));
1126 dst.move(3, SIMD::UInt(1));
1127 break;
1128 case VK_FORMAT_R16G16_SINT:
1129 dst.move(0, (packed[0] << 16) >> 16);
1130 dst.move(1, packed[0] >> 16);
1131 dst.move(2, SIMD::Int(0));
1132 dst.move(3, SIMD::Int(1));
1133 break;
1134 case VK_FORMAT_R32G32_SINT:
1135 case VK_FORMAT_R32G32_UINT:
1136 dst.move(0, packed[0]);
1137 dst.move(1, packed[1]);
1138 dst.move(2, SIMD::Int(0));
1139 dst.move(3, SIMD::Int(1));
1140 break;
1141 case VK_FORMAT_R32G32_SFLOAT:
1142 dst.move(0, packed[0]);
1143 dst.move(1, packed[1]);
1144 dst.move(2, SIMD::Float(0.0f));
1145 dst.move(3, SIMD::Float(1.0f));
1146 break;
1147 case VK_FORMAT_A2B10G10R10_UINT_PACK32:
1148 dst.move(0, packed[0] & SIMD::Int(0x3FF));
1149 dst.move(1, (packed[0] >> 10) & SIMD::Int(0x3FF));
1150 dst.move(2, (packed[0] >> 20) & SIMD::Int(0x3FF));
1151 dst.move(3, (packed[0] >> 30) & SIMD::Int(0x3));
1152 break;
1153 case VK_FORMAT_A2R10G10B10_UINT_PACK32:
1154 dst.move(2, packed[0] & SIMD::Int(0x3FF));
1155 dst.move(1, (packed[0] >> 10) & SIMD::Int(0x3FF));
1156 dst.move(0, (packed[0] >> 20) & SIMD::Int(0x3FF));
1157 dst.move(3, (packed[0] >> 30) & SIMD::Int(0x3));
1158 break;
1159 case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
1160 dst.move(0, SIMD::Float((packed[0]) & SIMD::Int(0x3FF)) * SIMD::Float(1.0f / 0x3FF));
1161 dst.move(1, SIMD::Float((packed[0] >> 10) & SIMD::Int(0x3FF)) * SIMD::Float(1.0f / 0x3FF));
1162 dst.move(2, SIMD::Float((packed[0] >> 20) & SIMD::Int(0x3FF)) * SIMD::Float(1.0f / 0x3FF));
1163 dst.move(3, SIMD::Float((packed[0] >> 30) & SIMD::Int(0x3)) * SIMD::Float(1.0f / 0x3));
1164 break;
1165 case VK_FORMAT_A2R10G10B10_UNORM_PACK32:
1166 dst.move(2, SIMD::Float((packed[0]) & SIMD::Int(0x3FF)) * SIMD::Float(1.0f / 0x3FF));
1167 dst.move(1, SIMD::Float((packed[0] >> 10) & SIMD::Int(0x3FF)) * SIMD::Float(1.0f / 0x3FF));
1168 dst.move(0, SIMD::Float((packed[0] >> 20) & SIMD::Int(0x3FF)) * SIMD::Float(1.0f / 0x3FF));
1169 dst.move(3, SIMD::Float((packed[0] >> 30) & SIMD::Int(0x3)) * SIMD::Float(1.0f / 0x3));
1170 break;
1171 case VK_FORMAT_R4G4B4A4_UNORM_PACK16:
1172 dst.move(0, SIMD::Float((packed[0] >> 12) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
1173 dst.move(1, SIMD::Float((packed[0] >> 8) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
1174 dst.move(2, SIMD::Float((packed[0] >> 4) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
1175 dst.move(3, SIMD::Float((packed[0]) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
1176 break;
1177 case VK_FORMAT_B4G4R4A4_UNORM_PACK16:
1178 dst.move(0, SIMD::Float((packed[0] >> 4) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
1179 dst.move(1, SIMD::Float((packed[0] >> 8) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
1180 dst.move(2, SIMD::Float((packed[0] >> 12) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
1181 dst.move(3, SIMD::Float((packed[0]) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
1182 break;
1183 case VK_FORMAT_A4R4G4B4_UNORM_PACK16:
1184 dst.move(0, SIMD::Float((packed[0] >> 8) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
1185 dst.move(1, SIMD::Float((packed[0] >> 4) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
1186 dst.move(2, SIMD::Float((packed[0]) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
1187 dst.move(3, SIMD::Float((packed[0] >> 12) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
1188 break;
1189 case VK_FORMAT_A4B4G4R4_UNORM_PACK16:
1190 dst.move(0, SIMD::Float((packed[0]) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
1191 dst.move(1, SIMD::Float((packed[0] >> 4) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
1192 dst.move(2, SIMD::Float((packed[0] >> 8) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
1193 dst.move(3, SIMD::Float((packed[0] >> 12) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
1194 break;
1195 case VK_FORMAT_R5G6B5_UNORM_PACK16:
1196 dst.move(0, SIMD::Float((packed[0] >> 11) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
1197 dst.move(1, SIMD::Float((packed[0] >> 5) & SIMD::Int(0x3F)) * SIMD::Float(1.0f / 0x3F));
1198 dst.move(2, SIMD::Float((packed[0]) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
1199 dst.move(3, SIMD::Float(1.0f));
1200 break;
1201 case VK_FORMAT_B5G6R5_UNORM_PACK16:
1202 dst.move(0, SIMD::Float((packed[0]) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
1203 dst.move(1, SIMD::Float((packed[0] >> 5) & SIMD::Int(0x3F)) * SIMD::Float(1.0f / 0x3F));
1204 dst.move(2, SIMD::Float((packed[0] >> 11) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
1205 dst.move(3, SIMD::Float(1.0f));
1206 break;
1207 case VK_FORMAT_R5G5B5A1_UNORM_PACK16:
1208 dst.move(0, SIMD::Float((packed[0] >> 11) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
1209 dst.move(1, SIMD::Float((packed[0] >> 6) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
1210 dst.move(2, SIMD::Float((packed[0] >> 1) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
1211 dst.move(3, SIMD::Float((packed[0]) & SIMD::Int(0x1)));
1212 break;
1213 case VK_FORMAT_B5G5R5A1_UNORM_PACK16:
1214 dst.move(0, SIMD::Float((packed[0] >> 1) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
1215 dst.move(1, SIMD::Float((packed[0] >> 6) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
1216 dst.move(2, SIMD::Float((packed[0] >> 11) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
1217 dst.move(3, SIMD::Float((packed[0]) & SIMD::Int(0x1)));
1218 break;
1219 case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
1220 dst.move(0, SIMD::Float((packed[0] >> 10) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
1221 dst.move(1, SIMD::Float((packed[0] >> 5) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
1222 dst.move(2, SIMD::Float((packed[0]) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
1223 dst.move(3, SIMD::Float((packed[0] >> 15) & SIMD::Int(0x1)));
1224 break;
1225 case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
1226 dst.move(0, halfToFloatBits((packed[0] << 4) & SIMD::Int(0x7FF0)));
1227 dst.move(1, halfToFloatBits((packed[0] >> 7) & SIMD::Int(0x7FF0)));
1228 dst.move(2, halfToFloatBits((packed[0] >> 17) & SIMD::Int(0x7FE0)));
1229 dst.move(3, SIMD::Float(1.0f));
1230 break;
1231 default:
1232 UNSUPPORTED("VkFormat %d", int(imageFormat));
1233 break;
1234 }
1235 }
1236
EmitImageWrite(const ImageInstruction & instruction)1237 void SpirvEmitter::EmitImageWrite(const ImageInstruction &instruction)
1238 {
1239 auto &image = shader.getObject(instruction.imageId);
1240 auto &imageType = shader.getType(image);
1241
1242 ASSERT(imageType.definition.opcode() == spv::OpTypeImage);
1243 ASSERT(static_cast<spv::Dim>(instruction.dim) != spv::DimSubpassData); // "Its Dim operand must not be SubpassData."
1244
1245 auto coordinate = Operand(shader, *this, instruction.coordinateId);
1246 auto texel = Operand(shader, *this, instruction.texelId);
1247
1248 Array<SIMD::Int> coord(5); // uvwa & sample
1249
1250 uint32_t i = 0;
1251 for(; i < instruction.coordinates; i++)
1252 {
1253 coord[i] = coordinate.Int(i);
1254 }
1255
1256 if(instruction.sample)
1257 {
1258 coord[i] = Operand(shader, *this, instruction.sampleId).Int(0);
1259 }
1260
1261 Array<SIMD::Int> texelAndMask(5);
1262 for(uint32_t i = 0; i < texel.componentCount; ++i)
1263 {
1264 texelAndMask[i] = texel.Int(i);
1265 }
1266 for(uint32_t i = texel.componentCount; i < 4; ++i)
1267 {
1268 texelAndMask[i] = SIMD::Int(0);
1269 }
1270 texelAndMask[4] = activeStoresAndAtomicsMask();
1271
1272 vk::Format imageFormat = SpirvFormatToVulkanFormat(static_cast<spv::ImageFormat>(instruction.imageFormat));
1273
1274 SIMD::Pointer ptr = getPointer(instruction.imageId);
1275 if(ptr.isBasePlusOffset)
1276 {
1277 Pointer<Byte> imageDescriptor = ptr.getUniformPointer(); // vk::StorageImageDescriptor* or vk::SampledImageDescriptor*
1278 Pointer<Byte> samplerDescriptor = getSamplerDescriptor(imageDescriptor, instruction);
1279
1280 if(imageFormat == VK_FORMAT_UNDEFINED) // spv::ImageFormatUnknown
1281 {
1282 Pointer<Byte> samplerFunction = lookupSamplerFunction(imageDescriptor, samplerDescriptor, instruction);
1283
1284 Call<ImageSampler>(samplerFunction, imageDescriptor, &coord, &texelAndMask, routine->constants);
1285 }
1286 else
1287 {
1288 WriteImage(instruction, imageDescriptor, &coord, &texelAndMask, imageFormat);
1289 }
1290 }
1291 else
1292 {
1293 for(int j = 0; j < SIMD::Width; j++)
1294 {
1295 SIMD::Int singleLaneMask = 0;
1296 singleLaneMask = Insert(singleLaneMask, 0xffffffff, j);
1297 texelAndMask[4] = activeStoresAndAtomicsMask() & singleLaneMask;
1298 Pointer<Byte> imageDescriptor = ptr.getPointerForLane(j);
1299 Pointer<Byte> samplerDescriptor = getSamplerDescriptor(imageDescriptor, instruction, j);
1300
1301 if(imageFormat == VK_FORMAT_UNDEFINED) // spv::ImageFormatUnknown
1302 {
1303 Pointer<Byte> samplerFunction = lookupSamplerFunction(imageDescriptor, samplerDescriptor, instruction);
1304
1305 Call<ImageSampler>(samplerFunction, imageDescriptor, &coord, &texelAndMask, routine->constants);
1306 }
1307 else
1308 {
1309 WriteImage(instruction, imageDescriptor, &coord, &texelAndMask, imageFormat);
1310 }
1311 }
1312 }
1313 }
1314
WriteImage(ImageInstructionSignature instruction,Pointer<Byte> descriptor,const Pointer<SIMD::Int> & coord,const Pointer<SIMD::Int> & texelAndMask,vk::Format imageFormat)1315 void SpirvEmitter::WriteImage(ImageInstructionSignature instruction, Pointer<Byte> descriptor, const Pointer<SIMD::Int> &coord, const Pointer<SIMD::Int> &texelAndMask, vk::Format imageFormat)
1316 {
1317 SIMD::Int texel[4];
1318 texel[0] = texelAndMask[0];
1319 texel[1] = texelAndMask[1];
1320 texel[2] = texelAndMask[2];
1321 texel[3] = texelAndMask[3];
1322 SIMD::Int mask = texelAndMask[4];
1323
1324 SIMD::Int packed[4];
1325 switch(imageFormat)
1326 {
1327 case VK_FORMAT_R32G32B32A32_SFLOAT:
1328 case VK_FORMAT_R32G32B32A32_SINT:
1329 case VK_FORMAT_R32G32B32A32_UINT:
1330 packed[0] = texel[0];
1331 packed[1] = texel[1];
1332 packed[2] = texel[2];
1333 packed[3] = texel[3];
1334 break;
1335 case VK_FORMAT_R32_SFLOAT:
1336 case VK_FORMAT_R32_SINT:
1337 case VK_FORMAT_R32_UINT:
1338 packed[0] = texel[0];
1339 break;
1340 case VK_FORMAT_R8G8B8A8_UNORM:
1341 case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
1342 packed[0] = (SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[0]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) |
1343 ((SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[1]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 8) |
1344 ((SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[2]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 16) |
1345 ((SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[3]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 24);
1346 break;
1347 case VK_FORMAT_B8G8R8A8_UNORM:
1348 packed[0] = (SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[2]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) |
1349 ((SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[1]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 8) |
1350 ((SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[0]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 16) |
1351 ((SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[3]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 24);
1352 break;
1353 case VK_FORMAT_B8G8R8A8_SRGB:
1354 packed[0] = (SIMD::UInt(Round(Min(Max(linearToSRGB(As<SIMD::Float>(texel[2])), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) |
1355 ((SIMD::UInt(Round(Min(Max(linearToSRGB(As<SIMD::Float>(texel[1])), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 8) |
1356 ((SIMD::UInt(Round(Min(Max(linearToSRGB(As<SIMD::Float>(texel[0])), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 16) |
1357 ((SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[3]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 24);
1358 break;
1359 case VK_FORMAT_R8G8B8A8_SNORM:
1360 case VK_FORMAT_A8B8G8R8_SNORM_PACK32:
1361 packed[0] = (SIMD::Int(Round(Min(Max(As<SIMD::Float>(texel[0]), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
1362 SIMD::Int(0xFF)) |
1363 ((SIMD::Int(Round(Min(Max(As<SIMD::Float>(texel[1]), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
1364 SIMD::Int(0xFF))
1365 << 8) |
1366 ((SIMD::Int(Round(Min(Max(As<SIMD::Float>(texel[2]), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
1367 SIMD::Int(0xFF))
1368 << 16) |
1369 ((SIMD::Int(Round(Min(Max(As<SIMD::Float>(texel[3]), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
1370 SIMD::Int(0xFF))
1371 << 24);
1372 break;
1373 case VK_FORMAT_R8G8B8A8_SINT:
1374 case VK_FORMAT_R8G8B8A8_UINT:
1375 case VK_FORMAT_A8B8G8R8_SINT_PACK32:
1376 case VK_FORMAT_A8B8G8R8_UINT_PACK32:
1377 packed[0] = (SIMD::UInt(As<SIMD::UInt>(texel[0]) & SIMD::UInt(0xff))) |
1378 (SIMD::UInt(As<SIMD::UInt>(texel[1]) & SIMD::UInt(0xff)) << 8) |
1379 (SIMD::UInt(As<SIMD::UInt>(texel[2]) & SIMD::UInt(0xff)) << 16) |
1380 (SIMD::UInt(As<SIMD::UInt>(texel[3]) & SIMD::UInt(0xff)) << 24);
1381 break;
1382 case VK_FORMAT_R16G16B16A16_SFLOAT:
1383 packed[0] = floatToHalfBits(As<SIMD::UInt>(texel[0]), false) | floatToHalfBits(As<SIMD::UInt>(texel[1]), true);
1384 packed[1] = floatToHalfBits(As<SIMD::UInt>(texel[2]), false) | floatToHalfBits(As<SIMD::UInt>(texel[3]), true);
1385 break;
1386 case VK_FORMAT_R16G16B16A16_SINT:
1387 case VK_FORMAT_R16G16B16A16_UINT:
1388 packed[0] = SIMD::UInt(As<SIMD::UInt>(texel[0]) & SIMD::UInt(0xFFFF)) | (SIMD::UInt(As<SIMD::UInt>(texel[1]) & SIMD::UInt(0xFFFF)) << 16);
1389 packed[1] = SIMD::UInt(As<SIMD::UInt>(texel[2]) & SIMD::UInt(0xFFFF)) | (SIMD::UInt(As<SIMD::UInt>(texel[3]) & SIMD::UInt(0xFFFF)) << 16);
1390 break;
1391 case VK_FORMAT_R32G32_SFLOAT:
1392 case VK_FORMAT_R32G32_SINT:
1393 case VK_FORMAT_R32G32_UINT:
1394 packed[0] = texel[0];
1395 packed[1] = texel[1];
1396 break;
1397 case VK_FORMAT_R16G16_SFLOAT:
1398 packed[0] = floatToHalfBits(As<SIMD::UInt>(texel[0]), false) | floatToHalfBits(As<SIMD::UInt>(texel[1]), true);
1399 break;
1400 case VK_FORMAT_R16G16_SINT:
1401 case VK_FORMAT_R16G16_UINT:
1402 packed[0] = SIMD::UInt(As<SIMD::UInt>(texel[0]) & SIMD::UInt(0xFFFF)) | (SIMD::UInt(As<SIMD::UInt>(texel[1]) & SIMD::UInt(0xFFFF)) << 16);
1403 break;
1404 case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
1405 // Truncates instead of rounding. See b/147900455
1406 packed[0] = ((floatToHalfBits(As<SIMD::UInt>(Max(As<SIMD::Float>(texel[0]), SIMD::Float(0.0f))), false) & SIMD::UInt(0x7FF0)) >> 4) |
1407 ((floatToHalfBits(As<SIMD::UInt>(Max(As<SIMD::Float>(texel[1]), SIMD::Float(0.0f))), false) & SIMD::UInt(0x7FF0)) << 7) |
1408 ((floatToHalfBits(As<SIMD::UInt>(Max(As<SIMD::Float>(texel[2]), SIMD::Float(0.0f))), false) & SIMD::UInt(0x7FE0)) << 17);
1409 break;
1410 case VK_FORMAT_R16_SFLOAT:
1411 packed[0] = floatToHalfBits(As<SIMD::UInt>(texel[0]), false);
1412 break;
1413 case VK_FORMAT_R16G16B16A16_UNORM:
1414 packed[0] = SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[0]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0xFFFF))) |
1415 (SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[1]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0xFFFF))) << 16);
1416 packed[1] = SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[2]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0xFFFF))) |
1417 (SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[3]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0xFFFF))) << 16);
1418 break;
1419 case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
1420 packed[0] = (SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[0]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x3FF)))) |
1421 ((SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[1]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x3FF)))) << 10) |
1422 ((SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[2]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x3FF)))) << 20) |
1423 ((SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[3]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x3)))) << 30);
1424 break;
1425 case VK_FORMAT_R16G16_UNORM:
1426 packed[0] = SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[0]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0xFFFF))) |
1427 (SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[1]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0xFFFF))) << 16);
1428 break;
1429 case VK_FORMAT_R8G8_UNORM:
1430 packed[0] = SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[0]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0xFF))) |
1431 (SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[1]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0xFF))) << 8);
1432 break;
1433 case VK_FORMAT_R16_UNORM:
1434 packed[0] = SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[0]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0xFFFF)));
1435 break;
1436 case VK_FORMAT_R8_UNORM:
1437 packed[0] = SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[0]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0xFF)));
1438 break;
1439 case VK_FORMAT_R16G16B16A16_SNORM:
1440 packed[0] = (SIMD::Int(Round(Min(Max(As<SIMD::Float>(texel[0]), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x7FFF))) & SIMD::Int(0xFFFF)) |
1441 (SIMD::Int(Round(Min(Max(As<SIMD::Float>(texel[1]), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x7FFF))) << 16);
1442 packed[1] = (SIMD::Int(Round(Min(Max(As<SIMD::Float>(texel[2]), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x7FFF))) & SIMD::Int(0xFFFF)) |
1443 (SIMD::Int(Round(Min(Max(As<SIMD::Float>(texel[3]), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x7FFF))) << 16);
1444 break;
1445 case VK_FORMAT_R16G16_SNORM:
1446 packed[0] = (SIMD::Int(Round(Min(Max(As<SIMD::Float>(texel[0]), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x7FFF))) & SIMD::Int(0xFFFF)) |
1447 (SIMD::Int(Round(Min(Max(As<SIMD::Float>(texel[1]), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x7FFF))) << 16);
1448 break;
1449 case VK_FORMAT_R8G8_SNORM:
1450 packed[0] = (SIMD::Int(Round(Min(Max(As<SIMD::Float>(texel[0]), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x7F))) & SIMD::Int(0xFF)) |
1451 (SIMD::Int(Round(Min(Max(As<SIMD::Float>(texel[1]), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x7F))) << 8);
1452 break;
1453 case VK_FORMAT_R16_SNORM:
1454 packed[0] = SIMD::Int(Round(Min(Max(As<SIMD::Float>(texel[0]), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x7FFF)));
1455 break;
1456 case VK_FORMAT_R8_SNORM:
1457 packed[0] = SIMD::Int(Round(Min(Max(As<SIMD::Float>(texel[0]), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x7F)));
1458 break;
1459 case VK_FORMAT_R8G8_SINT:
1460 case VK_FORMAT_R8G8_UINT:
1461 packed[0] = SIMD::UInt(As<SIMD::UInt>(texel[0]) & SIMD::UInt(0xFF)) | (SIMD::UInt(As<SIMD::UInt>(texel[1]) & SIMD::UInt(0xFF)) << 8);
1462 break;
1463 case VK_FORMAT_R16_SINT:
1464 case VK_FORMAT_R16_UINT:
1465 packed[0] = SIMD::UInt(As<SIMD::UInt>(texel[0]) & SIMD::UInt(0xFFFF));
1466 break;
1467 case VK_FORMAT_R8_SINT:
1468 case VK_FORMAT_R8_UINT:
1469 packed[0] = SIMD::UInt(As<SIMD::UInt>(texel[0]) & SIMD::UInt(0xFF));
1470 break;
1471 case VK_FORMAT_A2B10G10R10_UINT_PACK32:
1472 packed[0] = (SIMD::UInt(As<SIMD::UInt>(texel[0]) & SIMD::UInt(0x3FF))) |
1473 (SIMD::UInt(As<SIMD::UInt>(texel[1]) & SIMD::UInt(0x3FF)) << 10) |
1474 (SIMD::UInt(As<SIMD::UInt>(texel[2]) & SIMD::UInt(0x3FF)) << 20) |
1475 (SIMD::UInt(As<SIMD::UInt>(texel[3]) & SIMD::UInt(0x3)) << 30);
1476 break;
1477 default:
1478 UNSUPPORTED("VkFormat %d", int(imageFormat));
1479 break;
1480 }
1481
1482 // "The integer texel coordinates are validated according to the same rules as for texel input coordinate
1483 // validation. If the texel fails integer texel coordinate validation, then the write has no effect."
1484 // - https://www.khronos.org/registry/vulkan/specs/1.2/html/chap16.html#textures-output-coordinate-validation
1485 auto robustness = OutOfBoundsBehavior::Nullify;
1486 // GetTexelAddress() only needs the SpirvRoutine* for SubpassData accesses (i.e. input attachments).
1487 const SpirvRoutine *routine = nullptr;
1488
1489 SIMD::Int uvwa[4];
1490 SIMD::Int sample;
1491
1492 uint32_t i = 0;
1493 for(; i < instruction.coordinates; i++)
1494 {
1495 uvwa[i] = As<SIMD::Int>(coord[i]);
1496 }
1497
1498 if(instruction.sample)
1499 {
1500 sample = As<SIMD::Int>(coord[i]);
1501 }
1502
1503 auto texelPtr = GetTexelAddress(instruction, descriptor, uvwa, sample, imageFormat, robustness, routine);
1504
1505 const int texelSize = imageFormat.bytes();
1506
1507 // Scatter packed texel data.
1508 // TODO(b/160531165): Provide scatter abstractions for various element sizes.
1509 if(texelSize == 4 || texelSize == 8 || texelSize == 16)
1510 {
1511 for(auto i = 0; i < texelSize / 4; i++)
1512 {
1513 texelPtr.Store(packed[i], robustness, mask);
1514 texelPtr += sizeof(float);
1515 }
1516 }
1517 else if(texelSize == 2)
1518 {
1519 mask = mask & texelPtr.isInBounds(2, robustness);
1520
1521 for(int i = 0; i < SIMD::Width; i++)
1522 {
1523 If(Extract(mask, i) != 0)
1524 {
1525 *Pointer<Short>(texelPtr.getPointerForLane(i)) = Short(Extract(packed[0], i));
1526 }
1527 }
1528 }
1529 else if(texelSize == 1)
1530 {
1531 mask = mask & texelPtr.isInBounds(1, robustness);
1532
1533 for(int i = 0; i < SIMD::Width; i++)
1534 {
1535 If(Extract(mask, i) != 0)
1536 {
1537 *Pointer<Byte>(texelPtr.getPointerForLane(i)) = Byte(Extract(packed[0], i));
1538 }
1539 }
1540 }
1541 else
1542 UNREACHABLE("texelSize: %d", int(texelSize));
1543 }
1544
EmitImageTexelPointer(const ImageInstruction & instruction)1545 void SpirvEmitter::EmitImageTexelPointer(const ImageInstruction &instruction)
1546 {
1547 auto coordinate = Operand(shader, *this, instruction.coordinateId);
1548
1549 SIMD::Pointer ptr = getPointer(instruction.imageId);
1550
1551 // VK_EXT_image_robustness requires checking for out-of-bounds accesses.
1552 // TODO(b/162327166): Only perform bounds checks when VK_EXT_image_robustness is enabled.
1553 auto robustness = OutOfBoundsBehavior::Nullify;
1554 vk::Format imageFormat = SpirvFormatToVulkanFormat(static_cast<spv::ImageFormat>(instruction.imageFormat));
1555
1556 SIMD::Int uvwa[4];
1557
1558 for(uint32_t i = 0; i < instruction.coordinates; i++)
1559 {
1560 uvwa[i] = coordinate.Int(i);
1561 }
1562
1563 SIMD::Int sample = Operand(shader, *this, instruction.sampleId).Int(0);
1564
1565 auto texelPtr = ptr.isBasePlusOffset
1566 ? GetTexelAddress(instruction, ptr.getUniformPointer(), uvwa, sample, imageFormat, robustness, routine)
1567 : GetNonUniformTexelAddress(instruction, ptr, uvwa, sample, imageFormat, robustness, activeLaneMask(), routine);
1568
1569 createPointer(instruction.resultId, texelPtr);
1570 }
1571
EmitSampledImage(InsnIterator insn)1572 void SpirvEmitter::EmitSampledImage(InsnIterator insn)
1573 {
1574 Object::ID resultId = insn.word(2);
1575 Object::ID imageId = insn.word(3);
1576 Object::ID samplerId = insn.word(4);
1577
1578 // Create a sampled image, containing both a sampler and an image
1579 createSampledImage(resultId, { getPointer(imageId), samplerId });
1580 }
1581
EmitImage(InsnIterator insn)1582 void SpirvEmitter::EmitImage(InsnIterator insn)
1583 {
1584 Object::ID resultId = insn.word(2);
1585 Object::ID imageId = insn.word(3);
1586
1587 // Extract the image from a sampled image.
1588 createPointer(resultId, getImage(imageId));
1589 }
1590
1591 } // namespace sw
1592