// /aosp_15_r20/external/swiftshader/src/Pipeline/SpirvShaderImage.cpp (revision 03ce13f70fcc45d86ee91b7ee4cab1936a95046e)
// Copyright 2019 The SwiftShader Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "SpirvShader.hpp"

#include "System/Types.hpp"

#include "Vulkan/VkDescriptorSetLayout.hpp"
#include "Vulkan/VkPipelineLayout.hpp"

#include <spirv/unified1/spirv.hpp>

namespace sw {

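// Maps a SPIR-V image format declared in the shader to its Vulkan equivalent.
// E.g. a GLSL declaration like "layout(rgba8) uniform image2D" reaches here as
// spv::ImageFormatRgba8 and maps to VK_FORMAT_R8G8B8A8_UNORM.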
static vk::Format SpirvFormatToVulkanFormat(spv::ImageFormat format)
{
	switch(format)
	{
	case spv::ImageFormatUnknown: return VK_FORMAT_UNDEFINED;
	case spv::ImageFormatRgba32f: return VK_FORMAT_R32G32B32A32_SFLOAT;
	case spv::ImageFormatRgba16f: return VK_FORMAT_R16G16B16A16_SFLOAT;
	case spv::ImageFormatR32f: return VK_FORMAT_R32_SFLOAT;
	case spv::ImageFormatRgba8: return VK_FORMAT_R8G8B8A8_UNORM;
	case spv::ImageFormatRgba8Snorm: return VK_FORMAT_R8G8B8A8_SNORM;
	case spv::ImageFormatRg32f: return VK_FORMAT_R32G32_SFLOAT;
	case spv::ImageFormatRg16f: return VK_FORMAT_R16G16_SFLOAT;
	case spv::ImageFormatR11fG11fB10f: return VK_FORMAT_B10G11R11_UFLOAT_PACK32;
	case spv::ImageFormatR16f: return VK_FORMAT_R16_SFLOAT;
	case spv::ImageFormatRgba16: return VK_FORMAT_R16G16B16A16_UNORM;
	case spv::ImageFormatRgb10A2: return VK_FORMAT_A2B10G10R10_UNORM_PACK32;
	case spv::ImageFormatRg16: return VK_FORMAT_R16G16_UNORM;
	case spv::ImageFormatRg8: return VK_FORMAT_R8G8_UNORM;
	case spv::ImageFormatR16: return VK_FORMAT_R16_UNORM;
	case spv::ImageFormatR8: return VK_FORMAT_R8_UNORM;
	case spv::ImageFormatRgba16Snorm: return VK_FORMAT_R16G16B16A16_SNORM;
	case spv::ImageFormatRg16Snorm: return VK_FORMAT_R16G16_SNORM;
	case spv::ImageFormatRg8Snorm: return VK_FORMAT_R8G8_SNORM;
	case spv::ImageFormatR16Snorm: return VK_FORMAT_R16_SNORM;
	case spv::ImageFormatR8Snorm: return VK_FORMAT_R8_SNORM;
	case spv::ImageFormatRgba32i: return VK_FORMAT_R32G32B32A32_SINT;
	case spv::ImageFormatRgba16i: return VK_FORMAT_R16G16B16A16_SINT;
	case spv::ImageFormatRgba8i: return VK_FORMAT_R8G8B8A8_SINT;
	case spv::ImageFormatR32i: return VK_FORMAT_R32_SINT;
	case spv::ImageFormatRg32i: return VK_FORMAT_R32G32_SINT;
	case spv::ImageFormatRg16i: return VK_FORMAT_R16G16_SINT;
	case spv::ImageFormatRg8i: return VK_FORMAT_R8G8_SINT;
	case spv::ImageFormatR16i: return VK_FORMAT_R16_SINT;
	case spv::ImageFormatR8i: return VK_FORMAT_R8_SINT;
	case spv::ImageFormatRgba32ui: return VK_FORMAT_R32G32B32A32_UINT;
	case spv::ImageFormatRgba16ui: return VK_FORMAT_R16G16B16A16_UINT;
	case spv::ImageFormatRgba8ui: return VK_FORMAT_R8G8B8A8_UINT;
	case spv::ImageFormatR32ui: return VK_FORMAT_R32_UINT;
	case spv::ImageFormatRgb10a2ui: return VK_FORMAT_A2B10G10R10_UINT_PACK32;
	case spv::ImageFormatRg32ui: return VK_FORMAT_R32G32_UINT;
	case spv::ImageFormatRg16ui: return VK_FORMAT_R16G16_UINT;
	case spv::ImageFormatRg8ui: return VK_FORMAT_R8G8_UINT;
	case spv::ImageFormatR16ui: return VK_FORMAT_R16_UINT;
	case spv::ImageFormatR8ui: return VK_FORMAT_R8_UINT;

	default:
		UNSUPPORTED("SPIR-V ImageFormat %u", format);
		return VK_FORMAT_UNDEFINED;
	}
}

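// Decodes an image instruction into its signature (variant, method, dimensionality,
// etc.) and the operand IDs it references. Note the two word layouts involved, e.g.:
//   %result = OpImageSampleImplicitLod %resultType %sampledImage %coordinate [ImageOperands...]
//   OpImageWrite %image %coordinate %texel [ImageOperands...]   (produces no result)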
SpirvEmitter::ImageInstruction::ImageInstruction(InsnIterator insn, const Spirv &shader, const SpirvEmitter &state)
    : ImageInstructionSignature(parseVariantAndMethod(insn))
    , position(insn.distanceFrom(shader.begin()))
{
	if(samplerMethod == Write)
	{
		imageId = insn.word(1);
		coordinateId = insn.word(2);
		texelId = insn.word(3);
	}
	else
	{
		resultTypeId = insn.resultTypeId();  // word(1)
		resultId = insn.resultId();          // word(2)

		if(samplerMethod == Fetch || samplerMethod == Read || samplerMethod == TexelPointer)  // Samplerless
		{
			imageId = insn.word(3);
		}
		else
		{
			// sampledImageId is either the result of an OpSampledImage instruction or
			// an externally combined sampler and image.
			Object::ID sampledImageId = insn.word(3);

			if(state.isSampledImage(sampledImageId))  // Result of an OpSampledImage instruction
			{
				const SampledImagePointer &sampledImage = state.getSampledImage(sampledImageId);
				imageId = shader.getObject(sampledImageId).definition.word(3);
				samplerId = sampledImage.samplerId;
			}
			else  // Combined image/sampler
			{
				imageId = sampledImageId;
				samplerId = sampledImageId;
			}
		}

		coordinateId = insn.word(4);
	}

	// `imageId` can represent either a Sampled Image, a samplerless Image, or a pointer to an Image.
	// To get to the OpTypeImage operands, traverse the OpTypeSampledImage or OpTypePointer.
	const Type &imageObjectType = shader.getObjectType(imageId);
	const Type &imageReferenceType = (imageObjectType.opcode() == spv::OpTypeSampledImage)
	                                     ? shader.getType(imageObjectType.definition.word(2))
	                                     : imageObjectType;
	const Type &imageType = ((imageReferenceType.opcode() == spv::OpTypePointer)
	                             ? shader.getType(imageReferenceType.element)
	                             : imageReferenceType);

	ASSERT(imageType.opcode() == spv::OpTypeImage);
	dim = imageType.definition.word(3);
	arrayed = imageType.definition.word(5);
	imageFormat = imageType.definition.word(8);

	const Object &coordinateObject = shader.getObject(coordinateId);
	const Type &coordinateType = shader.getType(coordinateObject);
	coordinates = coordinateType.componentCount - (isProj() ? 1 : 0);

	if(samplerMethod == TexelPointer)
	{
		sampleId = insn.word(5);
		sample = !shader.getObject(sampleId).isConstantZero();
	}

	if(isDref())
	{
		drefId = insn.word(5);
	}

	if(samplerMethod == Gather)
	{
		gatherComponent = !isDref() ? shader.getObject(insn.word(5)).constantValue[0] : 0;
	}

	uint32_t operandsIndex = getImageOperandsIndex(insn);
	uint32_t imageOperands = (operandsIndex != 0) ? insn.word(operandsIndex) : 0;  // The mask which indicates which operands are provided.

	operandsIndex += 1;  // Advance to the first actual operand <id> location.

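	// Each bit set in the mask is consumed below in ascending bit order, matching
	// the SPIR-V rule that the operand <id>s follow the mask in that same order.
	// E.g. a mask of Lod|ConstOffset (0x2 | 0x8) is followed first by the Lod <id>
	// and then by the ConstOffset <id>.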
	if(imageOperands & spv::ImageOperandsBiasMask)
	{
		ASSERT(samplerMethod == Bias);
		lodOrBiasId = insn.word(operandsIndex);
		operandsIndex += 1;
		imageOperands &= ~spv::ImageOperandsBiasMask;
	}

	if(imageOperands & spv::ImageOperandsLodMask)
	{
		ASSERT(samplerMethod == Lod || samplerMethod == Fetch);
		lodOrBiasId = insn.word(operandsIndex);
		operandsIndex += 1;
		imageOperands &= ~spv::ImageOperandsLodMask;
	}

	if(imageOperands & spv::ImageOperandsGradMask)
	{
		ASSERT(samplerMethod == Grad);
		gradDxId = insn.word(operandsIndex + 0);
		gradDyId = insn.word(operandsIndex + 1);
		operandsIndex += 2;
		imageOperands &= ~spv::ImageOperandsGradMask;

		grad = shader.getObjectType(gradDxId).componentCount;
	}

	if(imageOperands & spv::ImageOperandsConstOffsetMask)
	{
		offsetId = insn.word(operandsIndex);
		operandsIndex += 1;
		imageOperands &= ~spv::ImageOperandsConstOffsetMask;

		offset = shader.getObjectType(offsetId).componentCount;
	}

	if(imageOperands & spv::ImageOperandsSampleMask)
	{
		ASSERT(samplerMethod == Fetch || samplerMethod == Read || samplerMethod == Write);
		sampleId = insn.word(operandsIndex);
		operandsIndex += 1;
		imageOperands &= ~spv::ImageOperandsSampleMask;

		sample = !shader.getObject(sampleId).isConstantZero();
	}

	// TODO(b/174475384)
	if(imageOperands & spv::ImageOperandsZeroExtendMask)
	{
		ASSERT(samplerMethod == Read || samplerMethod == Write);
		imageOperands &= ~spv::ImageOperandsZeroExtendMask;
	}
	else if(imageOperands & spv::ImageOperandsSignExtendMask)
	{
		ASSERT(samplerMethod == Read || samplerMethod == Write);
		imageOperands &= ~spv::ImageOperandsSignExtendMask;
	}

	[[maybe_unused]] spv::Scope scope = spv::ScopeCrossDevice;  // "Whilst the CrossDevice scope is defined in SPIR-V, it is disallowed in Vulkan."

	if(imageOperands & spv::ImageOperandsMakeTexelAvailableMask)
	{
		scope = static_cast<spv::Scope>(insn.word(operandsIndex));
		operandsIndex += 1;
		imageOperands &= ~spv::ImageOperandsMakeTexelAvailableMask;
	}

	if(imageOperands & spv::ImageOperandsMakeTexelVisibleMask)
	{
		scope = static_cast<spv::Scope>(insn.word(operandsIndex));
		operandsIndex += 1;
		imageOperands &= ~spv::ImageOperandsMakeTexelVisibleMask;
	}

	if(imageOperands & spv::ImageOperandsNonPrivateTexelMask)
	{
		imageOperands &= ~spv::ImageOperandsNonPrivateTexelMask;
	}

	if(imageOperands & spv::ImageOperandsVolatileTexelMask)
	{
		UNIMPLEMENTED("b/176819536");
		imageOperands &= ~spv::ImageOperandsVolatileTexelMask;
	}

	if(imageOperands & spv::ImageOperandsNontemporalMask)
	{
		// Hints that the accessed texels are not likely
		// to be accessed again in the near future.
		imageOperands &= ~spv::ImageOperandsNontemporalMask;
	}

	// There should be no remaining image operands.
	if(imageOperands != 0)
	{
		UNSUPPORTED("Image operands 0x%08X", imageOperands);
	}
}

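// Splits an image opcode into a sampling variant (None/Dref/Proj/ProjDref) and a
// method describing how the LOD is obtained. E.g. OpImageSampleProjDrefExplicitLod
// with a Grad image operand decodes to { ProjDref, Grad }.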
SpirvEmitter::ImageInstructionSignature SpirvEmitter::ImageInstruction::parseVariantAndMethod(InsnIterator insn)
{
	uint32_t imageOperands = getImageOperandsMask(insn);
	bool bias = imageOperands & spv::ImageOperandsBiasMask;
	bool grad = imageOperands & spv::ImageOperandsGradMask;

	switch(insn.opcode())
	{
	case spv::OpImageSampleImplicitLod: return { None, bias ? Bias : Implicit };
	case spv::OpImageSampleExplicitLod: return { None, grad ? Grad : Lod };
	case spv::OpImageSampleDrefImplicitLod: return { Dref, bias ? Bias : Implicit };
	case spv::OpImageSampleDrefExplicitLod: return { Dref, grad ? Grad : Lod };
	case spv::OpImageSampleProjImplicitLod: return { Proj, bias ? Bias : Implicit };
	case spv::OpImageSampleProjExplicitLod: return { Proj, grad ? Grad : Lod };
	case spv::OpImageSampleProjDrefImplicitLod: return { ProjDref, bias ? Bias : Implicit };
	case spv::OpImageSampleProjDrefExplicitLod: return { ProjDref, grad ? Grad : Lod };
	case spv::OpImageGather: return { None, Gather };
	case spv::OpImageDrefGather: return { Dref, Gather };
	case spv::OpImageFetch: return { None, Fetch };
	case spv::OpImageQueryLod: return { None, Query };
	case spv::OpImageRead: return { None, Read };
	case spv::OpImageWrite: return { None, Write };
	case spv::OpImageTexelPointer: return { None, TexelPointer };

	default:
		ASSERT(false);
		return { None, Implicit };
	}
}

// Returns the instruction word index at which the Image Operands mask is located, or 0 if not present.
uint32_t SpirvEmitter::ImageInstruction::getImageOperandsIndex(InsnIterator insn)
{
	switch(insn.opcode())
	{
	case spv::OpImageSampleImplicitLod:
	case spv::OpImageSampleProjImplicitLod:
		return insn.wordCount() > 5 ? 5 : 0;  // Optional
	case spv::OpImageSampleExplicitLod:
	case spv::OpImageSampleProjExplicitLod:
		return 5;  // "Either Lod or Grad image operands must be present."
	case spv::OpImageSampleDrefImplicitLod:
	case spv::OpImageSampleProjDrefImplicitLod:
		return insn.wordCount() > 6 ? 6 : 0;  // Optional
	case spv::OpImageSampleDrefExplicitLod:
	case spv::OpImageSampleProjDrefExplicitLod:
		return 6;  // "Either Lod or Grad image operands must be present."
	case spv::OpImageGather:
	case spv::OpImageDrefGather:
		return insn.wordCount() > 6 ? 6 : 0;  // Optional
	case spv::OpImageFetch:
		return insn.wordCount() > 5 ? 5 : 0;  // Optional
	case spv::OpImageQueryLod:
		ASSERT(insn.wordCount() == 5);
		return 0;  // No image operands.
	case spv::OpImageRead:
		return insn.wordCount() > 5 ? 5 : 0;  // Optional
	case spv::OpImageWrite:
		return insn.wordCount() > 4 ? 4 : 0;  // Optional
	case spv::OpImageTexelPointer:
		ASSERT(insn.wordCount() == 6);
		return 0;  // No image operands.

	default:
		ASSERT(false);
		return 0;
	}
}

uint32_t SpirvEmitter::ImageInstruction::getImageOperandsMask(InsnIterator insn)
{
	uint32_t operandsIndex = getImageOperandsIndex(insn);
	return (operandsIndex != 0) ? insn.word(operandsIndex) : 0;
}

void SpirvEmitter::EmitImageSample(const ImageInstruction &instruction)
{
	auto &resultType = shader.getType(instruction.resultTypeId);
	auto &result = createIntermediate(instruction.resultId, resultType.componentCount);
	Array<SIMD::Float> out(4);

	// TODO(b/153380916): When we're in a code path that is always executed,
	// i.e. post-dominators of the entry block, we don't have to dynamically
	// check whether any lanes are active, and can elide the jump.
	If(AnyTrue(activeLaneMask()))
	{
		EmitImageSampleUnconditional(out, instruction);
	}

	for(auto i = 0u; i < resultType.componentCount; i++) { result.move(i, out[i]); }
}

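// When the image is decorated NonUniform (descriptor indexing), each SIMD lane may
// reference a different descriptor, so the sampler routine is resolved and invoked
// per active lane; otherwise a single uniform descriptor serves all lanes at once.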
void SpirvEmitter::EmitImageSampleUnconditional(Array<SIMD::Float> &out, const ImageInstruction &instruction) const
{
	auto decorations = shader.GetDecorationsForId(instruction.imageId);

	if(decorations.NonUniform)
	{
		SIMD::Int activeLaneMask = this->activeLaneMask();
		SIMD::Pointer imagePointer = getImage(instruction.imageId);
		// Per-lane output
		for(int laneIdx = 0; laneIdx < SIMD::Width; laneIdx++)
		{
			Array<SIMD::Float> laneOut(out.getArraySize());
			If(Extract(activeLaneMask, laneIdx) != 0)
			{
				Pointer<Byte> imageDescriptor = imagePointer.getPointerForLane(laneIdx);  // vk::SampledImageDescriptor*
				Pointer<Byte> samplerDescriptor = getSamplerDescriptor(imageDescriptor, instruction, laneIdx);

				Pointer<Byte> samplerFunction = lookupSamplerFunction(imageDescriptor, samplerDescriptor, instruction);

				callSamplerFunction(samplerFunction, laneOut, imageDescriptor, instruction);
			}

			for(int outIdx = 0; outIdx < out.getArraySize(); outIdx++)
			{
				out[outIdx] = Insert(out[outIdx], Extract(laneOut[outIdx], laneIdx), laneIdx);
			}
		}
	}
	else
	{
		Pointer<Byte> imageDescriptor = getImage(instruction.imageId).getUniformPointer();  // vk::SampledImageDescriptor*
		Pointer<Byte> samplerDescriptor = getSamplerDescriptor(imageDescriptor, instruction);

		Pointer<Byte> samplerFunction = lookupSamplerFunction(imageDescriptor, samplerDescriptor, instruction);

		callSamplerFunction(samplerFunction, out, imageDescriptor, instruction);
	}
}

Pointer<Byte> SpirvEmitter::getSamplerDescriptor(Pointer<Byte> imageDescriptor, const ImageInstruction &instruction) const
{
	return ((instruction.samplerId == instruction.imageId) || (instruction.samplerId == 0)) ? imageDescriptor : getImage(instruction.samplerId).getUniformPointer();
}

Pointer<Byte> SpirvEmitter::getSamplerDescriptor(Pointer<Byte> imageDescriptor, const ImageInstruction &instruction, int laneIdx) const
{
	return ((instruction.samplerId == instruction.imageId) || (instruction.samplerId == 0)) ? imageDescriptor : getImage(instruction.samplerId).getPointerForLane(laneIdx);
}

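// Resolves the specialized sampling routine for this instruction. Each image
// instruction has a one-entry cache keyed on (imageDescriptor, samplerId), so the
// relatively expensive getImageSampler call is only made when the descriptor or
// sampler changes between invocations.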
Pointer<Byte> SpirvEmitter::lookupSamplerFunction(Pointer<Byte> imageDescriptor, Pointer<Byte> samplerDescriptor, const ImageInstruction &instruction) const
{
	Int samplerId = (instruction.samplerId != 0) ? *Pointer<rr::Int>(samplerDescriptor + OFFSET(vk::SampledImageDescriptor, samplerId)) : Int(0);

	auto &cache = routine->samplerCache.at(instruction.position);
	Bool cacheHit = (cache.imageDescriptor == imageDescriptor) && (cache.samplerId == samplerId);  // TODO(b/205566405): Skip sampler ID check for samplerless instructions.

	If(!cacheHit)
	{
		rr::Int imageViewId = *Pointer<rr::Int>(imageDescriptor + OFFSET(vk::ImageDescriptor, imageViewId));
		cache.function = Call(getImageSampler, routine->device, instruction.signature, samplerId, imageViewId);
		cache.imageDescriptor = imageDescriptor;
		cache.samplerId = samplerId;
	}

	return cache.function;
}

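// Marshals the operands into the sampler function's flat input array. The packing
// order is: coordinates (projected if needed), then the depth-comparison reference,
// then LOD/bias or the two gradient vectors, then constant offsets, and finally the
// sample index; integer values are bitcast and passed through the float array.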
void SpirvEmitter::callSamplerFunction(Pointer<Byte> samplerFunction, Array<SIMD::Float> &out, Pointer<Byte> imageDescriptor, const ImageInstruction &instruction) const
{
	Array<SIMD::Float> in(16);  // Maximum 16 input parameter components.

	auto coordinate = Operand(shader, *this, instruction.coordinateId);

	uint32_t i = 0;
	for(; i < instruction.coordinates; i++)
	{
		if(instruction.isProj())
		{
			in[i] = coordinate.Float(i) / coordinate.Float(instruction.coordinates);  // TODO(b/129523279): Optimize using reciprocal.
		}
		else
		{
			in[i] = coordinate.Float(i);
		}
	}

	if(instruction.isDref())
	{
		auto drefValue = Operand(shader, *this, instruction.drefId);

		if(instruction.isProj())
		{
			in[i] = drefValue.Float(0) / coordinate.Float(instruction.coordinates);  // TODO(b/129523279): Optimize using reciprocal.
		}
		else
		{
			in[i] = drefValue.Float(0);
		}

		i++;
	}

	if(instruction.lodOrBiasId != 0)
	{
		auto lodValue = Operand(shader, *this, instruction.lodOrBiasId);
		in[i] = lodValue.Float(0);
		i++;
	}
	else if(instruction.gradDxId != 0)
	{
		auto dxValue = Operand(shader, *this, instruction.gradDxId);
		auto dyValue = Operand(shader, *this, instruction.gradDyId);
		ASSERT(dxValue.componentCount == dyValue.componentCount);

		for(uint32_t j = 0; j < dxValue.componentCount; j++, i++)
		{
			in[i] = dxValue.Float(j);
		}

		for(uint32_t j = 0; j < dxValue.componentCount; j++, i++)
		{
			in[i] = dyValue.Float(j);
		}
	}
	else if(instruction.samplerMethod == Fetch)
	{
		// The instruction didn't provide a lod operand, but the sampler's Fetch
		// function requires one to be present. If no lod is supplied, the default
		// is zero.
		in[i] = As<SIMD::Float>(SIMD::Int(0));
		i++;
	}

	if(instruction.offsetId != 0)
	{
		auto offsetValue = Operand(shader, *this, instruction.offsetId);

		for(uint32_t j = 0; j < offsetValue.componentCount; j++, i++)
		{
			in[i] = As<SIMD::Float>(offsetValue.Int(j));  // Integer values, but transferred as float.
		}
	}

	if(instruction.sample)
	{
		auto sampleValue = Operand(shader, *this, instruction.sampleId);
		in[i] = As<SIMD::Float>(sampleValue.Int(0));
	}

	Pointer<Byte> texture = imageDescriptor + OFFSET(vk::SampledImageDescriptor, texture);  // sw::Texture*

	Call<ImageSampler>(samplerFunction, texture, &in, &out, routine->constants);
}

void SpirvEmitter::EmitImageQuerySizeLod(InsnIterator insn)
{
	auto &resultTy = shader.getType(insn.resultTypeId());
	auto imageId = Object::ID(insn.word(3));
	auto lodId = Object::ID(insn.word(4));

	auto &dst = createIntermediate(insn.resultId(), resultTy.componentCount);
	GetImageDimensions(resultTy, imageId, lodId, dst);
}

void SpirvEmitter::EmitImageQuerySize(InsnIterator insn)
{
	auto &resultTy = shader.getType(insn.resultTypeId());
	auto imageId = Object::ID(insn.word(3));
	auto lodId = Object::ID(0);

	auto &dst = createIntermediate(insn.resultId(), resultTy.componentCount);
	GetImageDimensions(resultTy, imageId, lodId, dst);
}

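// Reports the image dimensions, scaled down for the requested mip level and clamped
// to a minimum of 1, with the layer count appended for arrayed images. E.g. a
// 100x40 2D image queried at LOD 3 reports (100 >> 3, 40 >> 3) = (12, 5).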
void SpirvEmitter::GetImageDimensions(const Type &resultTy, Object::ID imageId, Object::ID lodId, Intermediate &dst) const
{
	auto &image = shader.getObject(imageId);
	auto &imageType = shader.getType(image);

	ASSERT(imageType.definition.opcode() == spv::OpTypeImage);
	bool isArrayed = imageType.definition.word(5) != 0;
	uint32_t dimensions = resultTy.componentCount - (isArrayed ? 1 : 0);

	const Spirv::DescriptorDecorations &d = shader.descriptorDecorations.at(imageId);
	auto descriptorType = routine->pipelineLayout->getDescriptorType(d.DescriptorSet, d.Binding);

	Pointer<Byte> descriptor = getPointer(imageId).getUniformPointer();

	Int width;
	Int height;
	Int depth;

	switch(descriptorType)
	{
	case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
	case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
		width = *Pointer<Int>(descriptor + OFFSET(vk::StorageImageDescriptor, width));
		height = *Pointer<Int>(descriptor + OFFSET(vk::StorageImageDescriptor, height));
		depth = *Pointer<Int>(descriptor + OFFSET(vk::StorageImageDescriptor, depth));
		break;
	case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
	case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
	case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
		width = *Pointer<Int>(descriptor + OFFSET(vk::SampledImageDescriptor, width));
		height = *Pointer<Int>(descriptor + OFFSET(vk::SampledImageDescriptor, height));
		depth = *Pointer<Int>(descriptor + OFFSET(vk::SampledImageDescriptor, depth));
		break;
	default:
		UNREACHABLE("Image descriptorType: %d", int(descriptorType));
	}

	if(lodId != 0)
	{
		auto lodVal = Operand(shader, *this, lodId);
		ASSERT(lodVal.componentCount == 1);
		auto lod = lodVal.Int(0);
		auto one = SIMD::Int(1);

		if(dimensions >= 1) dst.move(0, Max(SIMD::Int(width) >> lod, one));
		if(dimensions >= 2) dst.move(1, Max(SIMD::Int(height) >> lod, one));
		if(dimensions >= 3) dst.move(2, Max(SIMD::Int(depth) >> lod, one));
	}
	else
	{
		if(dimensions >= 1) dst.move(0, SIMD::Int(width));
		if(dimensions >= 2) dst.move(1, SIMD::Int(height));
		if(dimensions >= 3) dst.move(2, SIMD::Int(depth));
	}

	if(isArrayed)
	{
		dst.move(dimensions, SIMD::Int(depth));
	}
}

void SpirvEmitter::EmitImageQueryLevels(InsnIterator insn)
{
	auto &resultTy = shader.getType(insn.resultTypeId());
	ASSERT(resultTy.componentCount == 1);
	auto imageId = Object::ID(insn.word(3));

	const Spirv::DescriptorDecorations &d = shader.descriptorDecorations.at(imageId);
	auto descriptorType = routine->pipelineLayout->getDescriptorType(d.DescriptorSet, d.Binding);

	Pointer<Byte> descriptor = getPointer(imageId).getUniformPointer();
	Int mipLevels = 0;
	switch(descriptorType)
	{
	case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
	case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
	case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
		mipLevels = *Pointer<Int>(descriptor + OFFSET(vk::SampledImageDescriptor, mipLevels));  // uint32_t
		break;
	default:
		UNREACHABLE("Image descriptorType: %d", int(descriptorType));
	}

	auto &dst = createIntermediate(insn.resultId(), 1);
	dst.move(0, SIMD::Int(mipLevels));
}

void SpirvEmitter::EmitImageQuerySamples(InsnIterator insn)
{
	auto &resultTy = shader.getType(insn.resultTypeId());
	ASSERT(resultTy.componentCount == 1);
	auto imageId = Object::ID(insn.word(3));
	auto imageTy = shader.getObjectType(imageId);
	ASSERT(imageTy.definition.opcode() == spv::OpTypeImage);
	ASSERT(imageTy.definition.word(3) == spv::Dim2D);
	ASSERT(imageTy.definition.word(6 /* MS */) == 1);

	const Spirv::DescriptorDecorations &d = shader.descriptorDecorations.at(imageId);
	auto descriptorType = routine->pipelineLayout->getDescriptorType(d.DescriptorSet, d.Binding);

	Pointer<Byte> descriptor = getPointer(imageId).getUniformPointer();
	Int sampleCount = 0;
	switch(descriptorType)
	{
	case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
		sampleCount = *Pointer<Int>(descriptor + OFFSET(vk::StorageImageDescriptor, sampleCount));  // uint32_t
		break;
	case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
	case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
	case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
		sampleCount = *Pointer<Int>(descriptor + OFFSET(vk::SampledImageDescriptor, sampleCount));  // uint32_t
		break;
	default:
		UNREACHABLE("Image descriptorType: %d", int(descriptorType));
	}

	auto &dst = createIntermediate(insn.resultId(), 1);
	dst.move(0, SIMD::Int(sampleCount));
}

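// Computes the byte offset of each lane's texel within the image, essentially:
//   offset = u * texelSize + v * rowPitch + (w + layer) * slicePitch + sample * samplePitch
// E.g. with 4-byte RGBA8 texels and a 256-byte row pitch, texel (3, 2) of a
// non-arrayed 2D image lands at 3 * 4 + 2 * 256 = 524 bytes from the base.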
SpirvEmitter::TexelAddressData SpirvEmitter::setupTexelAddressData(SIMD::Int rowPitch, SIMD::Int slicePitch, SIMD::Int samplePitch, ImageInstructionSignature instruction, SIMD::Int coordinate[], SIMD::Int sample, vk::Format imageFormat, const SpirvRoutine *routine)
{
	TexelAddressData data;

	data.isArrayed = instruction.arrayed;
	data.dim = static_cast<spv::Dim>(instruction.dim);
	data.texelSize = imageFormat.bytes();
	data.dims = instruction.coordinates - (data.isArrayed ? 1 : 0);

	data.u = coordinate[0];
	data.v = SIMD::Int(0);

	if(data.dims > 1)
	{
		data.v = coordinate[1];
	}

	if(data.dim == spv::DimSubpassData)
	{
		data.u += routine->windowSpacePosition[0];
		data.v += routine->windowSpacePosition[1];
	}

	data.ptrOffset = data.u * SIMD::Int(data.texelSize);

	if(data.dims > 1)
	{
		data.ptrOffset += data.v * rowPitch;
	}

	data.w = 0;
	if((data.dims > 2) || data.isArrayed)
	{
		if(data.dims > 2)
		{
			data.w += coordinate[2];
		}

		if(data.isArrayed)
		{
			data.w += coordinate[data.dims];
		}

		data.ptrOffset += data.w * slicePitch;
	}

	if(data.dim == spv::DimSubpassData)
	{
		// Multiview input attachment access is to the layer corresponding to the current view
		data.ptrOffset += SIMD::Int(routine->layer) * slicePitch;
	}

	if(instruction.sample)
	{
		data.ptrOffset += sample * samplePitch;
	}

	return data;
}

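// Non-uniform variant: the descriptor may differ per lane, so the pitches and the
// image base pointer are gathered per lane. When nullification is requested,
// out-of-bounds coordinates are redirected to an offset guaranteed to fail the
// robustness bounds check.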
SIMD::Pointer SpirvEmitter::GetNonUniformTexelAddress(ImageInstructionSignature instruction, SIMD::Pointer descriptor, SIMD::Int coordinate[], SIMD::Int sample, vk::Format imageFormat, OutOfBoundsBehavior outOfBoundsBehavior, SIMD::Int activeLaneMask, const SpirvRoutine *routine)
{
	const bool useStencilAspect = (imageFormat == VK_FORMAT_S8_UINT);
	auto rowPitch = (descriptor + (useStencilAspect
	                                   ? OFFSET(vk::StorageImageDescriptor, stencilRowPitchBytes)
	                                   : OFFSET(vk::StorageImageDescriptor, rowPitchBytes)))
	                    .Load<SIMD::Int>(outOfBoundsBehavior, activeLaneMask);
	auto slicePitch = (descriptor + (useStencilAspect
	                                     ? OFFSET(vk::StorageImageDescriptor, stencilSlicePitchBytes)
	                                     : OFFSET(vk::StorageImageDescriptor, slicePitchBytes)))
	                      .Load<SIMD::Int>(outOfBoundsBehavior, activeLaneMask);
	auto samplePitch = (descriptor + (useStencilAspect
	                                      ? OFFSET(vk::StorageImageDescriptor, stencilSamplePitchBytes)
	                                      : OFFSET(vk::StorageImageDescriptor, samplePitchBytes)))
	                       .Load<SIMD::Int>(outOfBoundsBehavior, activeLaneMask);

	auto texelData = setupTexelAddressData(rowPitch, slicePitch, samplePitch, instruction, coordinate, sample, imageFormat, routine);

	// If the out-of-bounds behavior is set to nullify, then each coordinate must be tested individually.
	// Other out-of-bounds behaviors work properly by just comparing the offset against the total size.
	if(outOfBoundsBehavior == OutOfBoundsBehavior::Nullify)
	{
		SIMD::UInt width = (descriptor + OFFSET(vk::StorageImageDescriptor, width)).Load<SIMD::Int>(outOfBoundsBehavior, activeLaneMask);
		SIMD::Int oobMask = As<SIMD::Int>(CmpNLT(As<SIMD::UInt>(texelData.u), width));

		if(texelData.dims > 1)
		{
			SIMD::UInt height = As<SIMD::UInt>((descriptor + OFFSET(vk::StorageImageDescriptor, height)).Load<SIMD::Int>(outOfBoundsBehavior, activeLaneMask));
			oobMask |= As<SIMD::Int>(CmpNLT(As<SIMD::UInt>(texelData.v), height));
		}

		if((texelData.dims > 2) || texelData.isArrayed)
		{
			SIMD::UInt depth = As<SIMD::UInt>((descriptor + OFFSET(vk::StorageImageDescriptor, depth)).Load<SIMD::Int>(outOfBoundsBehavior, activeLaneMask));
			if(texelData.dim == spv::DimCube) { depth *= 6; }
			oobMask |= As<SIMD::Int>(CmpNLT(As<SIMD::UInt>(texelData.w), depth));
		}

		if(instruction.sample)
		{
			SIMD::UInt sampleCount = As<SIMD::UInt>((descriptor + OFFSET(vk::StorageImageDescriptor, sampleCount)).Load<SIMD::Int>(outOfBoundsBehavior, activeLaneMask));
			oobMask |= As<SIMD::Int>(CmpNLT(As<SIMD::UInt>(sample), sampleCount));
		}

		constexpr int32_t OOB_OFFSET = 0x7FFFFFFF - 16;  // SIMD pointer offsets are signed 32-bit, so this is the largest offset (for 16-byte texels).
		static_assert(OOB_OFFSET >= vk::MAX_MEMORY_ALLOCATION_SIZE, "the largest offset must be guaranteed to be out-of-bounds");

		texelData.ptrOffset = (texelData.ptrOffset & ~oobMask) | (oobMask & SIMD::Int(OOB_OFFSET));  // oob ? OOB_OFFSET : ptrOffset  // TODO: IfThenElse()
	}

	std::vector<Pointer<Byte>> imageBase(SIMD::Width);
	for(int i = 0; i < SIMD::Width; i++)
	{
		imageBase[i] = *Pointer<Pointer<Byte>>(descriptor.getPointerForLane(i) + (useStencilAspect
		                                                                              ? OFFSET(vk::StorageImageDescriptor, stencilPtr)
		                                                                              : OFFSET(vk::StorageImageDescriptor, ptr)));
	}

	return SIMD::Pointer(imageBase) + texelData.ptrOffset;
}

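// Uniform variant of the above: all lanes share one descriptor, so the pitches and
// base pointer are loaded once and the resulting pointer carries the image size for
// bounds checking.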
SIMD::Pointer SpirvEmitter::GetTexelAddress(ImageInstructionSignature instruction, Pointer<Byte> descriptor, SIMD::Int coordinate[], SIMD::Int sample, vk::Format imageFormat, OutOfBoundsBehavior outOfBoundsBehavior, const SpirvRoutine *routine)
{
	const bool useStencilAspect = (imageFormat == VK_FORMAT_S8_UINT);
	auto rowPitch = SIMD::Int(*Pointer<Int>(descriptor + (useStencilAspect
	                                                          ? OFFSET(vk::StorageImageDescriptor, stencilRowPitchBytes)
	                                                          : OFFSET(vk::StorageImageDescriptor, rowPitchBytes))));
	auto slicePitch = SIMD::Int(
	    *Pointer<Int>(descriptor + (useStencilAspect
	                                    ? OFFSET(vk::StorageImageDescriptor, stencilSlicePitchBytes)
	                                    : OFFSET(vk::StorageImageDescriptor, slicePitchBytes))));
	auto samplePitch = SIMD::Int(
	    *Pointer<Int>(descriptor + (useStencilAspect
	                                    ? OFFSET(vk::StorageImageDescriptor, stencilSamplePitchBytes)
	                                    : OFFSET(vk::StorageImageDescriptor, samplePitchBytes))));

	auto texelData = setupTexelAddressData(rowPitch, slicePitch, samplePitch, instruction, coordinate, sample, imageFormat, routine);

	// If the out-of-bounds behavior is set to nullify, then each coordinate must be tested individually.
	// Other out-of-bounds behaviors work properly by just comparing the offset against the total size.
	if(outOfBoundsBehavior == OutOfBoundsBehavior::Nullify)
	{
		SIMD::UInt width = *Pointer<UInt>(descriptor + OFFSET(vk::StorageImageDescriptor, width));
		SIMD::Int oobMask = As<SIMD::Int>(CmpNLT(As<SIMD::UInt>(texelData.u), width));

		if(texelData.dims > 1)
		{
			SIMD::UInt height = *Pointer<UInt>(descriptor + OFFSET(vk::StorageImageDescriptor, height));
			oobMask |= As<SIMD::Int>(CmpNLT(As<SIMD::UInt>(texelData.v), height));
		}

		if((texelData.dims > 2) || texelData.isArrayed)
		{
			UInt depth = *Pointer<UInt>(descriptor + OFFSET(vk::StorageImageDescriptor, depth));
			if(texelData.dim == spv::DimCube) { depth *= 6; }
			oobMask |= As<SIMD::Int>(CmpNLT(As<SIMD::UInt>(texelData.w), SIMD::UInt(depth)));
		}

		if(instruction.sample)
		{
			SIMD::UInt sampleCount = *Pointer<UInt>(descriptor + OFFSET(vk::StorageImageDescriptor, sampleCount));
			oobMask |= As<SIMD::Int>(CmpNLT(As<SIMD::UInt>(sample), sampleCount));
		}

		constexpr int32_t OOB_OFFSET = 0x7FFFFFFF - 16;  // SIMD pointer offsets are signed 32-bit, so this is the largest offset (for 16-byte texels).
		static_assert(OOB_OFFSET >= vk::MAX_MEMORY_ALLOCATION_SIZE, "the largest offset must be guaranteed to be out-of-bounds");

		texelData.ptrOffset = (texelData.ptrOffset & ~oobMask) | (oobMask & SIMD::Int(OOB_OFFSET));  // oob ? OOB_OFFSET : ptrOffset  // TODO: IfThenElse()
	}

	Pointer<Byte> imageBase = *Pointer<Pointer<Byte>>(descriptor + (useStencilAspect
	                                                                    ? OFFSET(vk::StorageImageDescriptor, stencilPtr)
	                                                                    : OFFSET(vk::StorageImageDescriptor, ptr)));

	Int imageSizeInBytes = *Pointer<Int>(descriptor + OFFSET(vk::StorageImageDescriptor, sizeInBytes));

	return SIMD::Pointer(imageBase, imageSizeInBytes, texelData.ptrOffset);
}

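// Loads texels for OpImageRead and input attachment access: the packed texel bytes
// are gathered per lane, then unpacked into the destination components according to
// the image format.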
void SpirvEmitter::EmitImageRead(const ImageInstruction &instruction)
{
	auto &resultType = shader.getObjectType(instruction.resultId);
	auto &image = shader.getObject(instruction.imageId);
	auto &imageType = shader.getType(image);

	ASSERT(imageType.definition.opcode() == spv::OpTypeImage);
	auto dim = static_cast<spv::Dim>(instruction.dim);

	auto coordinate = Operand(shader, *this, instruction.coordinateId);
	const Spirv::DescriptorDecorations &d = shader.descriptorDecorations.at(instruction.imageId);

	// For subpass data, the format in the instruction is spv::ImageFormatUnknown. Get it from
	// the renderpass data instead. In all other cases, we can use the format in the instruction.
	ASSERT(dim != spv::DimSubpassData || attachments != nullptr);
	vk::Format imageFormat = (dim == spv::DimSubpassData)
	                             ? shader.getInputAttachmentFormat(*attachments, d.InputAttachmentIndex)
	                             : SpirvFormatToVulkanFormat(static_cast<spv::ImageFormat>(instruction.imageFormat));

	// Depth+stencil image attachments select the aspect based on the Sampled Type of the
	// OpTypeImage. If float, we want the depth aspect. If int, we want the stencil aspect.
	bool useStencilAspect = (imageFormat == VK_FORMAT_D32_SFLOAT_S8_UINT &&
	                         shader.getType(imageType.definition.word(2)).opcode() == spv::OpTypeInt);

	if(useStencilAspect)
	{
		imageFormat = VK_FORMAT_S8_UINT;
	}

	auto &dst = createIntermediate(instruction.resultId, resultType.componentCount);
	SIMD::Pointer ptr = getPointer(instruction.imageId);

	SIMD::Int uvwa[4];
	SIMD::Int sample;
	const int texelSize = imageFormat.bytes();
	// VK_EXT_image_robustness requires replacing out-of-bounds accesses with zero.
	// TODO(b/162327166): Only perform bounds checks when VK_EXT_image_robustness is enabled.
	auto robustness = OutOfBoundsBehavior::Nullify;

	for(uint32_t i = 0; i < instruction.coordinates; i++)
	{
		uvwa[i] = coordinate.Int(i);
	}

	if(instruction.sample)
	{
		sample = Operand(shader, *this, instruction.sampleId).Int(0);
	}

	// Gather packed texel data. Texels larger than 4 bytes occupy multiple SIMD::Int elements.
	// TODO(b/160531165): Provide gather abstractions for various element sizes.
	SIMD::Int packed[4];

	SIMD::Pointer texelPtr = ptr.isBasePlusOffset
	                             ? GetTexelAddress(instruction, ptr.getUniformPointer(), uvwa, sample, imageFormat, robustness, routine)
	                             : GetNonUniformTexelAddress(instruction, ptr, uvwa, sample, imageFormat, robustness, activeLaneMask(), routine);

	if(texelSize == 4 || texelSize == 8 || texelSize == 16)
	{
		for(auto i = 0; i < texelSize / 4; i++)
		{
			packed[i] = texelPtr.Load<SIMD::Int>(robustness, activeLaneMask());
			texelPtr += sizeof(float);
		}
	}
	else if(texelSize == 2)
	{
		SIMD::Int mask = activeLaneMask() & texelPtr.isInBounds(2, robustness);

		for(int i = 0; i < SIMD::Width; i++)
		{
			If(Extract(mask, i) != 0)
			{
				packed[0] = Insert(packed[0], Int(*Pointer<Short>(texelPtr.getPointerForLane(i))), i);
			}
		}
	}
	else if(texelSize == 1)
	{
		SIMD::Int mask = activeLaneMask() & texelPtr.isInBounds(1, robustness);

		for(int i = 0; i < SIMD::Width; i++)
		{
			If(Extract(mask, i) != 0)
			{
				packed[0] = Insert(packed[0], Int(*Pointer<Byte>(texelPtr.getPointerForLane(i))), i);
			}
		}
	}
	else
	{
		UNREACHABLE("texelSize: %d", int(texelSize));
	}

	// Format support requirements here come from two sources:
	// - The minimum required set of formats for loads from storage images
	// - Any format supported as a color or depth/stencil attachment, for input attachments
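	// As an illustration, an R8G8B8A8_UNORM texel packed as 0x80FF40C0 unpacks to
	// roughly (0.75, 0.25, 1.0, 0.50): each byte is masked out and scaled by 1/255.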
915 	switch(imageFormat)
916 	{
917 	case VK_FORMAT_R32G32B32A32_SFLOAT:
918 	case VK_FORMAT_R32G32B32A32_SINT:
919 	case VK_FORMAT_R32G32B32A32_UINT:
920 		dst.move(0, packed[0]);
921 		dst.move(1, packed[1]);
922 		dst.move(2, packed[2]);
923 		dst.move(3, packed[3]);
924 		break;
925 	case VK_FORMAT_R32_SINT:
926 	case VK_FORMAT_R32_UINT:
927 		dst.move(0, packed[0]);
928 		// Fill remaining channels with 0,0,1 (of the correct type)
929 		dst.move(1, SIMD::Int(0));
930 		dst.move(2, SIMD::Int(0));
931 		dst.move(3, SIMD::Int(1));
932 		break;
933 	case VK_FORMAT_R32_SFLOAT:
934 	case VK_FORMAT_D32_SFLOAT:
935 	case VK_FORMAT_D32_SFLOAT_S8_UINT:
936 		dst.move(0, packed[0]);
937 		// Fill remaining channels with 0,0,1 (of the correct type)
938 		dst.move(1, SIMD::Float(0.0f));
939 		dst.move(2, SIMD::Float(0.0f));
940 		dst.move(3, SIMD::Float(1.0f));
941 		break;
942 	case VK_FORMAT_D16_UNORM:
943 		dst.move(0, SIMD::Float(packed[0] & SIMD::Int(0xFFFF)) * SIMD::Float(1.0f / 0xFFFF));
944 		dst.move(1, SIMD::Float(0.0f));
945 		dst.move(2, SIMD::Float(0.0f));
946 		dst.move(3, SIMD::Float(1.0f));
947 		break;
948 	case VK_FORMAT_R16G16B16A16_UNORM:
949 		dst.move(0, SIMD::Float(packed[0] & SIMD::Int(0xFFFF)) * SIMD::Float(1.0f / 0xFFFF));
950 		dst.move(1, SIMD::Float((packed[0] >> 16) & SIMD::Int(0xFFFF)) * SIMD::Float(1.0f / 0xFFFF));
951 		dst.move(2, SIMD::Float(packed[1] & SIMD::Int(0xFFFF)) * SIMD::Float(1.0f / 0xFFFF));
952 		dst.move(3, SIMD::Float((packed[1] >> 16) & SIMD::Int(0xFFFF)) * SIMD::Float(1.0f / 0xFFFF));
953 		break;
954 	case VK_FORMAT_R16G16B16A16_SNORM:
955 		dst.move(0, Max(SIMD::Float((packed[0] << 16) & SIMD::Int(0xFFFF0000)) * SIMD::Float(1.0f / 0x7FFF0000), SIMD::Float(-1.0f)));
956 		dst.move(1, Max(SIMD::Float(packed[0] & SIMD::Int(0xFFFF0000)) * SIMD::Float(1.0f / 0x7FFF0000), SIMD::Float(-1.0f)));
957 		dst.move(2, Max(SIMD::Float((packed[1] << 16) & SIMD::Int(0xFFFF0000)) * SIMD::Float(1.0f / 0x7FFF0000), SIMD::Float(-1.0f)));
958 		dst.move(3, Max(SIMD::Float(packed[1] & SIMD::Int(0xFFFF0000)) * SIMD::Float(1.0f / 0x7FFF0000), SIMD::Float(-1.0f)));
959 		break;
960 	case VK_FORMAT_R16G16B16A16_SINT:
961 		dst.move(0, (packed[0] << 16) >> 16);
962 		dst.move(1, packed[0] >> 16);
963 		dst.move(2, (packed[1] << 16) >> 16);
964 		dst.move(3, packed[1] >> 16);
965 		break;
966 	case VK_FORMAT_R16G16B16A16_UINT:
967 		dst.move(0, packed[0] & SIMD::Int(0xFFFF));
968 		dst.move(1, (packed[0] >> 16) & SIMD::Int(0xFFFF));
969 		dst.move(2, packed[1] & SIMD::Int(0xFFFF));
970 		dst.move(3, (packed[1] >> 16) & SIMD::Int(0xFFFF));
971 		break;
972 	case VK_FORMAT_R16G16B16A16_SFLOAT:
973 		dst.move(0, halfToFloatBits(As<SIMD::UInt>(packed[0]) & SIMD::UInt(0x0000FFFF)));
974 		dst.move(1, halfToFloatBits((As<SIMD::UInt>(packed[0]) & SIMD::UInt(0xFFFF0000)) >> 16));
975 		dst.move(2, halfToFloatBits(As<SIMD::UInt>(packed[1]) & SIMD::UInt(0x0000FFFF)));
976 		dst.move(3, halfToFloatBits((As<SIMD::UInt>(packed[1]) & SIMD::UInt(0xFFFF0000)) >> 16));
977 		break;
978 	case VK_FORMAT_R8G8B8A8_SNORM:
979 	case VK_FORMAT_A8B8G8R8_SNORM_PACK32:
980 		dst.move(0, Max(SIMD::Float((packed[0] << 24) & SIMD::Int(0xFF000000)) * SIMD::Float(1.0f / 0x7F000000), SIMD::Float(-1.0f)));
981 		dst.move(1, Max(SIMD::Float((packed[0] << 16) & SIMD::Int(0xFF000000)) * SIMD::Float(1.0f / 0x7F000000), SIMD::Float(-1.0f)));
982 		dst.move(2, Max(SIMD::Float((packed[0] << 8) & SIMD::Int(0xFF000000)) * SIMD::Float(1.0f / 0x7F000000), SIMD::Float(-1.0f)));
983 		dst.move(3, Max(SIMD::Float((packed[0]) & SIMD::Int(0xFF000000)) * SIMD::Float(1.0f / 0x7F000000), SIMD::Float(-1.0f)));
984 		break;
985 	case VK_FORMAT_R8G8B8A8_UNORM:
986 	case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
987 		dst.move(0, SIMD::Float(packed[0] & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF));
988 		dst.move(1, SIMD::Float((packed[0] >> 8) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF));
989 		dst.move(2, SIMD::Float((packed[0] >> 16) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF));
990 		dst.move(3, SIMD::Float((packed[0] >> 24) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF));
991 		break;
992 	case VK_FORMAT_R8G8B8A8_SRGB:
993 	case VK_FORMAT_A8B8G8R8_SRGB_PACK32:
994 		dst.move(0, sRGBtoLinear(SIMD::Float(packed[0] & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF)));
995 		dst.move(1, sRGBtoLinear(SIMD::Float((packed[0] >> 8) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF)));
996 		dst.move(2, sRGBtoLinear(SIMD::Float((packed[0] >> 16) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF)));
997 		dst.move(3, SIMD::Float((packed[0] >> 24) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF));
998 		break;
999 	case VK_FORMAT_B8G8R8A8_UNORM:
1000 		dst.move(0, SIMD::Float((packed[0] >> 16) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF));
1001 		dst.move(1, SIMD::Float((packed[0] >> 8) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF));
1002 		dst.move(2, SIMD::Float(packed[0] & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF));
1003 		dst.move(3, SIMD::Float((packed[0] >> 24) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF));
1004 		break;
1005 	case VK_FORMAT_B8G8R8A8_SRGB:
1006 		dst.move(0, sRGBtoLinear(SIMD::Float((packed[0] >> 16) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF)));
1007 		dst.move(1, sRGBtoLinear(SIMD::Float((packed[0] >> 8) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF)));
1008 		dst.move(2, sRGBtoLinear(SIMD::Float(packed[0] & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF)));
1009 		dst.move(3, SIMD::Float((packed[0] >> 24) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF));
1010 		break;
1011 	case VK_FORMAT_R8G8B8A8_UINT:
1012 	case VK_FORMAT_A8B8G8R8_UINT_PACK32:
1013 		dst.move(0, As<SIMD::UInt>(packed[0]) & SIMD::UInt(0xFF));
1014 		dst.move(1, (As<SIMD::UInt>(packed[0]) >> 8) & SIMD::UInt(0xFF));
1015 		dst.move(2, (As<SIMD::UInt>(packed[0]) >> 16) & SIMD::UInt(0xFF));
1016 		dst.move(3, (As<SIMD::UInt>(packed[0]) >> 24) & SIMD::UInt(0xFF));
1017 		break;
1018 	case VK_FORMAT_R8G8B8A8_SINT:
1019 	case VK_FORMAT_A8B8G8R8_SINT_PACK32:
1020 		dst.move(0, (packed[0] << 24) >> 24);
1021 		dst.move(1, (packed[0] << 16) >> 24);
1022 		dst.move(2, (packed[0] << 8) >> 24);
1023 		dst.move(3, packed[0] >> 24);
1024 		break;
1025 	case VK_FORMAT_R8_UNORM:
1026 		dst.move(0, SIMD::Float((packed[0] & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 0xFF));
1027 		dst.move(1, SIMD::Float(0.0f));
1028 		dst.move(2, SIMD::Float(0.0f));
1029 		dst.move(3, SIMD::Float(1.0f));
1030 		break;
1031 	case VK_FORMAT_R8_SNORM:
1032 		dst.move(0, Max(SIMD::Float((packed[0] << 24) & SIMD::Int(0xFF000000)) * SIMD::Float(1.0f / 0x7F000000), SIMD::Float(-1.0f)));
1033 		dst.move(1, SIMD::Float(0.0f));
1034 		dst.move(2, SIMD::Float(0.0f));
1035 		dst.move(3, SIMD::Float(1.0f));
1036 		break;
1037 	case VK_FORMAT_R8_UINT:
1038 	case VK_FORMAT_S8_UINT:
1039 		dst.move(0, As<SIMD::UInt>(packed[0]) & SIMD::UInt(0xFF));
1040 		dst.move(1, SIMD::UInt(0));
1041 		dst.move(2, SIMD::UInt(0));
1042 		dst.move(3, SIMD::UInt(1));
1043 		break;
1044 	case VK_FORMAT_R8_SINT:
1045 		dst.move(0, (packed[0] << 24) >> 24);
1046 		dst.move(1, SIMD::Int(0));
1047 		dst.move(2, SIMD::Int(0));
1048 		dst.move(3, SIMD::Int(1));
1049 		break;
1050 	case VK_FORMAT_R8G8_UNORM:
1051 		dst.move(0, SIMD::Float(packed[0] & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF));
1052 		dst.move(1, SIMD::Float((packed[0] >> 8) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF));
1053 		dst.move(2, SIMD::Float(0.0f));
1054 		dst.move(3, SIMD::Float(1.0f));
1055 		break;
1056 	case VK_FORMAT_R8G8_SNORM:
1057 		dst.move(0, Max(SIMD::Float((packed[0] << 24) & SIMD::Int(0xFF000000)) * SIMD::Float(1.0f / 0x7F000000), SIMD::Float(-1.0f)));
1058 		dst.move(1, Max(SIMD::Float((packed[0] << 16) & SIMD::Int(0xFF000000)) * SIMD::Float(1.0f / 0x7F000000), SIMD::Float(-1.0f)));
1059 		dst.move(2, SIMD::Float(0.0f));
1060 		dst.move(3, SIMD::Float(1.0f));
1061 		break;
1062 	case VK_FORMAT_R8G8_UINT:
1063 		dst.move(0, As<SIMD::UInt>(packed[0]) & SIMD::UInt(0xFF));
1064 		dst.move(1, (As<SIMD::UInt>(packed[0]) >> 8) & SIMD::UInt(0xFF));
1065 		dst.move(2, SIMD::UInt(0));
1066 		dst.move(3, SIMD::UInt(1));
1067 		break;
1068 	case VK_FORMAT_R8G8_SINT:
1069 		dst.move(0, (packed[0] << 24) >> 24);
1070 		dst.move(1, (packed[0] << 16) >> 24);
1071 		dst.move(2, SIMD::Int(0));
1072 		dst.move(3, SIMD::Int(1));
1073 		break;
1074 	case VK_FORMAT_R16_SFLOAT:
1075 		dst.move(0, halfToFloatBits(As<SIMD::UInt>(packed[0]) & SIMD::UInt(0x0000FFFF)));
1076 		dst.move(1, SIMD::Float(0.0f));
1077 		dst.move(2, SIMD::Float(0.0f));
1078 		dst.move(3, SIMD::Float(1.0f));
1079 		break;
1080 	case VK_FORMAT_R16_UNORM:
1081 		dst.move(0, SIMD::Float(packed[0] & SIMD::Int(0xFFFF)) * SIMD::Float(1.0f / 0xFFFF));
1082 		dst.move(1, SIMD::Float(0.0f));
1083 		dst.move(2, SIMD::Float(0.0f));
1084 		dst.move(3, SIMD::Float(1.0f));
1085 		break;
1086 	case VK_FORMAT_R16_SNORM:
1087 		dst.move(0, Max(SIMD::Float((packed[0] << 16) & SIMD::Int(0xFFFF0000)) * SIMD::Float(1.0f / 0x7FFF0000), SIMD::Float(-1.0f)));
1088 		dst.move(1, SIMD::Float(0.0f));
1089 		dst.move(2, SIMD::Float(0.0f));
1090 		dst.move(3, SIMD::Float(1.0f));
1091 		break;
1092 	case VK_FORMAT_R16_UINT:
1093 		dst.move(0, packed[0] & SIMD::Int(0xFFFF));
1094 		dst.move(1, SIMD::UInt(0));
1095 		dst.move(2, SIMD::UInt(0));
1096 		dst.move(3, SIMD::UInt(1));
1097 		break;
1098 	case VK_FORMAT_R16_SINT:
1099 		dst.move(0, (packed[0] << 16) >> 16);
1100 		dst.move(1, SIMD::Int(0));
1101 		dst.move(2, SIMD::Int(0));
1102 		dst.move(3, SIMD::Int(1));
1103 		break;
1104 	case VK_FORMAT_R16G16_SFLOAT:
1105 		dst.move(0, halfToFloatBits(As<SIMD::UInt>(packed[0]) & SIMD::UInt(0x0000FFFF)));
1106 		dst.move(1, halfToFloatBits((As<SIMD::UInt>(packed[0]) & SIMD::UInt(0xFFFF0000)) >> 16));
1107 		dst.move(2, SIMD::Float(0.0f));
1108 		dst.move(3, SIMD::Float(1.0f));
1109 		break;
1110 	case VK_FORMAT_R16G16_UNORM:
1111 		dst.move(0, SIMD::Float(packed[0] & SIMD::Int(0xFFFF)) * SIMD::Float(1.0f / 0xFFFF));
1112 		dst.move(1, SIMD::Float(As<SIMD::UInt>(packed[0]) >> 16) * SIMD::Float(1.0f / 0xFFFF));
1113 		dst.move(2, SIMD::Float(0.0f));
1114 		dst.move(3, SIMD::Float(1.0f));
1115 		break;
1116 	case VK_FORMAT_R16G16_SNORM:
1117 		dst.move(0, Max(SIMD::Float((packed[0] << 16) & SIMD::Int(0xFFFF0000)) * SIMD::Float(1.0f / 0x7FFF0000), SIMD::Float(-1.0f)));
1118 		dst.move(1, Max(SIMD::Float(packed[0] & SIMD::Int(0xFFFF0000)) * SIMD::Float(1.0f / 0x7FFF0000), SIMD::Float(-1.0f)));
1119 		dst.move(2, SIMD::Float(0.0f));
1120 		dst.move(3, SIMD::Float(1.0f));
1121 		break;
1122 	case VK_FORMAT_R16G16_UINT:
1123 		dst.move(0, packed[0] & SIMD::Int(0xFFFF));
1124 		dst.move(1, (packed[0] >> 16) & SIMD::Int(0xFFFF));
1125 		dst.move(2, SIMD::UInt(0));
1126 		dst.move(3, SIMD::UInt(1));
1127 		break;
1128 	case VK_FORMAT_R16G16_SINT:
1129 		dst.move(0, (packed[0] << 16) >> 16);
1130 		dst.move(1, packed[0] >> 16);
1131 		dst.move(2, SIMD::Int(0));
1132 		dst.move(3, SIMD::Int(1));
1133 		break;
1134 	case VK_FORMAT_R32G32_SINT:
1135 	case VK_FORMAT_R32G32_UINT:
1136 		dst.move(0, packed[0]);
1137 		dst.move(1, packed[1]);
1138 		dst.move(2, SIMD::Int(0));
1139 		dst.move(3, SIMD::Int(1));
1140 		break;
1141 	case VK_FORMAT_R32G32_SFLOAT:
1142 		dst.move(0, packed[0]);
1143 		dst.move(1, packed[1]);
1144 		dst.move(2, SIMD::Float(0.0f));
1145 		dst.move(3, SIMD::Float(1.0f));
1146 		break;
1147 	case VK_FORMAT_A2B10G10R10_UINT_PACK32:
1148 		dst.move(0, packed[0] & SIMD::Int(0x3FF));
1149 		dst.move(1, (packed[0] >> 10) & SIMD::Int(0x3FF));
1150 		dst.move(2, (packed[0] >> 20) & SIMD::Int(0x3FF));
1151 		dst.move(3, (packed[0] >> 30) & SIMD::Int(0x3));
1152 		break;
1153 	case VK_FORMAT_A2R10G10B10_UINT_PACK32:
1154 		dst.move(2, packed[0] & SIMD::Int(0x3FF));
1155 		dst.move(1, (packed[0] >> 10) & SIMD::Int(0x3FF));
1156 		dst.move(0, (packed[0] >> 20) & SIMD::Int(0x3FF));
1157 		dst.move(3, (packed[0] >> 30) & SIMD::Int(0x3));
1158 		break;
1159 	case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
1160 		dst.move(0, SIMD::Float((packed[0]) & SIMD::Int(0x3FF)) * SIMD::Float(1.0f / 0x3FF));
1161 		dst.move(1, SIMD::Float((packed[0] >> 10) & SIMD::Int(0x3FF)) * SIMD::Float(1.0f / 0x3FF));
1162 		dst.move(2, SIMD::Float((packed[0] >> 20) & SIMD::Int(0x3FF)) * SIMD::Float(1.0f / 0x3FF));
1163 		dst.move(3, SIMD::Float((packed[0] >> 30) & SIMD::Int(0x3)) * SIMD::Float(1.0f / 0x3));
1164 		break;
1165 	case VK_FORMAT_A2R10G10B10_UNORM_PACK32:
1166 		dst.move(2, SIMD::Float((packed[0]) & SIMD::Int(0x3FF)) * SIMD::Float(1.0f / 0x3FF));
1167 		dst.move(1, SIMD::Float((packed[0] >> 10) & SIMD::Int(0x3FF)) * SIMD::Float(1.0f / 0x3FF));
1168 		dst.move(0, SIMD::Float((packed[0] >> 20) & SIMD::Int(0x3FF)) * SIMD::Float(1.0f / 0x3FF));
1169 		dst.move(3, SIMD::Float((packed[0] >> 30) & SIMD::Int(0x3)) * SIMD::Float(1.0f / 0x3));
1170 		break;
1171 	case VK_FORMAT_R4G4B4A4_UNORM_PACK16:
1172 		dst.move(0, SIMD::Float((packed[0] >> 12) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
1173 		dst.move(1, SIMD::Float((packed[0] >> 8) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
1174 		dst.move(2, SIMD::Float((packed[0] >> 4) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
1175 		dst.move(3, SIMD::Float((packed[0]) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
1176 		break;
1177 	case VK_FORMAT_B4G4R4A4_UNORM_PACK16:
1178 		dst.move(0, SIMD::Float((packed[0] >> 4) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
1179 		dst.move(1, SIMD::Float((packed[0] >> 8) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
1180 		dst.move(2, SIMD::Float((packed[0] >> 12) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
1181 		dst.move(3, SIMD::Float((packed[0]) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
1182 		break;
1183 	case VK_FORMAT_A4R4G4B4_UNORM_PACK16:
1184 		dst.move(0, SIMD::Float((packed[0] >> 8) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
1185 		dst.move(1, SIMD::Float((packed[0] >> 4) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
1186 		dst.move(2, SIMD::Float((packed[0]) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
1187 		dst.move(3, SIMD::Float((packed[0] >> 12) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
1188 		break;
1189 	case VK_FORMAT_A4B4G4R4_UNORM_PACK16:
1190 		dst.move(0, SIMD::Float((packed[0]) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
1191 		dst.move(1, SIMD::Float((packed[0] >> 4) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
1192 		dst.move(2, SIMD::Float((packed[0] >> 8) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
1193 		dst.move(3, SIMD::Float((packed[0] >> 12) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
1194 		break;
1195 	case VK_FORMAT_R5G6B5_UNORM_PACK16:
1196 		dst.move(0, SIMD::Float((packed[0] >> 11) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
1197 		dst.move(1, SIMD::Float((packed[0] >> 5) & SIMD::Int(0x3F)) * SIMD::Float(1.0f / 0x3F));
1198 		dst.move(2, SIMD::Float((packed[0]) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
1199 		dst.move(3, SIMD::Float(1.0f));
1200 		break;
1201 	case VK_FORMAT_B5G6R5_UNORM_PACK16:
1202 		dst.move(0, SIMD::Float((packed[0]) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
1203 		dst.move(1, SIMD::Float((packed[0] >> 5) & SIMD::Int(0x3F)) * SIMD::Float(1.0f / 0x3F));
1204 		dst.move(2, SIMD::Float((packed[0] >> 11) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
1205 		dst.move(3, SIMD::Float(1.0f));
1206 		break;
1207 	case VK_FORMAT_R5G5B5A1_UNORM_PACK16:
1208 		dst.move(0, SIMD::Float((packed[0] >> 11) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
1209 		dst.move(1, SIMD::Float((packed[0] >> 6) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
1210 		dst.move(2, SIMD::Float((packed[0] >> 1) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
1211 		dst.move(3, SIMD::Float((packed[0]) & SIMD::Int(0x1)));
1212 		break;
1213 	case VK_FORMAT_B5G5R5A1_UNORM_PACK16:
1214 		dst.move(0, SIMD::Float((packed[0] >> 1) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
1215 		dst.move(1, SIMD::Float((packed[0] >> 6) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
1216 		dst.move(2, SIMD::Float((packed[0] >> 11) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
1217 		dst.move(3, SIMD::Float((packed[0]) & SIMD::Int(0x1)));
1218 		break;
1219 	case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
1220 		dst.move(0, SIMD::Float((packed[0] >> 10) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
1221 		dst.move(1, SIMD::Float((packed[0] >> 5) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
1222 		dst.move(2, SIMD::Float((packed[0]) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
1223 		dst.move(3, SIMD::Float((packed[0] >> 15) & SIMD::Int(0x1)));
1224 		break;
1225 	case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
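		// Unsigned 11- and 10-bit floats use the same 5-bit exponent as half floats,
		// so shifting their bits into the half's mantissa-aligned position lets
		// halfToFloatBits() perform the widening.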
1226 		dst.move(0, halfToFloatBits((packed[0] << 4) & SIMD::Int(0x7FF0)));
1227 		dst.move(1, halfToFloatBits((packed[0] >> 7) & SIMD::Int(0x7FF0)));
1228 		dst.move(2, halfToFloatBits((packed[0] >> 17) & SIMD::Int(0x7FE0)));
1229 		dst.move(3, SIMD::Float(1.0f));
1230 		break;
1231 	default:
1232 		UNSUPPORTED("VkFormat %d", int(imageFormat));
1233 		break;
1234 	}
1235 }
1236 
1237 void SpirvEmitter::EmitImageWrite(const ImageInstruction &instruction)
1238 {
1239 	auto &image = shader.getObject(instruction.imageId);
1240 	auto &imageType = shader.getType(image);
1241 
1242 	ASSERT(imageType.definition.opcode() == spv::OpTypeImage);
1243 	ASSERT(static_cast<spv::Dim>(instruction.dim) != spv::DimSubpassData);  // "Its Dim operand must not be SubpassData."
1244 
1245 	auto coordinate = Operand(shader, *this, instruction.coordinateId);
1246 	auto texel = Operand(shader, *this, instruction.texelId);
1247 
1248 	Array<SIMD::Int> coord(5);  // uvwa & sample
1249 
1250 	uint32_t i = 0;
1251 	for(; i < instruction.coordinates; i++)
1252 	{
1253 		coord[i] = coordinate.Int(i);
1254 	}
1255 
1256 	if(instruction.sample)
1257 	{
1258 		coord[i] = Operand(shader, *this, instruction.sampleId).Int(0);
1259 	}
1260 
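	// Elements [0..3] hold the texel components; element [4] carries the
	// active-lane store mask consumed by the sampler routine or WriteImage().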
1261 	Array<SIMD::Int> texelAndMask(5);
1262 	for(uint32_t i = 0; i < texel.componentCount; ++i)
1263 	{
1264 		texelAndMask[i] = texel.Int(i);
1265 	}
1266 	for(uint32_t i = texel.componentCount; i < 4; ++i)
1267 	{
1268 		texelAndMask[i] = SIMD::Int(0);
1269 	}
1270 	texelAndMask[4] = activeStoresAndAtomicsMask();
1271 
1272 	vk::Format imageFormat = SpirvFormatToVulkanFormat(static_cast<spv::ImageFormat>(instruction.imageFormat));
1273 
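	// A base-plus-offset pointer means every lane addresses the same descriptor;
	// otherwise each lane may reference a different one and is handled per lane below.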
1274 	SIMD::Pointer ptr = getPointer(instruction.imageId);
1275 	if(ptr.isBasePlusOffset)
1276 	{
1277 		Pointer<Byte> imageDescriptor = ptr.getUniformPointer();  // vk::StorageImageDescriptor* or vk::SampledImageDescriptor*
1278 		Pointer<Byte> samplerDescriptor = getSamplerDescriptor(imageDescriptor, instruction);
1279 
1280 		if(imageFormat == VK_FORMAT_UNDEFINED)  // spv::ImageFormatUnknown
1281 		{
1282 			Pointer<Byte> samplerFunction = lookupSamplerFunction(imageDescriptor, samplerDescriptor, instruction);
1283 
1284 			Call<ImageSampler>(samplerFunction, imageDescriptor, &coord, &texelAndMask, routine->constants);
1285 		}
1286 		else
1287 		{
1288 			WriteImage(instruction, imageDescriptor, &coord, &texelAndMask, imageFormat);
1289 		}
1290 	}
1291 	else
1292 	{
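		// Non-uniform descriptor: handle one lane at a time, restricting the store
		// mask to the current lane so each write uses that lane's own descriptor.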
1293 		for(int j = 0; j < SIMD::Width; j++)
1294 		{
1295 			SIMD::Int singleLaneMask = 0;
1296 			singleLaneMask = Insert(singleLaneMask, 0xffffffff, j);
1297 			texelAndMask[4] = activeStoresAndAtomicsMask() & singleLaneMask;
1298 			Pointer<Byte> imageDescriptor = ptr.getPointerForLane(j);
1299 			Pointer<Byte> samplerDescriptor = getSamplerDescriptor(imageDescriptor, instruction, j);
1300 
1301 			if(imageFormat == VK_FORMAT_UNDEFINED)  // spv::ImageFormatUnknown
1302 			{
1303 				Pointer<Byte> samplerFunction = lookupSamplerFunction(imageDescriptor, samplerDescriptor, instruction);
1304 
1305 				Call<ImageSampler>(samplerFunction, imageDescriptor, &coord, &texelAndMask, routine->constants);
1306 			}
1307 			else
1308 			{
1309 				WriteImage(instruction, imageDescriptor, &coord, &texelAndMask, imageFormat);
1310 			}
1311 		}
1312 	}
1313 }
1314 
1315 void SpirvEmitter::WriteImage(ImageInstructionSignature instruction, Pointer<Byte> descriptor, const Pointer<SIMD::Int> &coord, const Pointer<SIMD::Int> &texelAndMask, vk::Format imageFormat)
1316 {
1317 	SIMD::Int texel[4];
1318 	texel[0] = texelAndMask[0];
1319 	texel[1] = texelAndMask[1];
1320 	texel[2] = texelAndMask[2];
1321 	texel[3] = texelAndMask[3];
1322 	SIMD::Int mask = texelAndMask[4];
1323 
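	// Pack the texel into the format's in-memory layout: UNORM/SNORM components are
	// clamped and rounded to fixed point, integer components are masked to their bit
	// width, and float16 components go through floatToHalfBits().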
1324 	SIMD::Int packed[4];
1325 	switch(imageFormat)
1326 	{
1327 	case VK_FORMAT_R32G32B32A32_SFLOAT:
1328 	case VK_FORMAT_R32G32B32A32_SINT:
1329 	case VK_FORMAT_R32G32B32A32_UINT:
1330 		packed[0] = texel[0];
1331 		packed[1] = texel[1];
1332 		packed[2] = texel[2];
1333 		packed[3] = texel[3];
1334 		break;
1335 	case VK_FORMAT_R32_SFLOAT:
1336 	case VK_FORMAT_R32_SINT:
1337 	case VK_FORMAT_R32_UINT:
1338 		packed[0] = texel[0];
1339 		break;
1340 	case VK_FORMAT_R8G8B8A8_UNORM:
1341 	case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
1342 		packed[0] = (SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[0]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) |
1343 		            ((SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[1]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 8) |
1344 		            ((SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[2]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 16) |
1345 		            ((SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[3]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 24);
1346 		break;
1347 	case VK_FORMAT_B8G8R8A8_UNORM:
1348 		packed[0] = (SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[2]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) |
1349 		            ((SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[1]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 8) |
1350 		            ((SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[0]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 16) |
1351 		            ((SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[3]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 24);
1352 		break;
1353 	case VK_FORMAT_B8G8R8A8_SRGB:
1354 		packed[0] = (SIMD::UInt(Round(Min(Max(linearToSRGB(As<SIMD::Float>(texel[2])), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) |
1355 		            ((SIMD::UInt(Round(Min(Max(linearToSRGB(As<SIMD::Float>(texel[1])), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 8) |
1356 		            ((SIMD::UInt(Round(Min(Max(linearToSRGB(As<SIMD::Float>(texel[0])), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 16) |
1357 		            ((SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[3]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 24);
1358 		break;
1359 	case VK_FORMAT_R8G8B8A8_SNORM:
1360 	case VK_FORMAT_A8B8G8R8_SNORM_PACK32:
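		// snorm8 packs as Round(clamp(f, -1, 1) * 127); masking to 0xFF keeps the
		// two's-complement bit pattern intact when the components are OR'd together.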
1361 		packed[0] = (SIMD::Int(Round(Min(Max(As<SIMD::Float>(texel[0]), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
1362 		             SIMD::Int(0xFF)) |
1363 		            ((SIMD::Int(Round(Min(Max(As<SIMD::Float>(texel[1]), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
1364 		              SIMD::Int(0xFF))
1365 		             << 8) |
1366 		            ((SIMD::Int(Round(Min(Max(As<SIMD::Float>(texel[2]), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
1367 		              SIMD::Int(0xFF))
1368 		             << 16) |
1369 		            ((SIMD::Int(Round(Min(Max(As<SIMD::Float>(texel[3]), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
1370 		              SIMD::Int(0xFF))
1371 		             << 24);
1372 		break;
1373 	case VK_FORMAT_R8G8B8A8_SINT:
1374 	case VK_FORMAT_R8G8B8A8_UINT:
1375 	case VK_FORMAT_A8B8G8R8_SINT_PACK32:
1376 	case VK_FORMAT_A8B8G8R8_UINT_PACK32:
1377 		packed[0] = (SIMD::UInt(As<SIMD::UInt>(texel[0]) & SIMD::UInt(0xff))) |
1378 		            (SIMD::UInt(As<SIMD::UInt>(texel[1]) & SIMD::UInt(0xff)) << 8) |
1379 		            (SIMD::UInt(As<SIMD::UInt>(texel[2]) & SIMD::UInt(0xff)) << 16) |
1380 		            (SIMD::UInt(As<SIMD::UInt>(texel[3]) & SIMD::UInt(0xff)) << 24);
1381 		break;
1382 	case VK_FORMAT_R16G16B16A16_SFLOAT:
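		// The boolean argument of floatToHalfBits() selects the upper 16 bits, so two
		// half floats pack into each 32-bit word.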
1383 		packed[0] = floatToHalfBits(As<SIMD::UInt>(texel[0]), false) | floatToHalfBits(As<SIMD::UInt>(texel[1]), true);
1384 		packed[1] = floatToHalfBits(As<SIMD::UInt>(texel[2]), false) | floatToHalfBits(As<SIMD::UInt>(texel[3]), true);
1385 		break;
1386 	case VK_FORMAT_R16G16B16A16_SINT:
1387 	case VK_FORMAT_R16G16B16A16_UINT:
1388 		packed[0] = SIMD::UInt(As<SIMD::UInt>(texel[0]) & SIMD::UInt(0xFFFF)) | (SIMD::UInt(As<SIMD::UInt>(texel[1]) & SIMD::UInt(0xFFFF)) << 16);
1389 		packed[1] = SIMD::UInt(As<SIMD::UInt>(texel[2]) & SIMD::UInt(0xFFFF)) | (SIMD::UInt(As<SIMD::UInt>(texel[3]) & SIMD::UInt(0xFFFF)) << 16);
1390 		break;
1391 	case VK_FORMAT_R32G32_SFLOAT:
1392 	case VK_FORMAT_R32G32_SINT:
1393 	case VK_FORMAT_R32G32_UINT:
1394 		packed[0] = texel[0];
1395 		packed[1] = texel[1];
1396 		break;
1397 	case VK_FORMAT_R16G16_SFLOAT:
1398 		packed[0] = floatToHalfBits(As<SIMD::UInt>(texel[0]), false) | floatToHalfBits(As<SIMD::UInt>(texel[1]), true);
1399 		break;
1400 	case VK_FORMAT_R16G16_SINT:
1401 	case VK_FORMAT_R16G16_UINT:
1402 		packed[0] = SIMD::UInt(As<SIMD::UInt>(texel[0]) & SIMD::UInt(0xFFFF)) | (SIMD::UInt(As<SIMD::UInt>(texel[1]) & SIMD::UInt(0xFFFF)) << 16);
1403 		break;
1404 	case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
1405 		// Truncates instead of rounding. See b/147900455
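		// The 0x7FF0/0x7FE0 masks drop the half float's low mantissa bits, so the
		// narrowing truncates them instead of rounding.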
1406 		packed[0] = ((floatToHalfBits(As<SIMD::UInt>(Max(As<SIMD::Float>(texel[0]), SIMD::Float(0.0f))), false) & SIMD::UInt(0x7FF0)) >> 4) |
1407 		            ((floatToHalfBits(As<SIMD::UInt>(Max(As<SIMD::Float>(texel[1]), SIMD::Float(0.0f))), false) & SIMD::UInt(0x7FF0)) << 7) |
1408 		            ((floatToHalfBits(As<SIMD::UInt>(Max(As<SIMD::Float>(texel[2]), SIMD::Float(0.0f))), false) & SIMD::UInt(0x7FE0)) << 17);
1409 		break;
1410 	case VK_FORMAT_R16_SFLOAT:
1411 		packed[0] = floatToHalfBits(As<SIMD::UInt>(texel[0]), false);
1412 		break;
1413 	case VK_FORMAT_R16G16B16A16_UNORM:
1414 		packed[0] = SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[0]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0xFFFF))) |
1415 		            (SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[1]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0xFFFF))) << 16);
1416 		packed[1] = SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[2]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0xFFFF))) |
1417 		            (SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[3]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0xFFFF))) << 16);
1418 		break;
1419 	case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
1420 		packed[0] = (SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[0]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x3FF)))) |
1421 		            ((SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[1]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x3FF)))) << 10) |
1422 		            ((SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[2]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x3FF)))) << 20) |
1423 		            ((SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[3]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x3)))) << 30);
1424 		break;
1425 	case VK_FORMAT_R16G16_UNORM:
1426 		packed[0] = SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[0]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0xFFFF))) |
1427 		            (SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[1]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0xFFFF))) << 16);
1428 		break;
1429 	case VK_FORMAT_R8G8_UNORM:
1430 		packed[0] = SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[0]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0xFF))) |
1431 		            (SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[1]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0xFF))) << 8);
1432 		break;
1433 	case VK_FORMAT_R16_UNORM:
1434 		packed[0] = SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[0]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0xFFFF)));
1435 		break;
1436 	case VK_FORMAT_R8_UNORM:
1437 		packed[0] = SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[0]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0xFF)));
1438 		break;
1439 	case VK_FORMAT_R16G16B16A16_SNORM:
1440 		packed[0] = (SIMD::Int(Round(Min(Max(As<SIMD::Float>(texel[0]), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x7FFF))) & SIMD::Int(0xFFFF)) |
1441 		            (SIMD::Int(Round(Min(Max(As<SIMD::Float>(texel[1]), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x7FFF))) << 16);
1442 		packed[1] = (SIMD::Int(Round(Min(Max(As<SIMD::Float>(texel[2]), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x7FFF))) & SIMD::Int(0xFFFF)) |
1443 		            (SIMD::Int(Round(Min(Max(As<SIMD::Float>(texel[3]), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x7FFF))) << 16);
1444 		break;
1445 	case VK_FORMAT_R16G16_SNORM:
1446 		packed[0] = (SIMD::Int(Round(Min(Max(As<SIMD::Float>(texel[0]), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x7FFF))) & SIMD::Int(0xFFFF)) |
1447 		            (SIMD::Int(Round(Min(Max(As<SIMD::Float>(texel[1]), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x7FFF))) << 16);
1448 		break;
1449 	case VK_FORMAT_R8G8_SNORM:
1450 		packed[0] = (SIMD::Int(Round(Min(Max(As<SIMD::Float>(texel[0]), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x7F))) & SIMD::Int(0xFF)) |
1451 		            (SIMD::Int(Round(Min(Max(As<SIMD::Float>(texel[1]), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x7F))) << 8);
1452 		break;
1453 	case VK_FORMAT_R16_SNORM:
1454 		packed[0] = SIMD::Int(Round(Min(Max(As<SIMD::Float>(texel[0]), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x7FFF)));
1455 		break;
1456 	case VK_FORMAT_R8_SNORM:
1457 		packed[0] = SIMD::Int(Round(Min(Max(As<SIMD::Float>(texel[0]), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x7F)));
1458 		break;
1459 	case VK_FORMAT_R8G8_SINT:
1460 	case VK_FORMAT_R8G8_UINT:
1461 		packed[0] = SIMD::UInt(As<SIMD::UInt>(texel[0]) & SIMD::UInt(0xFF)) | (SIMD::UInt(As<SIMD::UInt>(texel[1]) & SIMD::UInt(0xFF)) << 8);
1462 		break;
1463 	case VK_FORMAT_R16_SINT:
1464 	case VK_FORMAT_R16_UINT:
1465 		packed[0] = SIMD::UInt(As<SIMD::UInt>(texel[0]) & SIMD::UInt(0xFFFF));
1466 		break;
1467 	case VK_FORMAT_R8_SINT:
1468 	case VK_FORMAT_R8_UINT:
1469 		packed[0] = SIMD::UInt(As<SIMD::UInt>(texel[0]) & SIMD::UInt(0xFF));
1470 		break;
1471 	case VK_FORMAT_A2B10G10R10_UINT_PACK32:
1472 		packed[0] = (SIMD::UInt(As<SIMD::UInt>(texel[0]) & SIMD::UInt(0x3FF))) |
1473 		            (SIMD::UInt(As<SIMD::UInt>(texel[1]) & SIMD::UInt(0x3FF)) << 10) |
1474 		            (SIMD::UInt(As<SIMD::UInt>(texel[2]) & SIMD::UInt(0x3FF)) << 20) |
1475 		            (SIMD::UInt(As<SIMD::UInt>(texel[3]) & SIMD::UInt(0x3)) << 30);
1476 		break;
1477 	default:
1478 		UNSUPPORTED("VkFormat %d", int(imageFormat));
1479 		break;
1480 	}
1481 
1482 	// "The integer texel coordinates are validated according to the same rules as for texel input coordinate
1483 	//  validation. If the texel fails integer texel coordinate validation, then the write has no effect."
1484 	// - https://www.khronos.org/registry/vulkan/specs/1.2/html/chap16.html#textures-output-coordinate-validation
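	// Nullify discards stores to out-of-bounds texels, matching the "no effect" rule above.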
1485 	auto robustness = OutOfBoundsBehavior::Nullify;
1486 	// GetTexelAddress() only needs the SpirvRoutine* for SubpassData accesses (i.e. input attachments).
1487 	const SpirvRoutine *routine = nullptr;
1488 
1489 	SIMD::Int uvwa[4];
1490 	SIMD::Int sample;
1491 
1492 	uint32_t i = 0;
1493 	for(; i < instruction.coordinates; i++)
1494 	{
1495 		uvwa[i] = As<SIMD::Int>(coord[i]);
1496 	}
1497 
1498 	if(instruction.sample)
1499 	{
1500 		sample = As<SIMD::Int>(coord[i]);
1501 	}
1502 
1503 	auto texelPtr = GetTexelAddress(instruction, descriptor, uvwa, sample, imageFormat, robustness, routine);
1504 
1505 	const int texelSize = imageFormat.bytes();
1506 
1507 	// Scatter packed texel data.
1508 	// TODO(b/160531165): Provide scatter abstractions for various element sizes.
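	// 32-, 64- and 128-bit texels are stored as masked 32-bit SIMD words; 16- and
	// 8-bit texels fall back to per-lane scalar stores guarded by the mask.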
1509 	if(texelSize == 4 || texelSize == 8 || texelSize == 16)
1510 	{
1511 		for(auto i = 0; i < texelSize / 4; i++)
1512 		{
1513 			texelPtr.Store(packed[i], robustness, mask);
1514 			texelPtr += sizeof(float);
1515 		}
1516 	}
1517 	else if(texelSize == 2)
1518 	{
1519 		mask = mask & texelPtr.isInBounds(2, robustness);
1520 
1521 		for(int i = 0; i < SIMD::Width; i++)
1522 		{
1523 			If(Extract(mask, i) != 0)
1524 			{
1525 				*Pointer<Short>(texelPtr.getPointerForLane(i)) = Short(Extract(packed[0], i));
1526 			}
1527 		}
1528 	}
1529 	else if(texelSize == 1)
1530 	{
1531 		mask = mask & texelPtr.isInBounds(1, robustness);
1532 
1533 		for(int i = 0; i < SIMD::Width; i++)
1534 		{
1535 			If(Extract(mask, i) != 0)
1536 			{
1537 				*Pointer<Byte>(texelPtr.getPointerForLane(i)) = Byte(Extract(packed[0], i));
1538 			}
1539 		}
1540 	}
1541 	else
1542 		UNREACHABLE("texelSize: %d", int(texelSize));
1543 }
1544 
1545 void SpirvEmitter::EmitImageTexelPointer(const ImageInstruction &instruction)
1546 {
1547 	auto coordinate = Operand(shader, *this, instruction.coordinateId);
1548 
1549 	SIMD::Pointer ptr = getPointer(instruction.imageId);
1550 
1551 	// VK_EXT_image_robustness requires checking for out-of-bounds accesses.
1552 	// TODO(b/162327166): Only perform bounds checks when VK_EXT_image_robustness is enabled.
1553 	auto robustness = OutOfBoundsBehavior::Nullify;
1554 	vk::Format imageFormat = SpirvFormatToVulkanFormat(static_cast<spv::ImageFormat>(instruction.imageFormat));
1555 
1556 	SIMD::Int uvwa[4];
1557 
1558 	for(uint32_t i = 0; i < instruction.coordinates; i++)
1559 	{
1560 		uvwa[i] = coordinate.Int(i);
1561 	}
1562 
1563 	SIMD::Int sample = Operand(shader, *this, instruction.sampleId).Int(0);
1564 
1565 	auto texelPtr = ptr.isBasePlusOffset
1566 	                    ? GetTexelAddress(instruction, ptr.getUniformPointer(), uvwa, sample, imageFormat, robustness, routine)
1567 	                    : GetNonUniformTexelAddress(instruction, ptr, uvwa, sample, imageFormat, robustness, activeLaneMask(), routine);
1568 
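	// The resulting per-lane pointer feeds subsequent atomic operations on the image texel.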
1569 	createPointer(instruction.resultId, texelPtr);
1570 }
1571 
1572 void SpirvEmitter::EmitSampledImage(InsnIterator insn)
1573 {
1574 	Object::ID resultId = insn.word(2);
1575 	Object::ID imageId = insn.word(3);
1576 	Object::ID samplerId = insn.word(4);
1577 
1578 	// Create a sampled image, containing both a sampler and an image.
1579 	createSampledImage(resultId, { getPointer(imageId), samplerId });
1580 }
1581 
1582 void SpirvEmitter::EmitImage(InsnIterator insn)
1583 {
1584 	Object::ID resultId = insn.word(2);
1585 	Object::ID imageId = insn.word(3);
1586 
1587 	// Extract the image from a sampled image.
1588 	createPointer(resultId, getImage(imageId));
1589 }
1590 
1591 }  // namespace sw
1592