xref: /aosp_15_r20/external/swiftshader/src/Pipeline/VertexRoutine.cpp (revision 03ce13f70fcc45d86ee91b7ee4cab1936a95046e)
1*03ce13f7SAndroid Build Coastguard Worker // Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2*03ce13f7SAndroid Build Coastguard Worker //
3*03ce13f7SAndroid Build Coastguard Worker // Licensed under the Apache License, Version 2.0 (the "License");
4*03ce13f7SAndroid Build Coastguard Worker // you may not use this file except in compliance with the License.
5*03ce13f7SAndroid Build Coastguard Worker // You may obtain a copy of the License at
6*03ce13f7SAndroid Build Coastguard Worker //
7*03ce13f7SAndroid Build Coastguard Worker //    http://www.apache.org/licenses/LICENSE-2.0
8*03ce13f7SAndroid Build Coastguard Worker //
9*03ce13f7SAndroid Build Coastguard Worker // Unless required by applicable law or agreed to in writing, software
10*03ce13f7SAndroid Build Coastguard Worker // distributed under the License is distributed on an "AS IS" BASIS,
11*03ce13f7SAndroid Build Coastguard Worker // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12*03ce13f7SAndroid Build Coastguard Worker // See the License for the specific language governing permissions and
13*03ce13f7SAndroid Build Coastguard Worker // limitations under the License.
14*03ce13f7SAndroid Build Coastguard Worker 
15*03ce13f7SAndroid Build Coastguard Worker #include "VertexRoutine.hpp"
16*03ce13f7SAndroid Build Coastguard Worker 
17*03ce13f7SAndroid Build Coastguard Worker #include "Constants.hpp"
18*03ce13f7SAndroid Build Coastguard Worker #include "SpirvShader.hpp"
19*03ce13f7SAndroid Build Coastguard Worker #include "Device/Clipper.hpp"
20*03ce13f7SAndroid Build Coastguard Worker #include "Device/Renderer.hpp"
21*03ce13f7SAndroid Build Coastguard Worker #include "Device/Vertex.hpp"
22*03ce13f7SAndroid Build Coastguard Worker #include "System/Debug.hpp"
23*03ce13f7SAndroid Build Coastguard Worker #include "System/Half.hpp"
24*03ce13f7SAndroid Build Coastguard Worker #include "Vulkan/VkDevice.hpp"
25*03ce13f7SAndroid Build Coastguard Worker 
26*03ce13f7SAndroid Build Coastguard Worker namespace sw {
27*03ce13f7SAndroid Build Coastguard Worker 
VertexRoutine(const VertexProcessor::State & state,const vk::PipelineLayout * pipelineLayout,const SpirvShader * spirvShader)28*03ce13f7SAndroid Build Coastguard Worker VertexRoutine::VertexRoutine(
29*03ce13f7SAndroid Build Coastguard Worker     const VertexProcessor::State &state,
30*03ce13f7SAndroid Build Coastguard Worker     const vk::PipelineLayout *pipelineLayout,
31*03ce13f7SAndroid Build Coastguard Worker     const SpirvShader *spirvShader)
32*03ce13f7SAndroid Build Coastguard Worker     : routine(pipelineLayout)
33*03ce13f7SAndroid Build Coastguard Worker     , state(state)
34*03ce13f7SAndroid Build Coastguard Worker     , spirvShader(spirvShader)
35*03ce13f7SAndroid Build Coastguard Worker {
36*03ce13f7SAndroid Build Coastguard Worker 	spirvShader->emitProlog(&routine);
37*03ce13f7SAndroid Build Coastguard Worker }
38*03ce13f7SAndroid Build Coastguard Worker 
~VertexRoutine()39*03ce13f7SAndroid Build Coastguard Worker VertexRoutine::~VertexRoutine()
40*03ce13f7SAndroid Build Coastguard Worker {
41*03ce13f7SAndroid Build Coastguard Worker }
42*03ce13f7SAndroid Build Coastguard Worker 
generate()43*03ce13f7SAndroid Build Coastguard Worker void VertexRoutine::generate()
44*03ce13f7SAndroid Build Coastguard Worker {
45*03ce13f7SAndroid Build Coastguard Worker 	Pointer<Byte> cache = task + OFFSET(VertexTask, vertexCache);
46*03ce13f7SAndroid Build Coastguard Worker 	Pointer<Byte> vertexCache = cache + OFFSET(VertexCache, vertex);
47*03ce13f7SAndroid Build Coastguard Worker 	Pointer<UInt> tagCache = Pointer<UInt>(cache + OFFSET(VertexCache, tag));
48*03ce13f7SAndroid Build Coastguard Worker 
49*03ce13f7SAndroid Build Coastguard Worker 	UInt vertexCount = *Pointer<UInt>(task + OFFSET(VertexTask, vertexCount));
50*03ce13f7SAndroid Build Coastguard Worker 
51*03ce13f7SAndroid Build Coastguard Worker 	constants = device + OFFSET(vk::Device, constants);
52*03ce13f7SAndroid Build Coastguard Worker 
53*03ce13f7SAndroid Build Coastguard Worker 	// Check the cache one vertex index at a time. If a hit occurs, copy from the cache to the 'vertex' output buffer.
54*03ce13f7SAndroid Build Coastguard Worker 	// On a cache miss, process a SIMD width of consecutive indices from the input batch. They're written to the cache
55*03ce13f7SAndroid Build Coastguard Worker 	// in reverse order to guarantee that the first one doesn't get evicted and can be written out.
56*03ce13f7SAndroid Build Coastguard Worker 
57*03ce13f7SAndroid Build Coastguard Worker 	Do
58*03ce13f7SAndroid Build Coastguard Worker 	{
59*03ce13f7SAndroid Build Coastguard Worker 		UInt index = *batch;
60*03ce13f7SAndroid Build Coastguard Worker 		UInt cacheIndex = index & VertexCache::TAG_MASK;
61*03ce13f7SAndroid Build Coastguard Worker 
62*03ce13f7SAndroid Build Coastguard Worker 		If(tagCache[cacheIndex] != index)
63*03ce13f7SAndroid Build Coastguard Worker 		{
64*03ce13f7SAndroid Build Coastguard Worker 			readInput(batch);
65*03ce13f7SAndroid Build Coastguard Worker 			program(batch, vertexCount);
66*03ce13f7SAndroid Build Coastguard Worker 			computeClipFlags();
67*03ce13f7SAndroid Build Coastguard Worker 			computeCullMask();
68*03ce13f7SAndroid Build Coastguard Worker 
69*03ce13f7SAndroid Build Coastguard Worker 			writeCache(vertexCache, tagCache, batch);
70*03ce13f7SAndroid Build Coastguard Worker 		}
71*03ce13f7SAndroid Build Coastguard Worker 
72*03ce13f7SAndroid Build Coastguard Worker 		Pointer<Byte> cacheEntry = vertexCache + cacheIndex * UInt((int)sizeof(Vertex));
73*03ce13f7SAndroid Build Coastguard Worker 
74*03ce13f7SAndroid Build Coastguard Worker 		// For points, vertexCount is 1 per primitive, so duplicate vertex for all 3 vertices of the primitive
75*03ce13f7SAndroid Build Coastguard Worker 		for(int i = 0; i < (state.isPoint ? 3 : 1); i++)
76*03ce13f7SAndroid Build Coastguard Worker 		{
77*03ce13f7SAndroid Build Coastguard Worker 			writeVertex(vertex, cacheEntry);
78*03ce13f7SAndroid Build Coastguard Worker 			vertex += sizeof(Vertex);
79*03ce13f7SAndroid Build Coastguard Worker 		}
80*03ce13f7SAndroid Build Coastguard Worker 
81*03ce13f7SAndroid Build Coastguard Worker 		batch = Pointer<UInt>(Pointer<Byte>(batch) + sizeof(uint32_t));
82*03ce13f7SAndroid Build Coastguard Worker 		vertexCount--;
83*03ce13f7SAndroid Build Coastguard Worker 	}
84*03ce13f7SAndroid Build Coastguard Worker 	Until(vertexCount == 0);
85*03ce13f7SAndroid Build Coastguard Worker 
86*03ce13f7SAndroid Build Coastguard Worker 	Return();
87*03ce13f7SAndroid Build Coastguard Worker }
88*03ce13f7SAndroid Build Coastguard Worker 
readInput(Pointer<UInt> & batch)89*03ce13f7SAndroid Build Coastguard Worker void VertexRoutine::readInput(Pointer<UInt> &batch)
90*03ce13f7SAndroid Build Coastguard Worker {
91*03ce13f7SAndroid Build Coastguard Worker 	for(int i = 0; i < MAX_INTERFACE_COMPONENTS; i += 4)
92*03ce13f7SAndroid Build Coastguard Worker 	{
93*03ce13f7SAndroid Build Coastguard Worker 		if(spirvShader->inputs[i + 0].Type != Spirv::ATTRIBTYPE_UNUSED ||
94*03ce13f7SAndroid Build Coastguard Worker 		   spirvShader->inputs[i + 1].Type != Spirv::ATTRIBTYPE_UNUSED ||
95*03ce13f7SAndroid Build Coastguard Worker 		   spirvShader->inputs[i + 2].Type != Spirv::ATTRIBTYPE_UNUSED ||
96*03ce13f7SAndroid Build Coastguard Worker 		   spirvShader->inputs[i + 3].Type != Spirv::ATTRIBTYPE_UNUSED)
97*03ce13f7SAndroid Build Coastguard Worker 		{
98*03ce13f7SAndroid Build Coastguard Worker 			Pointer<Byte> input = *Pointer<Pointer<Byte>>(data + OFFSET(DrawData, input) + sizeof(void *) * (i / 4));
99*03ce13f7SAndroid Build Coastguard Worker 			UInt stride = *Pointer<UInt>(data + OFFSET(DrawData, stride) + sizeof(uint32_t) * (i / 4));
100*03ce13f7SAndroid Build Coastguard Worker 			Int baseVertex = *Pointer<Int>(data + OFFSET(DrawData, baseVertex));
101*03ce13f7SAndroid Build Coastguard Worker 			UInt robustnessSize(0);
102*03ce13f7SAndroid Build Coastguard Worker 			if(state.robustBufferAccess)
103*03ce13f7SAndroid Build Coastguard Worker 			{
104*03ce13f7SAndroid Build Coastguard Worker 				robustnessSize = *Pointer<UInt>(data + OFFSET(DrawData, robustnessSize) + sizeof(uint32_t) * (i / 4));
105*03ce13f7SAndroid Build Coastguard Worker 			}
106*03ce13f7SAndroid Build Coastguard Worker 
107*03ce13f7SAndroid Build Coastguard Worker 			auto value = readStream(input, stride, state.input[i / 4], batch, state.robustBufferAccess, robustnessSize, baseVertex);
108*03ce13f7SAndroid Build Coastguard Worker 			routine.inputs[i + 0] = value.x;
109*03ce13f7SAndroid Build Coastguard Worker 			routine.inputs[i + 1] = value.y;
110*03ce13f7SAndroid Build Coastguard Worker 			routine.inputs[i + 2] = value.z;
111*03ce13f7SAndroid Build Coastguard Worker 			routine.inputs[i + 3] = value.w;
112*03ce13f7SAndroid Build Coastguard Worker 		}
113*03ce13f7SAndroid Build Coastguard Worker 	}
114*03ce13f7SAndroid Build Coastguard Worker }
115*03ce13f7SAndroid Build Coastguard Worker 
computeClipFlags()116*03ce13f7SAndroid Build Coastguard Worker void VertexRoutine::computeClipFlags()
117*03ce13f7SAndroid Build Coastguard Worker {
118*03ce13f7SAndroid Build Coastguard Worker 	auto it = spirvShader->outputBuiltins.find(spv::BuiltInPosition);
119*03ce13f7SAndroid Build Coastguard Worker 	if(it != spirvShader->outputBuiltins.end())
120*03ce13f7SAndroid Build Coastguard Worker 	{
121*03ce13f7SAndroid Build Coastguard Worker 		assert(it->second.SizeInComponents == 4);
122*03ce13f7SAndroid Build Coastguard Worker 		auto &pos = routine.getVariable(it->second.Id);
123*03ce13f7SAndroid Build Coastguard Worker 		auto posX = pos[it->second.FirstComponent + 0];
124*03ce13f7SAndroid Build Coastguard Worker 		auto posY = pos[it->second.FirstComponent + 1];
125*03ce13f7SAndroid Build Coastguard Worker 		auto posZ = pos[it->second.FirstComponent + 2];
126*03ce13f7SAndroid Build Coastguard Worker 		auto posW = pos[it->second.FirstComponent + 3];
127*03ce13f7SAndroid Build Coastguard Worker 
128*03ce13f7SAndroid Build Coastguard Worker 		SIMD::Int maxX = CmpLT(posW, posX);
129*03ce13f7SAndroid Build Coastguard Worker 		SIMD::Int maxY = CmpLT(posW, posY);
130*03ce13f7SAndroid Build Coastguard Worker 		SIMD::Int minX = CmpNLE(-posW, posX);
131*03ce13f7SAndroid Build Coastguard Worker 		SIMD::Int minY = CmpNLE(-posW, posY);
132*03ce13f7SAndroid Build Coastguard Worker 
133*03ce13f7SAndroid Build Coastguard Worker 		clipFlags = maxX & Clipper::CLIP_RIGHT;
134*03ce13f7SAndroid Build Coastguard Worker 		clipFlags |= maxY & Clipper::CLIP_TOP;
135*03ce13f7SAndroid Build Coastguard Worker 		clipFlags |= minX & Clipper::CLIP_LEFT;
136*03ce13f7SAndroid Build Coastguard Worker 		clipFlags |= minY & Clipper::CLIP_BOTTOM;
137*03ce13f7SAndroid Build Coastguard Worker 		if(state.depthClipEnable)
138*03ce13f7SAndroid Build Coastguard Worker 		{
139*03ce13f7SAndroid Build Coastguard Worker 			// If depthClipNegativeOneToOne is enabled, depth values are in [-1, 1] instead of [0, 1].
140*03ce13f7SAndroid Build Coastguard Worker 			SIMD::Int maxZ = CmpLT(posW, posZ);
141*03ce13f7SAndroid Build Coastguard Worker 			SIMD::Int minZ = CmpNLE(state.depthClipNegativeOneToOne ? -posW : 0.0f, posZ);
142*03ce13f7SAndroid Build Coastguard Worker 			clipFlags |= maxZ & Clipper::CLIP_FAR;
143*03ce13f7SAndroid Build Coastguard Worker 			clipFlags |= minZ & Clipper::CLIP_NEAR;
144*03ce13f7SAndroid Build Coastguard Worker 		}
145*03ce13f7SAndroid Build Coastguard Worker 
146*03ce13f7SAndroid Build Coastguard Worker 		SIMD::Float maxPos = As<SIMD::Float>(SIMD::Int(0x7F7FFFFF));
147*03ce13f7SAndroid Build Coastguard Worker 		SIMD::Int finiteX = CmpLE(Abs(posX), maxPos);
148*03ce13f7SAndroid Build Coastguard Worker 		SIMD::Int finiteY = CmpLE(Abs(posY), maxPos);
149*03ce13f7SAndroid Build Coastguard Worker 		SIMD::Int finiteZ = CmpLE(Abs(posZ), maxPos);
150*03ce13f7SAndroid Build Coastguard Worker 
151*03ce13f7SAndroid Build Coastguard Worker 		SIMD::Int finiteXYZ = finiteX & finiteY & finiteZ;
152*03ce13f7SAndroid Build Coastguard Worker 		clipFlags |= finiteXYZ & Clipper::CLIP_FINITE;
153*03ce13f7SAndroid Build Coastguard Worker 	}
154*03ce13f7SAndroid Build Coastguard Worker }
155*03ce13f7SAndroid Build Coastguard Worker 
computeCullMask()156*03ce13f7SAndroid Build Coastguard Worker void VertexRoutine::computeCullMask()
157*03ce13f7SAndroid Build Coastguard Worker {
158*03ce13f7SAndroid Build Coastguard Worker 	cullMask = Int(15);
159*03ce13f7SAndroid Build Coastguard Worker 
160*03ce13f7SAndroid Build Coastguard Worker 	auto it = spirvShader->outputBuiltins.find(spv::BuiltInCullDistance);
161*03ce13f7SAndroid Build Coastguard Worker 	if(it != spirvShader->outputBuiltins.end())
162*03ce13f7SAndroid Build Coastguard Worker 	{
163*03ce13f7SAndroid Build Coastguard Worker 		auto count = spirvShader->getNumOutputCullDistances();
164*03ce13f7SAndroid Build Coastguard Worker 		for(uint32_t i = 0; i < count; i++)
165*03ce13f7SAndroid Build Coastguard Worker 		{
166*03ce13f7SAndroid Build Coastguard Worker 			const auto &distance = routine.getVariable(it->second.Id)[it->second.FirstComponent + i];
167*03ce13f7SAndroid Build Coastguard Worker 			auto mask = SignMask(CmpGE(distance, SIMD::Float(0)));
168*03ce13f7SAndroid Build Coastguard Worker 			cullMask &= mask;
169*03ce13f7SAndroid Build Coastguard Worker 		}
170*03ce13f7SAndroid Build Coastguard Worker 	}
171*03ce13f7SAndroid Build Coastguard Worker }
172*03ce13f7SAndroid Build Coastguard Worker 
readStream(Pointer<Byte> & buffer,UInt & stride,const Stream & stream,Pointer<UInt> & batch,bool robustBufferAccess,UInt & robustnessSize,Int baseVertex)173*03ce13f7SAndroid Build Coastguard Worker Vector4f VertexRoutine::readStream(Pointer<Byte> &buffer, UInt &stride, const Stream &stream, Pointer<UInt> &batch,
174*03ce13f7SAndroid Build Coastguard Worker                                    bool robustBufferAccess, UInt &robustnessSize, Int baseVertex)
175*03ce13f7SAndroid Build Coastguard Worker {
176*03ce13f7SAndroid Build Coastguard Worker 	Vector4f v;
177*03ce13f7SAndroid Build Coastguard Worker 	// Because of the following rule in the Vulkan spec, we do not care if a very large negative
178*03ce13f7SAndroid Build Coastguard Worker 	// baseVertex would overflow all the way back into a valid region of the index buffer:
179*03ce13f7SAndroid Build Coastguard Worker 	// "Out-of-bounds buffer loads will return any of the following values :
180*03ce13f7SAndroid Build Coastguard Worker 	//  - Values from anywhere within the memory range(s) bound to the buffer (possibly including
181*03ce13f7SAndroid Build Coastguard Worker 	//    bytes of memory past the end of the buffer, up to the end of the bound range)."
182*03ce13f7SAndroid Build Coastguard Worker 	UInt4 offsets = (*Pointer<UInt4>(As<Pointer<UInt4>>(batch)) + As<UInt4>(Int4(baseVertex))) * UInt4(stride);
183*03ce13f7SAndroid Build Coastguard Worker 
184*03ce13f7SAndroid Build Coastguard Worker 	Pointer<Byte> source0 = buffer + offsets.x;
185*03ce13f7SAndroid Build Coastguard Worker 	Pointer<Byte> source1 = buffer + offsets.y;
186*03ce13f7SAndroid Build Coastguard Worker 	Pointer<Byte> source2 = buffer + offsets.z;
187*03ce13f7SAndroid Build Coastguard Worker 	Pointer<Byte> source3 = buffer + offsets.w;
188*03ce13f7SAndroid Build Coastguard Worker 
189*03ce13f7SAndroid Build Coastguard Worker 	vk::Format format(stream.format);
190*03ce13f7SAndroid Build Coastguard Worker 
191*03ce13f7SAndroid Build Coastguard Worker 	UInt4 zero(0);
192*03ce13f7SAndroid Build Coastguard Worker 	if(robustBufferAccess)
193*03ce13f7SAndroid Build Coastguard Worker 	{
194*03ce13f7SAndroid Build Coastguard Worker 		// Prevent integer overflow on the addition below.
195*03ce13f7SAndroid Build Coastguard Worker 		offsets = Min(offsets, UInt4(robustnessSize));
196*03ce13f7SAndroid Build Coastguard Worker 
197*03ce13f7SAndroid Build Coastguard Worker 		// "vertex input attributes are considered out of bounds if the offset of the attribute
198*03ce13f7SAndroid Build Coastguard Worker 		//  in the bound vertex buffer range plus the size of the attribute is greater than ..."
199*03ce13f7SAndroid Build Coastguard Worker 		UInt4 limits = offsets + UInt4(format.bytes());
200*03ce13f7SAndroid Build Coastguard Worker 
201*03ce13f7SAndroid Build Coastguard Worker 		Pointer<Byte> zeroSource = As<Pointer<Byte>>(&zero);
202*03ce13f7SAndroid Build Coastguard Worker 		// TODO(b/141124876): Optimize for wide-vector gather operations.
203*03ce13f7SAndroid Build Coastguard Worker 		source0 = IfThenElse(limits.x > robustnessSize, zeroSource, source0);
204*03ce13f7SAndroid Build Coastguard Worker 		source1 = IfThenElse(limits.y > robustnessSize, zeroSource, source1);
205*03ce13f7SAndroid Build Coastguard Worker 		source2 = IfThenElse(limits.z > robustnessSize, zeroSource, source2);
206*03ce13f7SAndroid Build Coastguard Worker 		source3 = IfThenElse(limits.w > robustnessSize, zeroSource, source3);
207*03ce13f7SAndroid Build Coastguard Worker 	}
208*03ce13f7SAndroid Build Coastguard Worker 
209*03ce13f7SAndroid Build Coastguard Worker 	int componentCount = format.componentCount();
210*03ce13f7SAndroid Build Coastguard Worker 	bool normalized = !format.isUnnormalizedInteger();
211*03ce13f7SAndroid Build Coastguard Worker 	bool isNativeFloatAttrib = (stream.attribType == Spirv::ATTRIBTYPE_FLOAT) || normalized;
212*03ce13f7SAndroid Build Coastguard Worker 	bool bgra = false;
213*03ce13f7SAndroid Build Coastguard Worker 
214*03ce13f7SAndroid Build Coastguard Worker 	switch(stream.format)
215*03ce13f7SAndroid Build Coastguard Worker 	{
216*03ce13f7SAndroid Build Coastguard Worker 	case VK_FORMAT_R32_SFLOAT:
217*03ce13f7SAndroid Build Coastguard Worker 	case VK_FORMAT_R32G32_SFLOAT:
218*03ce13f7SAndroid Build Coastguard Worker 	case VK_FORMAT_R32G32B32_SFLOAT:
219*03ce13f7SAndroid Build Coastguard Worker 	case VK_FORMAT_R32G32B32A32_SFLOAT:
220*03ce13f7SAndroid Build Coastguard Worker 		{
221*03ce13f7SAndroid Build Coastguard Worker 			if(componentCount == 0)
222*03ce13f7SAndroid Build Coastguard Worker 			{
223*03ce13f7SAndroid Build Coastguard Worker 				// Null stream, all default components
224*03ce13f7SAndroid Build Coastguard Worker 			}
225*03ce13f7SAndroid Build Coastguard Worker 			else
226*03ce13f7SAndroid Build Coastguard Worker 			{
227*03ce13f7SAndroid Build Coastguard Worker 				if(componentCount == 1)
228*03ce13f7SAndroid Build Coastguard Worker 				{
229*03ce13f7SAndroid Build Coastguard Worker 					v.x.x = *Pointer<Float>(source0);
230*03ce13f7SAndroid Build Coastguard Worker 					v.x.y = *Pointer<Float>(source1);
231*03ce13f7SAndroid Build Coastguard Worker 					v.x.z = *Pointer<Float>(source2);
232*03ce13f7SAndroid Build Coastguard Worker 					v.x.w = *Pointer<Float>(source3);
233*03ce13f7SAndroid Build Coastguard Worker 				}
234*03ce13f7SAndroid Build Coastguard Worker 				else
235*03ce13f7SAndroid Build Coastguard Worker 				{
236*03ce13f7SAndroid Build Coastguard Worker 					v.x = *Pointer<Float4>(source0);
237*03ce13f7SAndroid Build Coastguard Worker 					v.y = *Pointer<Float4>(source1);
238*03ce13f7SAndroid Build Coastguard Worker 					v.z = *Pointer<Float4>(source2);
239*03ce13f7SAndroid Build Coastguard Worker 					v.w = *Pointer<Float4>(source3);
240*03ce13f7SAndroid Build Coastguard Worker 
241*03ce13f7SAndroid Build Coastguard Worker 					transpose4xN(v.x, v.y, v.z, v.w, componentCount);
242*03ce13f7SAndroid Build Coastguard Worker 				}
243*03ce13f7SAndroid Build Coastguard Worker 			}
244*03ce13f7SAndroid Build Coastguard Worker 		}
245*03ce13f7SAndroid Build Coastguard Worker 		break;
246*03ce13f7SAndroid Build Coastguard Worker 	case VK_FORMAT_B8G8R8A8_UNORM:
247*03ce13f7SAndroid Build Coastguard Worker 		bgra = true;
248*03ce13f7SAndroid Build Coastguard Worker 		// [[fallthrough]]
249*03ce13f7SAndroid Build Coastguard Worker 	case VK_FORMAT_R8_UNORM:
250*03ce13f7SAndroid Build Coastguard Worker 	case VK_FORMAT_R8G8_UNORM:
251*03ce13f7SAndroid Build Coastguard Worker 	case VK_FORMAT_R8G8B8A8_UNORM:
252*03ce13f7SAndroid Build Coastguard Worker 	case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
253*03ce13f7SAndroid Build Coastguard Worker 		v.x = Float4(*Pointer<Byte4>(source0));
254*03ce13f7SAndroid Build Coastguard Worker 		v.y = Float4(*Pointer<Byte4>(source1));
255*03ce13f7SAndroid Build Coastguard Worker 		v.z = Float4(*Pointer<Byte4>(source2));
256*03ce13f7SAndroid Build Coastguard Worker 		v.w = Float4(*Pointer<Byte4>(source3));
257*03ce13f7SAndroid Build Coastguard Worker 
258*03ce13f7SAndroid Build Coastguard Worker 		transpose4xN(v.x, v.y, v.z, v.w, componentCount);
259*03ce13f7SAndroid Build Coastguard Worker 
260*03ce13f7SAndroid Build Coastguard Worker 		if(componentCount >= 1) v.x *= (1.0f / 0xFF);
261*03ce13f7SAndroid Build Coastguard Worker 		if(componentCount >= 2) v.y *= (1.0f / 0xFF);
262*03ce13f7SAndroid Build Coastguard Worker 		if(componentCount >= 3) v.z *= (1.0f / 0xFF);
263*03ce13f7SAndroid Build Coastguard Worker 		if(componentCount >= 4) v.w *= (1.0f / 0xFF);
264*03ce13f7SAndroid Build Coastguard Worker 		break;
265*03ce13f7SAndroid Build Coastguard Worker 	case VK_FORMAT_R8_UINT:
266*03ce13f7SAndroid Build Coastguard Worker 	case VK_FORMAT_R8G8_UINT:
267*03ce13f7SAndroid Build Coastguard Worker 	case VK_FORMAT_R8G8B8A8_UINT:
268*03ce13f7SAndroid Build Coastguard Worker 	case VK_FORMAT_A8B8G8R8_UINT_PACK32:
269*03ce13f7SAndroid Build Coastguard Worker 		v.x = As<Float4>(Int4(*Pointer<Byte4>(source0)));
270*03ce13f7SAndroid Build Coastguard Worker 		v.y = As<Float4>(Int4(*Pointer<Byte4>(source1)));
271*03ce13f7SAndroid Build Coastguard Worker 		v.z = As<Float4>(Int4(*Pointer<Byte4>(source2)));
272*03ce13f7SAndroid Build Coastguard Worker 		v.w = As<Float4>(Int4(*Pointer<Byte4>(source3)));
273*03ce13f7SAndroid Build Coastguard Worker 
274*03ce13f7SAndroid Build Coastguard Worker 		transpose4xN(v.x, v.y, v.z, v.w, componentCount);
275*03ce13f7SAndroid Build Coastguard Worker 		break;
276*03ce13f7SAndroid Build Coastguard Worker 	case VK_FORMAT_R8_SNORM:
277*03ce13f7SAndroid Build Coastguard Worker 	case VK_FORMAT_R8G8_SNORM:
278*03ce13f7SAndroid Build Coastguard Worker 	case VK_FORMAT_R8G8B8A8_SNORM:
279*03ce13f7SAndroid Build Coastguard Worker 	case VK_FORMAT_A8B8G8R8_SNORM_PACK32:
280*03ce13f7SAndroid Build Coastguard Worker 		v.x = Float4(*Pointer<SByte4>(source0));
281*03ce13f7SAndroid Build Coastguard Worker 		v.y = Float4(*Pointer<SByte4>(source1));
282*03ce13f7SAndroid Build Coastguard Worker 		v.z = Float4(*Pointer<SByte4>(source2));
283*03ce13f7SAndroid Build Coastguard Worker 		v.w = Float4(*Pointer<SByte4>(source3));
284*03ce13f7SAndroid Build Coastguard Worker 
285*03ce13f7SAndroid Build Coastguard Worker 		transpose4xN(v.x, v.y, v.z, v.w, componentCount);
286*03ce13f7SAndroid Build Coastguard Worker 
287*03ce13f7SAndroid Build Coastguard Worker 		if(componentCount >= 1) v.x = Max(v.x * (1.0f / 0x7F), Float4(-1.0f));
288*03ce13f7SAndroid Build Coastguard Worker 		if(componentCount >= 2) v.y = Max(v.y * (1.0f / 0x7F), Float4(-1.0f));
289*03ce13f7SAndroid Build Coastguard Worker 		if(componentCount >= 3) v.z = Max(v.z * (1.0f / 0x7F), Float4(-1.0f));
290*03ce13f7SAndroid Build Coastguard Worker 		if(componentCount >= 4) v.w = Max(v.w * (1.0f / 0x7F), Float4(-1.0f));
291*03ce13f7SAndroid Build Coastguard Worker 		break;
292*03ce13f7SAndroid Build Coastguard Worker 	case VK_FORMAT_R8_USCALED:
293*03ce13f7SAndroid Build Coastguard Worker 	case VK_FORMAT_R8G8_USCALED:
294*03ce13f7SAndroid Build Coastguard Worker 	case VK_FORMAT_R8G8B8A8_USCALED:
295*03ce13f7SAndroid Build Coastguard Worker 	case VK_FORMAT_A8B8G8R8_USCALED_PACK32:
296*03ce13f7SAndroid Build Coastguard Worker 		v.x = Float4(*Pointer<Byte4>(source0));
297*03ce13f7SAndroid Build Coastguard Worker 		v.y = Float4(*Pointer<Byte4>(source1));
298*03ce13f7SAndroid Build Coastguard Worker 		v.z = Float4(*Pointer<Byte4>(source2));
299*03ce13f7SAndroid Build Coastguard Worker 		v.w = Float4(*Pointer<Byte4>(source3));
300*03ce13f7SAndroid Build Coastguard Worker 
301*03ce13f7SAndroid Build Coastguard Worker 		transpose4xN(v.x, v.y, v.z, v.w, componentCount);
302*03ce13f7SAndroid Build Coastguard Worker 		break;
303*03ce13f7SAndroid Build Coastguard Worker 	case VK_FORMAT_R8_SSCALED:
304*03ce13f7SAndroid Build Coastguard Worker 	case VK_FORMAT_R8G8_SSCALED:
305*03ce13f7SAndroid Build Coastguard Worker 	case VK_FORMAT_R8G8B8A8_SSCALED:
306*03ce13f7SAndroid Build Coastguard Worker 	case VK_FORMAT_A8B8G8R8_SSCALED_PACK32:
307*03ce13f7SAndroid Build Coastguard Worker 		v.x = Float4(*Pointer<SByte4>(source0));
308*03ce13f7SAndroid Build Coastguard Worker 		v.y = Float4(*Pointer<SByte4>(source1));
309*03ce13f7SAndroid Build Coastguard Worker 		v.z = Float4(*Pointer<SByte4>(source2));
310*03ce13f7SAndroid Build Coastguard Worker 		v.w = Float4(*Pointer<SByte4>(source3));
311*03ce13f7SAndroid Build Coastguard Worker 
312*03ce13f7SAndroid Build Coastguard Worker 		transpose4xN(v.x, v.y, v.z, v.w, componentCount);
313*03ce13f7SAndroid Build Coastguard Worker 		break;
314*03ce13f7SAndroid Build Coastguard Worker 	case VK_FORMAT_R8_SINT:
315*03ce13f7SAndroid Build Coastguard Worker 	case VK_FORMAT_R8G8_SINT:
316*03ce13f7SAndroid Build Coastguard Worker 	case VK_FORMAT_R8G8B8A8_SINT:
317*03ce13f7SAndroid Build Coastguard Worker 	case VK_FORMAT_A8B8G8R8_SINT_PACK32:
318*03ce13f7SAndroid Build Coastguard Worker 		v.x = As<Float4>(Int4(*Pointer<SByte4>(source0)));
319*03ce13f7SAndroid Build Coastguard Worker 		v.y = As<Float4>(Int4(*Pointer<SByte4>(source1)));
320*03ce13f7SAndroid Build Coastguard Worker 		v.z = As<Float4>(Int4(*Pointer<SByte4>(source2)));
321*03ce13f7SAndroid Build Coastguard Worker 		v.w = As<Float4>(Int4(*Pointer<SByte4>(source3)));
322*03ce13f7SAndroid Build Coastguard Worker 
323*03ce13f7SAndroid Build Coastguard Worker 		transpose4xN(v.x, v.y, v.z, v.w, componentCount);
324*03ce13f7SAndroid Build Coastguard Worker 		break;
325*03ce13f7SAndroid Build Coastguard Worker 	case VK_FORMAT_R16_UNORM:
326*03ce13f7SAndroid Build Coastguard Worker 	case VK_FORMAT_R16G16_UNORM:
327*03ce13f7SAndroid Build Coastguard Worker 	case VK_FORMAT_R16G16B16A16_UNORM:
328*03ce13f7SAndroid Build Coastguard Worker 		v.x = Float4(*Pointer<UShort4>(source0));
329*03ce13f7SAndroid Build Coastguard Worker 		v.y = Float4(*Pointer<UShort4>(source1));
330*03ce13f7SAndroid Build Coastguard Worker 		v.z = Float4(*Pointer<UShort4>(source2));
331*03ce13f7SAndroid Build Coastguard Worker 		v.w = Float4(*Pointer<UShort4>(source3));
332*03ce13f7SAndroid Build Coastguard Worker 
333*03ce13f7SAndroid Build Coastguard Worker 		transpose4xN(v.x, v.y, v.z, v.w, componentCount);
334*03ce13f7SAndroid Build Coastguard Worker 
335*03ce13f7SAndroid Build Coastguard Worker 		if(componentCount >= 1) v.x *= (1.0f / 0xFFFF);
336*03ce13f7SAndroid Build Coastguard Worker 		if(componentCount >= 2) v.y *= (1.0f / 0xFFFF);
337*03ce13f7SAndroid Build Coastguard Worker 		if(componentCount >= 3) v.z *= (1.0f / 0xFFFF);
338*03ce13f7SAndroid Build Coastguard Worker 		if(componentCount >= 4) v.w *= (1.0f / 0xFFFF);
339*03ce13f7SAndroid Build Coastguard Worker 		break;
340*03ce13f7SAndroid Build Coastguard Worker 	case VK_FORMAT_R16_SNORM:
341*03ce13f7SAndroid Build Coastguard Worker 	case VK_FORMAT_R16G16_SNORM:
342*03ce13f7SAndroid Build Coastguard Worker 	case VK_FORMAT_R16G16B16A16_SNORM:
343*03ce13f7SAndroid Build Coastguard Worker 		v.x = Float4(*Pointer<Short4>(source0));
344*03ce13f7SAndroid Build Coastguard Worker 		v.y = Float4(*Pointer<Short4>(source1));
345*03ce13f7SAndroid Build Coastguard Worker 		v.z = Float4(*Pointer<Short4>(source2));
346*03ce13f7SAndroid Build Coastguard Worker 		v.w = Float4(*Pointer<Short4>(source3));
347*03ce13f7SAndroid Build Coastguard Worker 
348*03ce13f7SAndroid Build Coastguard Worker 		transpose4xN(v.x, v.y, v.z, v.w, componentCount);
349*03ce13f7SAndroid Build Coastguard Worker 
350*03ce13f7SAndroid Build Coastguard Worker 		if(componentCount >= 1) v.x = Max(v.x * (1.0f / 0x7FFF), Float4(-1.0f));
351*03ce13f7SAndroid Build Coastguard Worker 		if(componentCount >= 2) v.y = Max(v.y * (1.0f / 0x7FFF), Float4(-1.0f));
352*03ce13f7SAndroid Build Coastguard Worker 		if(componentCount >= 3) v.z = Max(v.z * (1.0f / 0x7FFF), Float4(-1.0f));
353*03ce13f7SAndroid Build Coastguard Worker 		if(componentCount >= 4) v.w = Max(v.w * (1.0f / 0x7FFF), Float4(-1.0f));
354*03ce13f7SAndroid Build Coastguard Worker 		break;
355*03ce13f7SAndroid Build Coastguard Worker 	case VK_FORMAT_R16_USCALED:
356*03ce13f7SAndroid Build Coastguard Worker 	case VK_FORMAT_R16G16_USCALED:
357*03ce13f7SAndroid Build Coastguard Worker 	case VK_FORMAT_R16G16B16A16_USCALED:
358*03ce13f7SAndroid Build Coastguard Worker 		v.x = Float4(*Pointer<UShort4>(source0));
359*03ce13f7SAndroid Build Coastguard Worker 		v.y = Float4(*Pointer<UShort4>(source1));
360*03ce13f7SAndroid Build Coastguard Worker 		v.z = Float4(*Pointer<UShort4>(source2));
361*03ce13f7SAndroid Build Coastguard Worker 		v.w = Float4(*Pointer<UShort4>(source3));
362*03ce13f7SAndroid Build Coastguard Worker 
363*03ce13f7SAndroid Build Coastguard Worker 		transpose4xN(v.x, v.y, v.z, v.w, componentCount);
364*03ce13f7SAndroid Build Coastguard Worker 		break;
365*03ce13f7SAndroid Build Coastguard Worker 	case VK_FORMAT_R16_SSCALED:
366*03ce13f7SAndroid Build Coastguard Worker 	case VK_FORMAT_R16G16_SSCALED:
367*03ce13f7SAndroid Build Coastguard Worker 	case VK_FORMAT_R16G16B16A16_SSCALED:
368*03ce13f7SAndroid Build Coastguard Worker 		v.x = Float4(*Pointer<Short4>(source0));
369*03ce13f7SAndroid Build Coastguard Worker 		v.y = Float4(*Pointer<Short4>(source1));
370*03ce13f7SAndroid Build Coastguard Worker 		v.z = Float4(*Pointer<Short4>(source2));
371*03ce13f7SAndroid Build Coastguard Worker 		v.w = Float4(*Pointer<Short4>(source3));
372*03ce13f7SAndroid Build Coastguard Worker 
373*03ce13f7SAndroid Build Coastguard Worker 		transpose4xN(v.x, v.y, v.z, v.w, componentCount);
374*03ce13f7SAndroid Build Coastguard Worker 		break;
375*03ce13f7SAndroid Build Coastguard Worker 	case VK_FORMAT_R16_SINT:
376*03ce13f7SAndroid Build Coastguard Worker 	case VK_FORMAT_R16G16_SINT:
377*03ce13f7SAndroid Build Coastguard Worker 	case VK_FORMAT_R16G16B16A16_SINT:
378*03ce13f7SAndroid Build Coastguard Worker 		v.x = As<Float4>(Int4(*Pointer<Short4>(source0)));
379*03ce13f7SAndroid Build Coastguard Worker 		v.y = As<Float4>(Int4(*Pointer<Short4>(source1)));
380*03ce13f7SAndroid Build Coastguard Worker 		v.z = As<Float4>(Int4(*Pointer<Short4>(source2)));
381*03ce13f7SAndroid Build Coastguard Worker 		v.w = As<Float4>(Int4(*Pointer<Short4>(source3)));
382*03ce13f7SAndroid Build Coastguard Worker 
383*03ce13f7SAndroid Build Coastguard Worker 		transpose4xN(v.x, v.y, v.z, v.w, componentCount);
384*03ce13f7SAndroid Build Coastguard Worker 		break;
385*03ce13f7SAndroid Build Coastguard Worker 	case VK_FORMAT_R16_UINT:
386*03ce13f7SAndroid Build Coastguard Worker 	case VK_FORMAT_R16G16_UINT:
387*03ce13f7SAndroid Build Coastguard Worker 	case VK_FORMAT_R16G16B16A16_UINT:
388*03ce13f7SAndroid Build Coastguard Worker 		v.x = As<Float4>(Int4(*Pointer<UShort4>(source0)));
389*03ce13f7SAndroid Build Coastguard Worker 		v.y = As<Float4>(Int4(*Pointer<UShort4>(source1)));
390*03ce13f7SAndroid Build Coastguard Worker 		v.z = As<Float4>(Int4(*Pointer<UShort4>(source2)));
391*03ce13f7SAndroid Build Coastguard Worker 		v.w = As<Float4>(Int4(*Pointer<UShort4>(source3)));
392*03ce13f7SAndroid Build Coastguard Worker 
393*03ce13f7SAndroid Build Coastguard Worker 		transpose4xN(v.x, v.y, v.z, v.w, componentCount);
394*03ce13f7SAndroid Build Coastguard Worker 		break;
395*03ce13f7SAndroid Build Coastguard Worker 	case VK_FORMAT_R32_SINT:
396*03ce13f7SAndroid Build Coastguard Worker 	case VK_FORMAT_R32G32_SINT:
397*03ce13f7SAndroid Build Coastguard Worker 	case VK_FORMAT_R32G32B32_SINT:
398*03ce13f7SAndroid Build Coastguard Worker 	case VK_FORMAT_R32G32B32A32_SINT:
399*03ce13f7SAndroid Build Coastguard Worker 		v.x = *Pointer<Float4>(source0);
400*03ce13f7SAndroid Build Coastguard Worker 		v.y = *Pointer<Float4>(source1);
401*03ce13f7SAndroid Build Coastguard Worker 		v.z = *Pointer<Float4>(source2);
402*03ce13f7SAndroid Build Coastguard Worker 		v.w = *Pointer<Float4>(source3);
403*03ce13f7SAndroid Build Coastguard Worker 
404*03ce13f7SAndroid Build Coastguard Worker 		transpose4xN(v.x, v.y, v.z, v.w, componentCount);
405*03ce13f7SAndroid Build Coastguard Worker 		break;
406*03ce13f7SAndroid Build Coastguard Worker 	case VK_FORMAT_R32_UINT:
407*03ce13f7SAndroid Build Coastguard Worker 	case VK_FORMAT_R32G32_UINT:
408*03ce13f7SAndroid Build Coastguard Worker 	case VK_FORMAT_R32G32B32_UINT:
409*03ce13f7SAndroid Build Coastguard Worker 	case VK_FORMAT_R32G32B32A32_UINT:
410*03ce13f7SAndroid Build Coastguard Worker 		v.x = *Pointer<Float4>(source0);
411*03ce13f7SAndroid Build Coastguard Worker 		v.y = *Pointer<Float4>(source1);
412*03ce13f7SAndroid Build Coastguard Worker 		v.z = *Pointer<Float4>(source2);
413*03ce13f7SAndroid Build Coastguard Worker 		v.w = *Pointer<Float4>(source3);
414*03ce13f7SAndroid Build Coastguard Worker 
415*03ce13f7SAndroid Build Coastguard Worker 		transpose4xN(v.x, v.y, v.z, v.w, componentCount);
416*03ce13f7SAndroid Build Coastguard Worker 		break;
417*03ce13f7SAndroid Build Coastguard Worker 	case VK_FORMAT_R16_SFLOAT:
418*03ce13f7SAndroid Build Coastguard Worker 	case VK_FORMAT_R16G16_SFLOAT:
419*03ce13f7SAndroid Build Coastguard Worker 	case VK_FORMAT_R16G16B16A16_SFLOAT:
420*03ce13f7SAndroid Build Coastguard Worker 		{
421*03ce13f7SAndroid Build Coastguard Worker 			if(componentCount >= 1)
422*03ce13f7SAndroid Build Coastguard Worker 			{
423*03ce13f7SAndroid Build Coastguard Worker 				UShort x0 = *Pointer<UShort>(source0 + 0);
424*03ce13f7SAndroid Build Coastguard Worker 				UShort x1 = *Pointer<UShort>(source1 + 0);
425*03ce13f7SAndroid Build Coastguard Worker 				UShort x2 = *Pointer<UShort>(source2 + 0);
426*03ce13f7SAndroid Build Coastguard Worker 				UShort x3 = *Pointer<UShort>(source3 + 0);
427*03ce13f7SAndroid Build Coastguard Worker 
428*03ce13f7SAndroid Build Coastguard Worker 				v.x.x = *Pointer<Float>(constants + OFFSET(Constants, half2float) + Int(x0) * 4);
429*03ce13f7SAndroid Build Coastguard Worker 				v.x.y = *Pointer<Float>(constants + OFFSET(Constants, half2float) + Int(x1) * 4);
430*03ce13f7SAndroid Build Coastguard Worker 				v.x.z = *Pointer<Float>(constants + OFFSET(Constants, half2float) + Int(x2) * 4);
431*03ce13f7SAndroid Build Coastguard Worker 				v.x.w = *Pointer<Float>(constants + OFFSET(Constants, half2float) + Int(x3) * 4);
432*03ce13f7SAndroid Build Coastguard Worker 			}
433*03ce13f7SAndroid Build Coastguard Worker 
434*03ce13f7SAndroid Build Coastguard Worker 			if(componentCount >= 2)
435*03ce13f7SAndroid Build Coastguard Worker 			{
436*03ce13f7SAndroid Build Coastguard Worker 				UShort y0 = *Pointer<UShort>(source0 + 2);
437*03ce13f7SAndroid Build Coastguard Worker 				UShort y1 = *Pointer<UShort>(source1 + 2);
438*03ce13f7SAndroid Build Coastguard Worker 				UShort y2 = *Pointer<UShort>(source2 + 2);
439*03ce13f7SAndroid Build Coastguard Worker 				UShort y3 = *Pointer<UShort>(source3 + 2);
440*03ce13f7SAndroid Build Coastguard Worker 
441*03ce13f7SAndroid Build Coastguard Worker 				v.y.x = *Pointer<Float>(constants + OFFSET(Constants, half2float) + Int(y0) * 4);
442*03ce13f7SAndroid Build Coastguard Worker 				v.y.y = *Pointer<Float>(constants + OFFSET(Constants, half2float) + Int(y1) * 4);
443*03ce13f7SAndroid Build Coastguard Worker 				v.y.z = *Pointer<Float>(constants + OFFSET(Constants, half2float) + Int(y2) * 4);
444*03ce13f7SAndroid Build Coastguard Worker 				v.y.w = *Pointer<Float>(constants + OFFSET(Constants, half2float) + Int(y3) * 4);
445*03ce13f7SAndroid Build Coastguard Worker 			}
446*03ce13f7SAndroid Build Coastguard Worker 
447*03ce13f7SAndroid Build Coastguard Worker 			if(componentCount >= 3)
448*03ce13f7SAndroid Build Coastguard Worker 			{
449*03ce13f7SAndroid Build Coastguard Worker 				UShort z0 = *Pointer<UShort>(source0 + 4);
450*03ce13f7SAndroid Build Coastguard Worker 				UShort z1 = *Pointer<UShort>(source1 + 4);
451*03ce13f7SAndroid Build Coastguard Worker 				UShort z2 = *Pointer<UShort>(source2 + 4);
452*03ce13f7SAndroid Build Coastguard Worker 				UShort z3 = *Pointer<UShort>(source3 + 4);
453*03ce13f7SAndroid Build Coastguard Worker 
454*03ce13f7SAndroid Build Coastguard Worker 				v.z.x = *Pointer<Float>(constants + OFFSET(Constants, half2float) + Int(z0) * 4);
455*03ce13f7SAndroid Build Coastguard Worker 				v.z.y = *Pointer<Float>(constants + OFFSET(Constants, half2float) + Int(z1) * 4);
456*03ce13f7SAndroid Build Coastguard Worker 				v.z.z = *Pointer<Float>(constants + OFFSET(Constants, half2float) + Int(z2) * 4);
457*03ce13f7SAndroid Build Coastguard Worker 				v.z.w = *Pointer<Float>(constants + OFFSET(Constants, half2float) + Int(z3) * 4);
458*03ce13f7SAndroid Build Coastguard Worker 			}
459*03ce13f7SAndroid Build Coastguard Worker 
460*03ce13f7SAndroid Build Coastguard Worker 			if(componentCount >= 4)
461*03ce13f7SAndroid Build Coastguard Worker 			{
462*03ce13f7SAndroid Build Coastguard Worker 				UShort w0 = *Pointer<UShort>(source0 + 6);
463*03ce13f7SAndroid Build Coastguard Worker 				UShort w1 = *Pointer<UShort>(source1 + 6);
464*03ce13f7SAndroid Build Coastguard Worker 				UShort w2 = *Pointer<UShort>(source2 + 6);
465*03ce13f7SAndroid Build Coastguard Worker 				UShort w3 = *Pointer<UShort>(source3 + 6);
466*03ce13f7SAndroid Build Coastguard Worker 
467*03ce13f7SAndroid Build Coastguard Worker 				v.w.x = *Pointer<Float>(constants + OFFSET(Constants, half2float) + Int(w0) * 4);
468*03ce13f7SAndroid Build Coastguard Worker 				v.w.y = *Pointer<Float>(constants + OFFSET(Constants, half2float) + Int(w1) * 4);
469*03ce13f7SAndroid Build Coastguard Worker 				v.w.z = *Pointer<Float>(constants + OFFSET(Constants, half2float) + Int(w2) * 4);
470*03ce13f7SAndroid Build Coastguard Worker 				v.w.w = *Pointer<Float>(constants + OFFSET(Constants, half2float) + Int(w3) * 4);
471*03ce13f7SAndroid Build Coastguard Worker 			}
472*03ce13f7SAndroid Build Coastguard Worker 		}
473*03ce13f7SAndroid Build Coastguard Worker 		break;
474*03ce13f7SAndroid Build Coastguard Worker 	case VK_FORMAT_A2R10G10B10_SNORM_PACK32:
475*03ce13f7SAndroid Build Coastguard Worker 		bgra = true;
476*03ce13f7SAndroid Build Coastguard Worker 		// [[fallthrough]]
477*03ce13f7SAndroid Build Coastguard Worker 	case VK_FORMAT_A2B10G10R10_SNORM_PACK32:
478*03ce13f7SAndroid Build Coastguard Worker 		{
479*03ce13f7SAndroid Build Coastguard Worker 			Int4 src;
480*03ce13f7SAndroid Build Coastguard Worker 			src = Insert(src, *Pointer<Int>(source0), 0);
481*03ce13f7SAndroid Build Coastguard Worker 			src = Insert(src, *Pointer<Int>(source1), 1);
482*03ce13f7SAndroid Build Coastguard Worker 			src = Insert(src, *Pointer<Int>(source2), 2);
483*03ce13f7SAndroid Build Coastguard Worker 			src = Insert(src, *Pointer<Int>(source3), 3);
484*03ce13f7SAndroid Build Coastguard Worker 			v.x = Float4((src << 22) >> 22);
485*03ce13f7SAndroid Build Coastguard Worker 			v.y = Float4((src << 12) >> 22);
486*03ce13f7SAndroid Build Coastguard Worker 			v.z = Float4((src << 02) >> 22);
487*03ce13f7SAndroid Build Coastguard Worker 			v.w = Float4(src >> 30);
488*03ce13f7SAndroid Build Coastguard Worker 
489*03ce13f7SAndroid Build Coastguard Worker 			v.x = Max(v.x * Float4(1.0f / 0x1FF), Float4(-1.0f));
490*03ce13f7SAndroid Build Coastguard Worker 			v.y = Max(v.y * Float4(1.0f / 0x1FF), Float4(-1.0f));
491*03ce13f7SAndroid Build Coastguard Worker 			v.z = Max(v.z * Float4(1.0f / 0x1FF), Float4(-1.0f));
492*03ce13f7SAndroid Build Coastguard Worker 			v.w = Max(v.w, Float4(-1.0f));
493*03ce13f7SAndroid Build Coastguard Worker 		}
494*03ce13f7SAndroid Build Coastguard Worker 		break;
495*03ce13f7SAndroid Build Coastguard Worker 	case VK_FORMAT_A2R10G10B10_SINT_PACK32:
496*03ce13f7SAndroid Build Coastguard Worker 		bgra = true;
497*03ce13f7SAndroid Build Coastguard Worker 		// [[fallthrough]]
498*03ce13f7SAndroid Build Coastguard Worker 	case VK_FORMAT_A2B10G10R10_SINT_PACK32:
499*03ce13f7SAndroid Build Coastguard Worker 		{
500*03ce13f7SAndroid Build Coastguard Worker 			Int4 src;
501*03ce13f7SAndroid Build Coastguard Worker 			src = Insert(src, *Pointer<Int>(source0), 0);
502*03ce13f7SAndroid Build Coastguard Worker 			src = Insert(src, *Pointer<Int>(source1), 1);
503*03ce13f7SAndroid Build Coastguard Worker 			src = Insert(src, *Pointer<Int>(source2), 2);
504*03ce13f7SAndroid Build Coastguard Worker 			src = Insert(src, *Pointer<Int>(source3), 3);
505*03ce13f7SAndroid Build Coastguard Worker 			v.x = As<Float4>((src << 22) >> 22);
506*03ce13f7SAndroid Build Coastguard Worker 			v.y = As<Float4>((src << 12) >> 22);
507*03ce13f7SAndroid Build Coastguard Worker 			v.z = As<Float4>((src << 02) >> 22);
508*03ce13f7SAndroid Build Coastguard Worker 			v.w = As<Float4>(src >> 30);
509*03ce13f7SAndroid Build Coastguard Worker 		}
510*03ce13f7SAndroid Build Coastguard Worker 		break;
511*03ce13f7SAndroid Build Coastguard Worker 	case VK_FORMAT_A2R10G10B10_UNORM_PACK32:
512*03ce13f7SAndroid Build Coastguard Worker 		bgra = true;
513*03ce13f7SAndroid Build Coastguard Worker 		// [[fallthrough]]
514*03ce13f7SAndroid Build Coastguard Worker 	case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
515*03ce13f7SAndroid Build Coastguard Worker 		{
516*03ce13f7SAndroid Build Coastguard Worker 			Int4 src;
517*03ce13f7SAndroid Build Coastguard Worker 			src = Insert(src, *Pointer<Int>(source0), 0);
518*03ce13f7SAndroid Build Coastguard Worker 			src = Insert(src, *Pointer<Int>(source1), 1);
519*03ce13f7SAndroid Build Coastguard Worker 			src = Insert(src, *Pointer<Int>(source2), 2);
520*03ce13f7SAndroid Build Coastguard Worker 			src = Insert(src, *Pointer<Int>(source3), 3);
521*03ce13f7SAndroid Build Coastguard Worker 
522*03ce13f7SAndroid Build Coastguard Worker 			v.x = Float4(src & Int4(0x3FF));
523*03ce13f7SAndroid Build Coastguard Worker 			v.y = Float4((src >> 10) & Int4(0x3FF));
524*03ce13f7SAndroid Build Coastguard Worker 			v.z = Float4((src >> 20) & Int4(0x3FF));
525*03ce13f7SAndroid Build Coastguard Worker 			v.w = Float4((src >> 30) & Int4(0x3));
526*03ce13f7SAndroid Build Coastguard Worker 
527*03ce13f7SAndroid Build Coastguard Worker 			v.x *= Float4(1.0f / 0x3FF);
528*03ce13f7SAndroid Build Coastguard Worker 			v.y *= Float4(1.0f / 0x3FF);
529*03ce13f7SAndroid Build Coastguard Worker 			v.z *= Float4(1.0f / 0x3FF);
530*03ce13f7SAndroid Build Coastguard Worker 			v.w *= Float4(1.0f / 0x3);
531*03ce13f7SAndroid Build Coastguard Worker 		}
532*03ce13f7SAndroid Build Coastguard Worker 		break;
533*03ce13f7SAndroid Build Coastguard Worker 	case VK_FORMAT_A2R10G10B10_UINT_PACK32:
534*03ce13f7SAndroid Build Coastguard Worker 		bgra = true;
535*03ce13f7SAndroid Build Coastguard Worker 		// [[fallthrough]]
536*03ce13f7SAndroid Build Coastguard Worker 	case VK_FORMAT_A2B10G10R10_UINT_PACK32:
537*03ce13f7SAndroid Build Coastguard Worker 		{
538*03ce13f7SAndroid Build Coastguard Worker 			Int4 src;
539*03ce13f7SAndroid Build Coastguard Worker 			src = Insert(src, *Pointer<Int>(source0), 0);
540*03ce13f7SAndroid Build Coastguard Worker 			src = Insert(src, *Pointer<Int>(source1), 1);
541*03ce13f7SAndroid Build Coastguard Worker 			src = Insert(src, *Pointer<Int>(source2), 2);
542*03ce13f7SAndroid Build Coastguard Worker 			src = Insert(src, *Pointer<Int>(source3), 3);
543*03ce13f7SAndroid Build Coastguard Worker 
544*03ce13f7SAndroid Build Coastguard Worker 			v.x = As<Float4>(src & Int4(0x3FF));
545*03ce13f7SAndroid Build Coastguard Worker 			v.y = As<Float4>((src >> 10) & Int4(0x3FF));
546*03ce13f7SAndroid Build Coastguard Worker 			v.z = As<Float4>((src >> 20) & Int4(0x3FF));
547*03ce13f7SAndroid Build Coastguard Worker 			v.w = As<Float4>((src >> 30) & Int4(0x3));
548*03ce13f7SAndroid Build Coastguard Worker 		}
549*03ce13f7SAndroid Build Coastguard Worker 		break;
550*03ce13f7SAndroid Build Coastguard Worker 	default:
551*03ce13f7SAndroid Build Coastguard Worker 		UNSUPPORTED("stream.format %d", int(stream.format));
552*03ce13f7SAndroid Build Coastguard Worker 	}
553*03ce13f7SAndroid Build Coastguard Worker 
554*03ce13f7SAndroid Build Coastguard Worker 	if(bgra)
555*03ce13f7SAndroid Build Coastguard Worker 	{
556*03ce13f7SAndroid Build Coastguard Worker 		// Swap red and blue
557*03ce13f7SAndroid Build Coastguard Worker 		Float4 t = v.x;
558*03ce13f7SAndroid Build Coastguard Worker 		v.x = v.z;
559*03ce13f7SAndroid Build Coastguard Worker 		v.z = t;
560*03ce13f7SAndroid Build Coastguard Worker 	}
561*03ce13f7SAndroid Build Coastguard Worker 
562*03ce13f7SAndroid Build Coastguard Worker 	if(componentCount < 1) v.x = Float4(0.0f);
563*03ce13f7SAndroid Build Coastguard Worker 	if(componentCount < 2) v.y = Float4(0.0f);
564*03ce13f7SAndroid Build Coastguard Worker 	if(componentCount < 3) v.z = Float4(0.0f);
565*03ce13f7SAndroid Build Coastguard Worker 	if(componentCount < 4) v.w = isNativeFloatAttrib ? As<Float4>(Float4(1.0f)) : As<Float4>(Int4(1));
566*03ce13f7SAndroid Build Coastguard Worker 
567*03ce13f7SAndroid Build Coastguard Worker 	return v;
568*03ce13f7SAndroid Build Coastguard Worker }
569*03ce13f7SAndroid Build Coastguard Worker 
writeCache(Pointer<Byte> & vertexCache,Pointer<UInt> & tagCache,Pointer<UInt> & batch)570*03ce13f7SAndroid Build Coastguard Worker void VertexRoutine::writeCache(Pointer<Byte> &vertexCache, Pointer<UInt> &tagCache, Pointer<UInt> &batch)
571*03ce13f7SAndroid Build Coastguard Worker {
572*03ce13f7SAndroid Build Coastguard Worker 	ASSERT(SIMD::Width == 4);
573*03ce13f7SAndroid Build Coastguard Worker 
574*03ce13f7SAndroid Build Coastguard Worker 	UInt index0 = batch[0];
575*03ce13f7SAndroid Build Coastguard Worker 	UInt index1 = batch[1];
576*03ce13f7SAndroid Build Coastguard Worker 	UInt index2 = batch[2];
577*03ce13f7SAndroid Build Coastguard Worker 	UInt index3 = batch[3];
578*03ce13f7SAndroid Build Coastguard Worker 
579*03ce13f7SAndroid Build Coastguard Worker 	UInt cacheIndex0 = index0 & VertexCache::TAG_MASK;
580*03ce13f7SAndroid Build Coastguard Worker 	UInt cacheIndex1 = index1 & VertexCache::TAG_MASK;
581*03ce13f7SAndroid Build Coastguard Worker 	UInt cacheIndex2 = index2 & VertexCache::TAG_MASK;
582*03ce13f7SAndroid Build Coastguard Worker 	UInt cacheIndex3 = index3 & VertexCache::TAG_MASK;
583*03ce13f7SAndroid Build Coastguard Worker 
584*03ce13f7SAndroid Build Coastguard Worker 	// We processed a SIMD group of vertices, with the first one being the one that missed the cache tag check.
585*03ce13f7SAndroid Build Coastguard Worker 	// Write them out in reverse order here and below to ensure the first one is now guaranteed to be in the cache.
586*03ce13f7SAndroid Build Coastguard Worker 	tagCache[cacheIndex3] = index3;
587*03ce13f7SAndroid Build Coastguard Worker 	tagCache[cacheIndex2] = index2;
588*03ce13f7SAndroid Build Coastguard Worker 	tagCache[cacheIndex1] = index1;
589*03ce13f7SAndroid Build Coastguard Worker 	tagCache[cacheIndex0] = index0;
590*03ce13f7SAndroid Build Coastguard Worker 
591*03ce13f7SAndroid Build Coastguard Worker 	auto it = spirvShader->outputBuiltins.find(spv::BuiltInPosition);
592*03ce13f7SAndroid Build Coastguard Worker 	if(it != spirvShader->outputBuiltins.end())
593*03ce13f7SAndroid Build Coastguard Worker 	{
594*03ce13f7SAndroid Build Coastguard Worker 		assert(it->second.SizeInComponents == 4);
595*03ce13f7SAndroid Build Coastguard Worker 		auto &position = routine.getVariable(it->second.Id);
596*03ce13f7SAndroid Build Coastguard Worker 
597*03ce13f7SAndroid Build Coastguard Worker 		SIMD::Float4 pos;
598*03ce13f7SAndroid Build Coastguard Worker 		pos.x = position[it->second.FirstComponent + 0];
599*03ce13f7SAndroid Build Coastguard Worker 		pos.y = position[it->second.FirstComponent + 1];
600*03ce13f7SAndroid Build Coastguard Worker 		pos.z = position[it->second.FirstComponent + 2];
601*03ce13f7SAndroid Build Coastguard Worker 		pos.w = position[it->second.FirstComponent + 3];
602*03ce13f7SAndroid Build Coastguard Worker 
603*03ce13f7SAndroid Build Coastguard Worker 		// Projection and viewport transform.
604*03ce13f7SAndroid Build Coastguard Worker 		SIMD::Float w = As<SIMD::Float>(As<SIMD::Int>(pos.w) | (As<SIMD::Int>(CmpEQ(pos.w, 0.0f)) & As<SIMD::Int>(SIMD::Float(1.0f))));
605*03ce13f7SAndroid Build Coastguard Worker 		SIMD::Float rhw = 1.0f / w;
606*03ce13f7SAndroid Build Coastguard Worker 
607*03ce13f7SAndroid Build Coastguard Worker 		SIMD::Float4 proj;
608*03ce13f7SAndroid Build Coastguard Worker 		proj.x = As<Float4>(RoundIntClamped(SIMD::Float(*Pointer<Float>(data + OFFSET(DrawData, X0xF))) + pos.x * rhw * SIMD::Float(*Pointer<Float>(data + OFFSET(DrawData, WxF)))));
609*03ce13f7SAndroid Build Coastguard Worker 		proj.y = As<Float4>(RoundIntClamped(SIMD::Float(*Pointer<Float>(data + OFFSET(DrawData, Y0xF))) + pos.y * rhw * SIMD::Float(*Pointer<Float>(data + OFFSET(DrawData, HxF)))));
610*03ce13f7SAndroid Build Coastguard Worker 		proj.z = pos.z * rhw;
611*03ce13f7SAndroid Build Coastguard Worker 		proj.w = rhw;
612*03ce13f7SAndroid Build Coastguard Worker 
613*03ce13f7SAndroid Build Coastguard Worker 		Float4 pos_x = Extract128(pos.x, 0);
614*03ce13f7SAndroid Build Coastguard Worker 		Float4 pos_y = Extract128(pos.y, 0);
615*03ce13f7SAndroid Build Coastguard Worker 		Float4 pos_z = Extract128(pos.z, 0);
616*03ce13f7SAndroid Build Coastguard Worker 		Float4 pos_w = Extract128(pos.w, 0);
617*03ce13f7SAndroid Build Coastguard Worker 		transpose4x4(pos_x, pos_y, pos_z, pos_w);
618*03ce13f7SAndroid Build Coastguard Worker 
619*03ce13f7SAndroid Build Coastguard Worker 		*Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex, position), 16) = pos_w;
620*03ce13f7SAndroid Build Coastguard Worker 		*Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex, position), 16) = pos_z;
621*03ce13f7SAndroid Build Coastguard Worker 		*Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex, position), 16) = pos_y;
622*03ce13f7SAndroid Build Coastguard Worker 		*Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex, position), 16) = pos_x;
623*03ce13f7SAndroid Build Coastguard Worker 
624*03ce13f7SAndroid Build Coastguard Worker 		*Pointer<Int>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex, clipFlags)) = Extract(clipFlags, 3);
625*03ce13f7SAndroid Build Coastguard Worker 		*Pointer<Int>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex, clipFlags)) = Extract(clipFlags, 2);
626*03ce13f7SAndroid Build Coastguard Worker 		*Pointer<Int>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex, clipFlags)) = Extract(clipFlags, 1);
627*03ce13f7SAndroid Build Coastguard Worker 		*Pointer<Int>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex, clipFlags)) = Extract(clipFlags, 0);
628*03ce13f7SAndroid Build Coastguard Worker 
629*03ce13f7SAndroid Build Coastguard Worker 		Float4 proj_x = Extract128(proj.x, 0);
630*03ce13f7SAndroid Build Coastguard Worker 		Float4 proj_y = Extract128(proj.y, 0);
631*03ce13f7SAndroid Build Coastguard Worker 		Float4 proj_z = Extract128(proj.z, 0);
632*03ce13f7SAndroid Build Coastguard Worker 		Float4 proj_w = Extract128(proj.w, 0);
633*03ce13f7SAndroid Build Coastguard Worker 		transpose4x4(proj_x, proj_y, proj_z, proj_w);
634*03ce13f7SAndroid Build Coastguard Worker 
635*03ce13f7SAndroid Build Coastguard Worker 		*Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex, projected), 16) = proj_w;
636*03ce13f7SAndroid Build Coastguard Worker 		*Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex, projected), 16) = proj_z;
637*03ce13f7SAndroid Build Coastguard Worker 		*Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex, projected), 16) = proj_y;
638*03ce13f7SAndroid Build Coastguard Worker 		*Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex, projected), 16) = proj_x;
639*03ce13f7SAndroid Build Coastguard Worker 	}
640*03ce13f7SAndroid Build Coastguard Worker 
641*03ce13f7SAndroid Build Coastguard Worker 	it = spirvShader->outputBuiltins.find(spv::BuiltInPointSize);
642*03ce13f7SAndroid Build Coastguard Worker 	if(it != spirvShader->outputBuiltins.end())
643*03ce13f7SAndroid Build Coastguard Worker 	{
644*03ce13f7SAndroid Build Coastguard Worker 		ASSERT(it->second.SizeInComponents == 1);
645*03ce13f7SAndroid Build Coastguard Worker 		auto psize = routine.getVariable(it->second.Id)[it->second.FirstComponent];
646*03ce13f7SAndroid Build Coastguard Worker 
647*03ce13f7SAndroid Build Coastguard Worker 		*Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex, pointSize)) = Extract(psize, 3);
648*03ce13f7SAndroid Build Coastguard Worker 		*Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex, pointSize)) = Extract(psize, 2);
649*03ce13f7SAndroid Build Coastguard Worker 		*Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex, pointSize)) = Extract(psize, 1);
650*03ce13f7SAndroid Build Coastguard Worker 		*Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex, pointSize)) = Extract(psize, 0);
651*03ce13f7SAndroid Build Coastguard Worker 	}
652*03ce13f7SAndroid Build Coastguard Worker 
653*03ce13f7SAndroid Build Coastguard Worker 	it = spirvShader->outputBuiltins.find(spv::BuiltInClipDistance);
654*03ce13f7SAndroid Build Coastguard Worker 	if(it != spirvShader->outputBuiltins.end())
655*03ce13f7SAndroid Build Coastguard Worker 	{
656*03ce13f7SAndroid Build Coastguard Worker 		auto count = spirvShader->getNumOutputClipDistances();
657*03ce13f7SAndroid Build Coastguard Worker 		for(unsigned int i = 0; i < count; i++)
658*03ce13f7SAndroid Build Coastguard Worker 		{
659*03ce13f7SAndroid Build Coastguard Worker 			auto dist = routine.getVariable(it->second.Id)[it->second.FirstComponent + i];
660*03ce13f7SAndroid Build Coastguard Worker 			*Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex, clipDistance[i])) = Extract(dist, 3);
661*03ce13f7SAndroid Build Coastguard Worker 			*Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex, clipDistance[i])) = Extract(dist, 2);
662*03ce13f7SAndroid Build Coastguard Worker 			*Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex, clipDistance[i])) = Extract(dist, 1);
663*03ce13f7SAndroid Build Coastguard Worker 			*Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex, clipDistance[i])) = Extract(dist, 0);
664*03ce13f7SAndroid Build Coastguard Worker 		}
665*03ce13f7SAndroid Build Coastguard Worker 	}
666*03ce13f7SAndroid Build Coastguard Worker 
667*03ce13f7SAndroid Build Coastguard Worker 	it = spirvShader->outputBuiltins.find(spv::BuiltInCullDistance);
668*03ce13f7SAndroid Build Coastguard Worker 	if(it != spirvShader->outputBuiltins.end())
669*03ce13f7SAndroid Build Coastguard Worker 	{
670*03ce13f7SAndroid Build Coastguard Worker 		auto count = spirvShader->getNumOutputCullDistances();
671*03ce13f7SAndroid Build Coastguard Worker 		for(unsigned int i = 0; i < count; i++)
672*03ce13f7SAndroid Build Coastguard Worker 		{
673*03ce13f7SAndroid Build Coastguard Worker 			auto dist = routine.getVariable(it->second.Id)[it->second.FirstComponent + i];
674*03ce13f7SAndroid Build Coastguard Worker 			*Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex, cullDistance[i])) = Extract(dist, 3);
675*03ce13f7SAndroid Build Coastguard Worker 			*Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex, cullDistance[i])) = Extract(dist, 2);
676*03ce13f7SAndroid Build Coastguard Worker 			*Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex, cullDistance[i])) = Extract(dist, 1);
677*03ce13f7SAndroid Build Coastguard Worker 			*Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex, cullDistance[i])) = Extract(dist, 0);
678*03ce13f7SAndroid Build Coastguard Worker 		}
679*03ce13f7SAndroid Build Coastguard Worker 	}
680*03ce13f7SAndroid Build Coastguard Worker 
681*03ce13f7SAndroid Build Coastguard Worker 	*Pointer<Int>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex, cullMask)) = -((cullMask >> 3) & 1);
682*03ce13f7SAndroid Build Coastguard Worker 	*Pointer<Int>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex, cullMask)) = -((cullMask >> 2) & 1);
683*03ce13f7SAndroid Build Coastguard Worker 	*Pointer<Int>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex, cullMask)) = -((cullMask >> 1) & 1);
684*03ce13f7SAndroid Build Coastguard Worker 	*Pointer<Int>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex, cullMask)) = -((cullMask >> 0) & 1);
685*03ce13f7SAndroid Build Coastguard Worker 
686*03ce13f7SAndroid Build Coastguard Worker 	for(int i = 0; i < MAX_INTERFACE_COMPONENTS; i += 4)
687*03ce13f7SAndroid Build Coastguard Worker 	{
688*03ce13f7SAndroid Build Coastguard Worker 		if(spirvShader->outputs[i + 0].Type != Spirv::ATTRIBTYPE_UNUSED ||
689*03ce13f7SAndroid Build Coastguard Worker 		   spirvShader->outputs[i + 1].Type != Spirv::ATTRIBTYPE_UNUSED ||
690*03ce13f7SAndroid Build Coastguard Worker 		   spirvShader->outputs[i + 2].Type != Spirv::ATTRIBTYPE_UNUSED ||
691*03ce13f7SAndroid Build Coastguard Worker 		   spirvShader->outputs[i + 3].Type != Spirv::ATTRIBTYPE_UNUSED)
692*03ce13f7SAndroid Build Coastguard Worker 		{
693*03ce13f7SAndroid Build Coastguard Worker 			Vector4f v;
694*03ce13f7SAndroid Build Coastguard Worker 			v.x = Extract128(routine.outputs[i + 0], 0);
695*03ce13f7SAndroid Build Coastguard Worker 			v.y = Extract128(routine.outputs[i + 1], 0);
696*03ce13f7SAndroid Build Coastguard Worker 			v.z = Extract128(routine.outputs[i + 2], 0);
697*03ce13f7SAndroid Build Coastguard Worker 			v.w = Extract128(routine.outputs[i + 3], 0);
698*03ce13f7SAndroid Build Coastguard Worker 
699*03ce13f7SAndroid Build Coastguard Worker 			transpose4x4(v.x, v.y, v.z, v.w);
700*03ce13f7SAndroid Build Coastguard Worker 
701*03ce13f7SAndroid Build Coastguard Worker 			*Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex, v[i]), 16) = v.w;
702*03ce13f7SAndroid Build Coastguard Worker 			*Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex, v[i]), 16) = v.z;
703*03ce13f7SAndroid Build Coastguard Worker 			*Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex, v[i]), 16) = v.y;
704*03ce13f7SAndroid Build Coastguard Worker 			*Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex, v[i]), 16) = v.x;
705*03ce13f7SAndroid Build Coastguard Worker 		}
706*03ce13f7SAndroid Build Coastguard Worker 	}
707*03ce13f7SAndroid Build Coastguard Worker }
708*03ce13f7SAndroid Build Coastguard Worker 
writeVertex(const Pointer<Byte> & vertex,Pointer<Byte> & cacheEntry)709*03ce13f7SAndroid Build Coastguard Worker void VertexRoutine::writeVertex(const Pointer<Byte> &vertex, Pointer<Byte> &cacheEntry)
710*03ce13f7SAndroid Build Coastguard Worker {
711*03ce13f7SAndroid Build Coastguard Worker 	*Pointer<Int4>(vertex + OFFSET(Vertex, position)) = *Pointer<Int4>(cacheEntry + OFFSET(Vertex, position));
712*03ce13f7SAndroid Build Coastguard Worker 	*Pointer<Int>(vertex + OFFSET(Vertex, pointSize)) = *Pointer<Int>(cacheEntry + OFFSET(Vertex, pointSize));
713*03ce13f7SAndroid Build Coastguard Worker 
714*03ce13f7SAndroid Build Coastguard Worker 	*Pointer<Int>(vertex + OFFSET(Vertex, clipFlags)) = *Pointer<Int>(cacheEntry + OFFSET(Vertex, clipFlags));
715*03ce13f7SAndroid Build Coastguard Worker 	*Pointer<Int>(vertex + OFFSET(Vertex, cullMask)) = *Pointer<Int>(cacheEntry + OFFSET(Vertex, cullMask));
716*03ce13f7SAndroid Build Coastguard Worker 	*Pointer<Int4>(vertex + OFFSET(Vertex, projected)) = *Pointer<Int4>(cacheEntry + OFFSET(Vertex, projected));
717*03ce13f7SAndroid Build Coastguard Worker 
718*03ce13f7SAndroid Build Coastguard Worker 	for(int i = 0; i < MAX_INTERFACE_COMPONENTS; i++)
719*03ce13f7SAndroid Build Coastguard Worker 	{
720*03ce13f7SAndroid Build Coastguard Worker 		if(spirvShader->outputs[i].Type != Spirv::ATTRIBTYPE_UNUSED)
721*03ce13f7SAndroid Build Coastguard Worker 		{
722*03ce13f7SAndroid Build Coastguard Worker 			*Pointer<Int>(vertex + OFFSET(Vertex, v[i]), 4) = *Pointer<Int>(cacheEntry + OFFSET(Vertex, v[i]), 4);
723*03ce13f7SAndroid Build Coastguard Worker 		}
724*03ce13f7SAndroid Build Coastguard Worker 	}
725*03ce13f7SAndroid Build Coastguard Worker 	for(unsigned int i = 0; i < spirvShader->getNumOutputClipDistances(); i++)
726*03ce13f7SAndroid Build Coastguard Worker 	{
727*03ce13f7SAndroid Build Coastguard Worker 		*Pointer<Float>(vertex + OFFSET(Vertex, clipDistance[i]), 4) = *Pointer<Float>(cacheEntry + OFFSET(Vertex, clipDistance[i]), 4);
728*03ce13f7SAndroid Build Coastguard Worker 	}
729*03ce13f7SAndroid Build Coastguard Worker 	for(unsigned int i = 0; i < spirvShader->getNumOutputCullDistances(); i++)
730*03ce13f7SAndroid Build Coastguard Worker 	{
731*03ce13f7SAndroid Build Coastguard Worker 		*Pointer<Float>(vertex + OFFSET(Vertex, cullDistance[i]), 4) = *Pointer<Float>(cacheEntry + OFFSET(Vertex, cullDistance[i]), 4);
732*03ce13f7SAndroid Build Coastguard Worker 	}
733*03ce13f7SAndroid Build Coastguard Worker }
734*03ce13f7SAndroid Build Coastguard Worker 
735*03ce13f7SAndroid Build Coastguard Worker }  // namespace sw
736