xref: /aosp_15_r20/external/angle/src/libANGLE/renderer/metal/shaders/mtl_internal_shaders_src_autogen.h (revision 8975f5c5ed3d1c378011245431ada316dfb6f244)
1 // GENERATED FILE - DO NOT EDIT.
2 // Generated by gen_mtl_internal_shaders.py
3 //
4 // Copyright 2020 The ANGLE Project Authors. All rights reserved.
5 // Use of this source code is governed by a BSD-style license that can be
6 // found in the LICENSE file.
7 //
8 
9 // C++ string version of combined Metal default shaders.
10 
11 static char gDefaultMetallibSrc[] = R"(
12 # 1 "temp_master_source.metal"
13 # 1 "<built-in>" 1
14 # 1 "<built-in>" 3
15 # 435 "<built-in>" 3
16 # 1 "<command line>" 1
17 # 1 "<built-in>" 2
18 # 1 "temp_master_source.metal" 2
19 # 1 ".\\blit.metal" 1
20 
21 
22 
23 
24 
25 
26 
27 # 1 ".\\common.h" 1
28 # 13 ".\\common.h"
29 # include <simd/simd.h>
30 # include <metal_stdlib>
31 
32 
33 # 1 ".\\constants.h" 1
34 # 11 ".\\constants.h"
35 namespace rx
36 {
37 namespace mtl_shader
38 {
39 
40 enum
41 {
42     kTextureType2D = 0,
43     kTextureType2DMultisample = 1,
44     kTextureType2DArray = 2,
45     kTextureTypeCube = 3,
46     kTextureType3D = 4,
47     kTextureTypeCount = 5,
48 };
49 
50 
51 
52 
53 }
54 }
55 # 18 ".\\common.h" 2
56 
57 
58 
59 
60 
61 
62 
63 using namespace metal;
64 
65 
66 
67 constant uint32_t kNumColorOutputs [[function_constant(0)]];
68 constant bool kColorOutputAvailable0 = kNumColorOutputs > 0;
69 constant bool kColorOutputAvailable1 = kNumColorOutputs > 1;
70 constant bool kColorOutputAvailable2 = kNumColorOutputs > 2;
71 constant bool kColorOutputAvailable3 = kNumColorOutputs > 3;
72 constant bool kColorOutputAvailable4 = kNumColorOutputs > 4;
73 constant bool kColorOutputAvailable5 = kNumColorOutputs > 5;
74 constant bool kColorOutputAvailable6 = kNumColorOutputs > 6;
75 constant bool kColorOutputAvailable7 = kNumColorOutputs > 7;
76 
77 namespace rx
78 {
79 namespace mtl_shader
80 {
81 
82 
83 constant float2 gCorners[3] = {float2(-1.0f, -1.0f), float2(3.0f, -1.0f), float2(-1.0f, 3.0f)};
84 
85 template <typename T>
86 struct MultipleColorOutputs
87 {
88     vec<T, 4> color0 [[color(0), function_constant(kColorOutputAvailable0)]];
89     vec<T, 4> color1 [[color(1), function_constant(kColorOutputAvailable1)]];
90     vec<T, 4> color2 [[color(2), function_constant(kColorOutputAvailable2)]];
91     vec<T, 4> color3 [[color(3), function_constant(kColorOutputAvailable3)]];
92     vec<T, 4> color4 [[color(4), function_constant(kColorOutputAvailable4)]];
93     vec<T, 4> color5 [[color(5), function_constant(kColorOutputAvailable5)]];
94     vec<T, 4> color6 [[color(6), function_constant(kColorOutputAvailable6)]];
95     vec<T, 4> color7 [[color(7), function_constant(kColorOutputAvailable7)]];
96 };
97 # 69 ".\\common.h"
98 template <typename T>
99 static inline MultipleColorOutputs<T> toMultipleColorOutputs(vec<T, 4> color)
100 {
101     MultipleColorOutputs<T> re;
102 
103     do { if (kColorOutputAvailable0) { re.color0 = color; } } while (0);
104     do { if (kColorOutputAvailable1) { re.color1 = color; } } while (0);
105     do { if (kColorOutputAvailable2) { re.color2 = color; } } while (0);
106     do { if (kColorOutputAvailable3) { re.color3 = color; } } while (0);
107     do { if (kColorOutputAvailable4) { re.color4 = color; } } while (0);
108     do { if (kColorOutputAvailable5) { re.color5 = color; } } while (0);
109     do { if (kColorOutputAvailable6) { re.color6 = color; } } while (0);
110     do { if (kColorOutputAvailable7) { re.color7 = color; } } while (0);
111 
112     return re;
113 }
114 
115 static inline float3 cubeTexcoords(float2 texcoords, int face)
116 {
117     texcoords = 2.0 * texcoords - 1.0;
118     switch (face)
119     {
120         case 0:
121             return float3(1.0, -texcoords.y, -texcoords.x);
122         case 1:
123             return float3(-1.0, -texcoords.y, texcoords.x);
124         case 2:
125             return float3(texcoords.x, 1.0, texcoords.y);
126         case 3:
127             return float3(texcoords.x, -1.0, -texcoords.y);
128         case 4:
129             return float3(texcoords.x, -texcoords.y, 1.0);
130         case 5:
131             return float3(-texcoords.x, -texcoords.y, -1.0);
132     }
133     return float3(texcoords, 0);
134 }
135 
136 template <typename T>
137 static inline vec<T, 4> resolveTextureMS(texture2d_ms<T> srcTexture, uint2 coords)
138 {
139     uint samples = srcTexture.get_num_samples();
140 
141     vec<T, 4> output(0);
142 
143     for (uint sample = 0; sample < samples; ++sample)
144     {
145         output += srcTexture.read(coords, sample);
146     }
147 
148     output = output / samples;
149 
150     return output;
151 }
152 
153 static inline float4 sRGBtoLinear(float4 color)
154 {
155     float3 linear1 = color.rgb / 12.92;
156     float3 linear2 = powr((color.rgb + float3(0.055)) / 1.055, 2.4);
157     float3 factor = float3(color.rgb <= float3(0.04045));
158     float4 linear = float4(factor * linear1 + float3(1.0 - factor) * linear2, color.a);
159 
160     return linear;
161 }
162 
163 static inline float linearToSRGB(float color)
164 {
165     if (color <= 0.0f)
166         return 0.0f;
167     if (color < 0.0031308f)
168         return 12.92f * color;
169     if (color < 1.0f)
170         return 1.055f * powr(color, 0.41666f) - 0.055f;
171     return 1.0f;
172 }
173 
174 static inline float4 linearToSRGB(float4 color)
175 {
176     return float4(linearToSRGB(color.r), linearToSRGB(color.g), linearToSRGB(color.b), color.a);
177 }
178 
179 template <typename Short>
180 static inline Short bytesToShort(constant uchar *input, uint offset)
181 {
182     Short inputLo = input[offset];
183     Short inputHi = input[offset + 1];
184 
185     return inputLo | (inputHi << 8);
186 }
187 
188 template <typename Int>
189 static inline Int bytesToInt(constant uchar *input, uint offset)
190 {
191     Int input0 = input[offset];
192     Int input1 = input[offset + 1];
193     Int input2 = input[offset + 2];
194     Int input3 = input[offset + 3];
195 
196     return input0 | (input1 << 8) | (input2 << 16) | (input3 << 24);
197 }
198 
199 template <typename Short>
200 static inline void shortToBytes(Short val, uint offset, device uchar *output)
201 {
202     ushort valUnsigned = as_type<ushort>(val);
203     output[offset] = valUnsigned & 0xff;
204     output[offset + 1] = (valUnsigned >> 8) & 0xff;
205 }
206 
207 template <typename Int>
208 static inline void intToBytes(Int val, uint offset, device uchar *output)
209 {
210     uint valUnsigned = as_type<uint>(val);
211     output[offset] = valUnsigned & 0xff;
212     output[offset + 1] = (valUnsigned >> 8) & 0xff;
213     output[offset + 2] = (valUnsigned >> 16) & 0xff;
214     output[offset + 3] = (valUnsigned >> 24) & 0xff;
215 }
216 
217 static inline void floatToBytes(float val, uint offset, device uchar *output)
218 {
219     intToBytes(as_type<uint>(val), offset, output);
220 }
221 
222 static inline void int24bitToBytes(uint val, uint offset, device uchar *output)
223 {
224     output[offset] = val & 0xff;
225     output[offset + 1] = (val >> 8) & 0xff;
226     output[offset + 2] = (val >> 16) & 0xff;
227 }
228 
229 template <unsigned int inputBitCount, unsigned int inputBitStart, typename T>
230 static inline T getShiftedData(T input)
231 {
232     static_assert(inputBitCount + inputBitStart <= (sizeof(T) * 8),
233                   "T must have at least as many bits as inputBitCount + inputBitStart.");
234     const T mask = (1 << inputBitCount) - 1;
235     return (input >> inputBitStart) & mask;
236 }
237 
238 template <unsigned int inputBitCount, unsigned int inputBitStart, typename T>
239 static inline T shiftData(T input)
240 {
241     static_assert(inputBitCount + inputBitStart <= (sizeof(T) * 8),
242                   "T must have at least as many bits as inputBitCount + inputBitStart.");
243     const T mask = (1 << inputBitCount) - 1;
244     return (input & mask) << inputBitStart;
245 }
246 
247 template <unsigned int inputBitCount, typename T>
248 static inline float normalizedToFloat(T input)
249 {
250     static_assert(inputBitCount <= (sizeof(T) * 8),
251                   "T must have more bits than or same bits as inputBitCount.");
252     static_assert(inputBitCount <= 23, "Only single precision is supported");
253 
254     constexpr float inverseMax = 1.0f / ((1 << inputBitCount) - 1);
255     return input * inverseMax;
256 }
257 
258 template <typename T>
259 static inline float normalizedToFloat(T input)
260 {
261     return normalizedToFloat<sizeof(T) * 8, T>(input);
262 }
263 
264 template <>
265 inline float normalizedToFloat(short input)
266 {
267     constexpr float inverseMax = 1.0f / 0x7fff;
268     return static_cast<float>(input) * inverseMax;
269 }
270 
271 template <>
272 inline float normalizedToFloat(int input)
273 {
274     constexpr float inverseMax = 1.0f / 0x7fffffff;
275     return static_cast<float>(input) * inverseMax;
276 }
277 
278 template <>
279 inline float normalizedToFloat(uint input)
280 {
281     constexpr float inverseMax = 1.0f / 0xffffffff;
282     return static_cast<float>(input) * inverseMax;
283 }
284 
285 template <unsigned int outputBitCount, typename T>
286 static inline T floatToNormalized(float input)
287 {
288     static_assert(outputBitCount <= (sizeof(T) * 8),
289                   "T must have more bits than or same bits as inputBitCount.");
290     static_assert(outputBitCount > (metal::is_unsigned<T>::value ? 0 : 1),
291                   "outputBitCount must be at least 1 not counting the sign bit.");
292     constexpr unsigned int bits =
293         metal::is_unsigned<T>::value ? outputBitCount : outputBitCount - 1;
294     static_assert(bits <= 23, "Only single precision is supported");
295 
296     return static_cast<T>(metal::round(((1 << bits) - 1) * input));
297 }
298 
299 template <typename T>
300 static inline T floatToNormalized(float input)
301 {
302     return floatToNormalized<sizeof(T) * 8, T>(input);
303 }
304 
305 }
306 }
307 # 9 ".\\blit.metal" 2
308 
309 using namespace rx::mtl_shader;
310 
311 
312 constant bool kPremultiplyAlpha [[function_constant(1)]];
313 constant bool kUnmultiplyAlpha [[function_constant(2)]];
314 constant bool kTransformLinearToSrgb [[function_constant(3)]];
315 constant int kSourceTextureType [[function_constant(4)]];
316 constant int kSourceTexture2Type [[function_constant(5)]];
317 
318 constant bool kSourceTextureType2D = kSourceTextureType == kTextureType2D;
319 constant bool kSourceTextureType2DArray = kSourceTextureType == kTextureType2DArray;
320 constant bool kSourceTextureType2DMS = kSourceTextureType == kTextureType2DMultisample;
321 constant bool kSourceTextureTypeCube = kSourceTextureType == kTextureTypeCube;
322 constant bool kSourceTextureType3D = kSourceTextureType == kTextureType3D;
323 
324 constant bool kSourceTexture2Type2D = kSourceTexture2Type == kTextureType2D;
325 constant bool kSourceTexture2Type2DArray = kSourceTexture2Type == kTextureType2DArray;
326 constant bool kSourceTexture2Type2DMS = kSourceTexture2Type == kTextureType2DMultisample;
327 constant bool kSourceTexture2TypeCube = kSourceTexture2Type == kTextureTypeCube;
328 
329 struct BlitParams
330 {
331 
332     float4 srcTexCoords;
333     int srcLevel;
334     int srcLayer;
335     bool dstLuminance;
336     uint8_t padding[7];
337 };
338 
339 struct BlitVSOut
340 {
341     float4 position [[position]];
342     float2 texCoords [[center_no_perspective, user(locn1)]];
343 };
344 
345 vertex BlitVSOut blitVS(unsigned int vid [[vertex_id]], constant BlitParams &options [[buffer(0)]])
346 {
347     BlitVSOut output;
348     output.position.xy = select(float2(-1.0f), float2(1.0f), bool2(vid & uint2(2, 1)));
349     output.position.zw = float2(0.0, 1.0);
350     output.texCoords = select(options.srcTexCoords.xy, options.srcTexCoords.zw, bool2(vid & uint2(2, 1)));
351 
352     return output;
353 }
354 
355 template <typename SrcTexture2d>
356 static uint2 getImageCoords(SrcTexture2d srcTexture, float2 texCoords)
357 {
358     uint2 dimens(srcTexture.get_width(), srcTexture.get_height());
359     uint2 coords = uint2(texCoords * float2(dimens));
360 
361     return coords;
362 }
363 
364 template <typename T>
365 static inline vec<T, 4> blitSampleTextureMS(texture2d_ms<T> srcTexture, float2 texCoords)
366 {
367     uint2 coords = getImageCoords(srcTexture, texCoords);
368     return resolveTextureMS(srcTexture, coords);
369 }
370 
371 template <typename T>
372 static inline vec<T, 4> blitSampleTexture3D(texture3d<T> srcTexture,
373                                             sampler textureSampler,
374                                             float2 texCoords,
375                                             constant BlitParams &options)
376 {
377     uint depth = srcTexture.get_depth(options.srcLevel);
378     float zCoord = (float(options.srcLayer) + 0.5) / float(depth);
379 
380     return srcTexture.sample(textureSampler, float3(texCoords, zCoord), level(options.srcLevel));
381 }
382 # 101 ".\\blit.metal"
383 template <typename T>
384 static inline vec<T, 4> blitReadTexture(BlitVSOut input [[stage_in]], texture2d<T> srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], texture2d_array<T> srcTexture2dArray [[texture(0), function_constant(kSourceTextureType2DArray)]], texture2d_ms<T> srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]], texturecube<T> srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]], texture3d<T> srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]], sampler textureSampler [[sampler(0)]], constant BlitParams &options [[buffer(0)]])
385 {
386     vec<T, 4> output;
387 
388     switch (kSourceTextureType)
389     {
390         case kTextureType2D:
391             output = srcTexture2d.sample(textureSampler, input.texCoords, level(options.srcLevel));
392             break;
393         case kTextureType2DArray:
394             output = srcTexture2dArray.sample(textureSampler, input.texCoords, options.srcLayer,
395                                               level(options.srcLevel));
396             break;
397         case kTextureType2DMultisample:
398             output = blitSampleTextureMS(srcTexture2dMS, input.texCoords);
399             break;
400         case kTextureTypeCube:
401             output = srcTextureCube.sample(textureSampler,
402                                            cubeTexcoords(input.texCoords, options.srcLayer),
403                                            level(options.srcLevel));
404             break;
405         case kTextureType3D:
406             output = blitSampleTexture3D(srcTexture3d, textureSampler, input.texCoords, options);
407             break;
408     }
409 
410     if (kTransformLinearToSrgb) {
411         output.x = linearToSRGB(output.x);
412         output.y = linearToSRGB(output.y);
413         output.z = linearToSRGB(output.z);
414     }
415     if (kUnmultiplyAlpha)
416     {
417         if (output.a != 0.0)
418         {
419             output.xyz /= output.a;
420         }
421     }
422     if (kPremultiplyAlpha)
423     {
424         output.xyz *= output.a;
425     }
426 
427     if (options.dstLuminance)
428     {
429         output.g = output.b = output.r;
430     }
431 
432     return output;
433 }
434 
435 template <typename T>
436 static inline MultipleColorOutputs<T> blitFS(BlitVSOut input [[stage_in]], texture2d<T> srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], texture2d_array<T> srcTexture2dArray [[texture(0), function_constant(kSourceTextureType2DArray)]], texture2d_ms<T> srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]], texturecube<T> srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]], texture3d<T> srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]], sampler textureSampler [[sampler(0)]], constant BlitParams &options [[buffer(0)]])
437 {
438     vec<T, 4> output = blitReadTexture(input, srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, srcTexture3d, textureSampler, options);
439 
440     return toMultipleColorOutputs(output);
441 }
442 
443 fragment MultipleColorOutputs<float> blitFloatFS(BlitVSOut input [[stage_in]], texture2d<float> srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], texture2d_array<float> srcTexture2dArray [[texture(0), function_constant(kSourceTextureType2DArray)]], texture2d_ms<float> srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]], texturecube<float> srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]], texture3d<float> srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]], sampler textureSampler [[sampler(0)]], constant BlitParams &options [[buffer(0)]])
444 {
445     return blitFS(input, srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, srcTexture3d, textureSampler, options);
446 }
447 fragment MultipleColorOutputs<int> blitIntFS(BlitVSOut input [[stage_in]], texture2d<int> srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], texture2d_array<int> srcTexture2dArray [[texture(0), function_constant(kSourceTextureType2DArray)]], texture2d_ms<int> srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]], texturecube<int> srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]], texture3d<int> srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]], sampler textureSampler [[sampler(0)]], constant BlitParams &options [[buffer(0)]])
448 {
449     return blitFS(input, srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, srcTexture3d, textureSampler, options);
450 }
451 fragment MultipleColorOutputs<uint> blitUIntFS(BlitVSOut input [[stage_in]], texture2d<uint> srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], texture2d_array<uint> srcTexture2dArray [[texture(0), function_constant(kSourceTextureType2DArray)]], texture2d_ms<uint> srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]], texturecube<uint> srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]], texture3d<uint> srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]], sampler textureSampler [[sampler(0)]], constant BlitParams &options [[buffer(0)]])
452 {
453     return blitFS(input, srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, srcTexture3d, textureSampler, options);
454 }
455 
456 fragment MultipleColorOutputs<uint> copyTextureFloatToUIntFS(BlitVSOut input [[stage_in]], texture2d<float> srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], texture2d_array<float> srcTexture2dArray [[texture(0), function_constant(kSourceTextureType2DArray)]], texture2d_ms<float> srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]], texturecube<float> srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]], texture3d<float> srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]], sampler textureSampler [[sampler(0)]], constant BlitParams &options [[buffer(0)]])
457 {
458     float4 inputColor = blitReadTexture<>(input, srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, srcTexture3d, textureSampler, options);
459     uint4 output = uint4(inputColor * float4(255.0));
460 
461     return toMultipleColorOutputs(output);
462 }
463 
464 
465 struct FragmentDepthOut
466 {
467     float depth [[depth(any)]];
468 };
469 
470 static inline float sampleDepth(
471     texture2d<float> srcTexture2d [[function_constant(kSourceTextureType2D)]],
472     texture2d_array<float> srcTexture2dArray [[function_constant(kSourceTextureType2DArray)]],
473     texture2d_ms<float> srcTexture2dMS [[function_constant(kSourceTextureType2DMS)]],
474     texturecube<float> srcTextureCube [[function_constant(kSourceTextureTypeCube)]],
475     float2 texCoords,
476     constant BlitParams &options)
477 {
478     float4 output;
479 
480     constexpr sampler textureSampler(mag_filter::nearest, min_filter::nearest);
481 
482     switch (kSourceTextureType)
483     {
484         case kTextureType2D:
485             output = srcTexture2d.sample(textureSampler, texCoords, level(options.srcLevel));
486             break;
487         case kTextureType2DArray:
488             output = srcTexture2dArray.sample(textureSampler, texCoords, options.srcLayer,
489                                               level(options.srcLevel));
490             break;
491         case kTextureType2DMultisample:
492 
493             output = srcTexture2dMS.read(getImageCoords(srcTexture2dMS, texCoords), 0);
494             break;
495         case kTextureTypeCube:
496             output =
497                 srcTextureCube.sample(textureSampler, cubeTexcoords(texCoords, options.srcLayer),
498                                       level(options.srcLevel));
499             break;
500     }
501 
502     return output.r;
503 }
504 
505 fragment FragmentDepthOut blitDepthFS(BlitVSOut input [[stage_in]],
506                                       texture2d<float> srcTexture2d
507                                       [[texture(0), function_constant(kSourceTextureType2D)]],
508                                       texture2d_array<float> srcTexture2dArray
509                                       [[texture(0), function_constant(kSourceTextureType2DArray)]],
510                                       texture2d_ms<float> srcTexture2dMS
511                                       [[texture(0), function_constant(kSourceTextureType2DMS)]],
512                                       texturecube<float> srcTextureCube
513                                       [[texture(0), function_constant(kSourceTextureTypeCube)]],
514                                       constant BlitParams &options [[buffer(0)]])
515 {
516     FragmentDepthOut re;
517 
518     re.depth = sampleDepth(srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube,
519                            input.texCoords, options);
520 
521     return re;
522 }
523 
524 static inline uint32_t sampleStencil(
525     texture2d<uint32_t> srcTexture2d [[function_constant(kSourceTexture2Type2D)]],
526     texture2d_array<uint32_t> srcTexture2dArray [[function_constant(kSourceTexture2Type2DArray)]],
527     texture2d_ms<uint32_t> srcTexture2dMS [[function_constant(kSourceTexture2Type2DMS)]],
528     texturecube<uint32_t> srcTextureCube [[function_constant(kSourceTexture2TypeCube)]],
529     float2 texCoords,
530     int srcLevel,
531     int srcLayer)
532 {
533     uint4 output;
534     constexpr sampler textureSampler(mag_filter::nearest, min_filter::nearest);
535 
536     switch (kSourceTexture2Type)
537     {
538         case kTextureType2D:
539             output = srcTexture2d.sample(textureSampler, texCoords, level(srcLevel));
540             break;
541         case kTextureType2DArray:
542             output = srcTexture2dArray.sample(textureSampler, texCoords, srcLayer, level(srcLevel));
543             break;
544         case kTextureType2DMultisample:
545 
546             output = srcTexture2dMS.read(getImageCoords(srcTexture2dMS, texCoords), 0);
547             break;
548         case kTextureTypeCube:
549             output = srcTextureCube.sample(textureSampler, cubeTexcoords(texCoords, srcLayer),
550                                            level(srcLevel));
551             break;
552     }
553 
554     return output.r;
555 }
556 
557 
558 struct BlitStencilToBufferParams
559 {
560     float2 srcStartTexCoords;
561     float2 srcTexCoordSteps;
562     int srcLevel;
563     int srcLayer;
564 
565     uint2 dstSize;
566     uint dstBufferRowPitch;
567 
568     bool resolveMS;
569 };
570 
571 kernel void blitStencilToBufferCS(ushort2 gIndices [[thread_position_in_grid]],
572                                   texture2d<uint32_t> srcTexture2d
573                                   [[texture(1), function_constant(kSourceTexture2Type2D)]],
574                                   texture2d_array<uint32_t> srcTexture2dArray
575                                   [[texture(1), function_constant(kSourceTexture2Type2DArray)]],
576                                   texture2d_ms<uint32_t> srcTexture2dMS
577                                   [[texture(1), function_constant(kSourceTexture2Type2DMS)]],
578                                   texturecube<uint32_t> srcTextureCube
579                                   [[texture(1), function_constant(kSourceTexture2TypeCube)]],
580                                   constant BlitStencilToBufferParams &options [[buffer(0)]],
581                                   device uchar *buffer [[buffer(1)]])
582 {
583     if (gIndices.x >= options.dstSize.x || gIndices.y >= options.dstSize.y)
584     {
585         return;
586     }
587 
588     float2 srcTexCoords = options.srcStartTexCoords + float2(gIndices) * options.srcTexCoordSteps;
589 
590     if (kSourceTexture2Type == kTextureType2DMultisample && !options.resolveMS)
591     {
592         uint samples = srcTexture2dMS.get_num_samples();
593         uint2 imageCoords = getImageCoords(srcTexture2dMS, srcTexCoords);
594         uint bufferOffset = options.dstBufferRowPitch * gIndices.y + samples * gIndices.x;
595 
596         for (uint sample = 0; sample < samples; ++sample)
597         {
598             uint stencilPerSample = srcTexture2dMS.read(imageCoords, sample).r;
599             buffer[bufferOffset + sample] = static_cast<uchar>(stencilPerSample);
600         }
601     }
602     else
603     {
604         uint32_t stencil =
605             sampleStencil(srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube,
606                           srcTexCoords, options.srcLevel, options.srcLayer);
607 
608         buffer[options.dstBufferRowPitch * gIndices.y + gIndices.x] = static_cast<uchar>(stencil);
609     }
610 }
611 
612 
613 #if __METAL_VERSION__ >= 210
614 
615 struct FragmentStencilOut
616 {
617     uint32_t stencil [[stencil]];
618 };
619 
620 struct FragmentDepthStencilOut
621 {
622     float depth [[depth(any)]];
623     uint32_t stencil [[stencil]];
624 };
625 
626 fragment FragmentStencilOut blitStencilFS(
627     BlitVSOut input [[stage_in]],
628     texture2d<uint32_t> srcTexture2d [[texture(1), function_constant(kSourceTexture2Type2D)]],
629     texture2d_array<uint32_t> srcTexture2dArray
630     [[texture(1), function_constant(kSourceTexture2Type2DArray)]],
631     texture2d_ms<uint32_t> srcTexture2dMS
632     [[texture(1), function_constant(kSourceTexture2Type2DMS)]],
633     texturecube<uint32_t> srcTextureCube [[texture(1), function_constant(kSourceTexture2TypeCube)]],
634     constant BlitParams &options [[buffer(0)]])
635 {
636     FragmentStencilOut re;
637 
638     re.stencil = sampleStencil(srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube,
639                                input.texCoords, options.srcLevel, options.srcLayer);
640 
641     return re;
642 }
643 
644 fragment FragmentDepthStencilOut blitDepthStencilFS(
645     BlitVSOut input [[stage_in]],
646 
647     texture2d<float> srcDepthTexture2d [[texture(0), function_constant(kSourceTextureType2D)]],
648     texture2d_array<float> srcDepthTexture2dArray
649     [[texture(0), function_constant(kSourceTextureType2DArray)]],
650     texture2d_ms<float> srcDepthTexture2dMS
651     [[texture(0), function_constant(kSourceTextureType2DMS)]],
652     texturecube<float> srcDepthTextureCube
653     [[texture(0), function_constant(kSourceTextureTypeCube)]],
654 
655 
656     texture2d<uint32_t> srcStencilTexture2d
657     [[texture(1), function_constant(kSourceTexture2Type2D)]],
658     texture2d_array<uint32_t> srcStencilTexture2dArray
659     [[texture(1), function_constant(kSourceTexture2Type2DArray)]],
660     texture2d_ms<uint32_t> srcStencilTexture2dMS
661     [[texture(1), function_constant(kSourceTexture2Type2DMS)]],
662     texturecube<uint32_t> srcStencilTextureCube
663     [[texture(1), function_constant(kSourceTexture2TypeCube)]],
664 
665     constant BlitParams &options [[buffer(0)]])
666 {
667     FragmentDepthStencilOut re;
668 
669     re.depth = sampleDepth(srcDepthTexture2d, srcDepthTexture2dArray, srcDepthTexture2dMS,
670                            srcDepthTextureCube, input.texCoords, options);
671     re.stencil =
672         sampleStencil(srcStencilTexture2d, srcStencilTexture2dArray, srcStencilTexture2dMS,
673                       srcStencilTextureCube, input.texCoords, options.srcLevel, options.srcLayer);
674     return re;
675 }
676 #endif
677 # 2 "temp_master_source.metal" 2
678 # 1 ".\\clear.metal" 1
679 # 10 ".\\clear.metal"
680 using namespace rx::mtl_shader;
681 
682 struct ClearParams
683 {
684     float4 clearColor;
685     float clearDepth;
686 };
687 
688 vertex float4 clearVS(unsigned int vid [[ vertex_id ]],
689                       constant ClearParams &clearParams [[buffer(0)]])
690 {
691     return float4(gCorners[vid], clearParams.clearDepth, 1.0);
692 }
693 
694 fragment MultipleColorOutputs<float> clearFloatFS(constant ClearParams &clearParams [[buffer(0)]])
695 {
696     return toMultipleColorOutputs(clearParams.clearColor);
697 }
698 
699 fragment MultipleColorOutputs<int> clearIntFS(constant ClearParams &clearParams [[buffer(0)]])
700 {
701     return toMultipleColorOutputs(as_type<int4>(clearParams.clearColor));
702 }
703 
704 fragment MultipleColorOutputs<uint> clearUIntFS(constant ClearParams &clearParams [[buffer(0)]])
705 {
706     return toMultipleColorOutputs(as_type<uint4>(clearParams.clearColor));
707 }
708 # 3 "temp_master_source.metal" 2
709 # 1 ".\\gen_indices.metal" 1
710 
711 
712 
713 
714 
715 
716 
717 
718 using namespace rx::mtl_shader;
719 
720 
721 constant bool kSourceBufferAligned[[function_constant(100)]];
722 constant bool kSourceIndexIsU8[[function_constant(200)]];
723 constant bool kSourceIndexIsU16[[function_constant(300)]];
724 constant bool kSourceIndexIsU32[[function_constant(400)]];
725 constant bool kSourceBufferUnaligned = !kSourceBufferAligned;
726 constant bool kUseSourceBufferU8 = kSourceIndexIsU8 || kSourceBufferUnaligned;
727 constant bool kUseSourceBufferU16 = kSourceIndexIsU16 && kSourceBufferAligned;
728 constant bool kUseSourceBufferU32 = kSourceIndexIsU32 && kSourceBufferAligned;
729 
730 struct IndexConversionParams
731 {
732     uint32_t srcOffset;
733     uint32_t indexCount;
734     bool primitiveRestartEnabled;
735 };
736 
737 
738 
739 inline ushort getIndexAligned(constant ushort *inputAligned, uint offset, uint idx)
740 {
741     return inputAligned[offset / 2 + idx];
742 }
743 inline uint getIndexAligned(constant uint *inputAligned, uint offset, uint idx)
744 {
745     return inputAligned[offset / 4 + idx];
746 }
747 inline uchar getIndexAligned(constant uchar *input, uint offset, uint idx)
748 {
749     return input[offset + idx];
750 }
751 inline ushort getIndexUnalignedU16(constant uchar *input, uint offset, uint idx)
752 {
753     ushort inputLo = input[offset + 2 * idx];
754     ushort inputHi = input[offset + 2 * idx + 1];
755 
756     return inputLo | (inputHi << 8);
757 }
758 inline uint getIndexUnalignedU32(constant uchar *input, uint offset, uint idx)
759 {
760     uint input0 = input[offset + 4 * idx];
761     uint input1 = input[offset + 4 * idx + 1];
762     uint input2 = input[offset + 4 * idx + 2];
763     uint input3 = input[offset + 4 * idx + 3];
764 
765     return input0 | (input1 << 8) | (input2 << 16) | (input3 << 24);
766 }
767 
768 kernel void convertIndexU8ToU16(uint idx [[thread_position_in_grid]],
769                                 constant IndexConversionParams &options [[buffer(0)]],
770                                 constant uchar *input [[buffer(1)]],
771                                 device ushort *output [[buffer(2)]])
772 {
773     if (idx >= options.indexCount) { return; };
774 
775     uchar value = getIndexAligned(input, options.srcOffset, idx);
776 
777     if (options.primitiveRestartEnabled && value == 0xff)
778     {
779         output[idx] = 0xffff;
780     }
781     else
782     {
783         output[idx] = value;
784     }
785 }
786 
787 kernel void convertIndexU16(uint idx [[thread_position_in_grid]],
788                             constant IndexConversionParams &options [[buffer(0)]],
789                             constant uchar *input
790                             [[buffer(1), function_constant(kSourceBufferUnaligned)]],
791                             constant ushort *inputAligned
792                             [[buffer(1), function_constant(kSourceBufferAligned)]],
793                             device ushort *output [[buffer(2)]])
794 {
795     if (idx >= options.indexCount) { return; };
796 
797     ushort value;
798     if (kSourceBufferAligned)
799     {
800         value = getIndexAligned(inputAligned, options.srcOffset, idx);
801     }
802     else
803     {
804         value = getIndexUnalignedU16(input, options.srcOffset, idx);
805     }
806     output[idx] = value;
807 }
808 
809 kernel void convertIndexU32(uint idx [[thread_position_in_grid]],
810                             constant IndexConversionParams &options [[buffer(0)]],
811                             constant uchar *input
812                             [[buffer(1), function_constant(kSourceBufferUnaligned)]],
813                             constant uint *inputAligned
814                             [[buffer(1), function_constant(kSourceBufferAligned)]],
815                             device uint *output [[buffer(2)]])
816 {
817     if (idx >= options.indexCount) { return; };
818 
819     uint value;
820     if (kSourceBufferAligned)
821     {
822         value = getIndexAligned(inputAligned, options.srcOffset, idx);
823     }
824     else
825     {
826         value = getIndexUnalignedU32(input, options.srcOffset, idx);
827     }
828     output[idx] = value;
829 }
830 
831 struct IndexFromArrayParams
832 {
833     uint firstVertex;
834 
835     uint vertexCount;
836 };
837 
838 
839 kernel void genTriFanIndicesFromArray(uint idx [[thread_position_in_grid]],
840                                       constant IndexFromArrayParams &options [[buffer(0)]],
841                                       device uint *output [[buffer(2)]])
842 {
843     if (idx >= options.vertexCount) { return; };
844 
845     uint vertexIdx = options.firstVertex + 2 + idx;
846 
847 
848 
849     output[3 * idx ] = vertexIdx - 1;
850     output[3 * idx + 1] = vertexIdx;
851     output[3 * idx + 2] = options.firstVertex;
852 }
853 
854 inline uint getIndexU32(uint offset,
855                         uint idx,
856                         constant uchar *inputU8 [[function_constant(kUseSourceBufferU8)]],
857                         constant ushort *inputU16 [[function_constant(kUseSourceBufferU16)]],
858                         constant uint *inputU32 [[function_constant(kUseSourceBufferU32)]])
859 {
860     if (kUseSourceBufferU8)
861     {
862         if (kSourceIndexIsU16)
863         {
864             return getIndexUnalignedU16(inputU8, offset, idx);
865         }
866         else if (kSourceIndexIsU32)
867         {
868             return getIndexUnalignedU32(inputU8, offset, idx);
869         }
870         return getIndexAligned(inputU8, offset, idx);
871     }
872     else if (kUseSourceBufferU16)
873     {
874         return getIndexAligned(inputU16, offset, idx);
875     }
876     else if (kUseSourceBufferU32)
877     {
878         return getIndexAligned(inputU32, offset, idx);
879     }
880     return 0;
881 }
882 
883 
884 
885 
886 kernel void genTriFanIndicesFromElements(uint idx [[thread_position_in_grid]],
887                                          constant IndexConversionParams &options [[buffer(0)]],
888                                          constant uchar *inputU8
889                                          [[buffer(1), function_constant(kUseSourceBufferU8)]],
890                                          constant ushort *inputU16
891                                          [[buffer(1), function_constant(kUseSourceBufferU16)]],
892                                          constant uint *inputU32
893                                          [[buffer(1), function_constant(kUseSourceBufferU32)]],
894                                          device uint *output [[buffer(2)]])
895 {
896     if (idx >= options.indexCount) { return; };
897 
898     uint elemIdx = 2 + idx;
899 
900     output[3 * idx] = getIndexU32(options.srcOffset, 0, inputU8, inputU16, inputU32);
901     output[3 * idx + 1] = getIndexU32(options.srcOffset, elemIdx - 1, inputU8, inputU16, inputU32);
902     output[3 * idx + 2] = getIndexU32(options.srcOffset, elemIdx, inputU8, inputU16, inputU32);
903 }
904 
905 
906 kernel void genLineLoopIndicesFromArray(uint idx [[thread_position_in_grid]],
907                                         constant IndexFromArrayParams &options [[buffer(0)]],
908                                         device uint *output [[buffer(2)]])
909 {
910     uint totalIndices = options.vertexCount + 1;
911     if (idx >= totalIndices) { return; };
912 
913     output[idx] = options.firstVertex + idx % options.vertexCount;
914 }
915 
916 
917 
918 kernel void genLineLoopIndicesFromElements(uint idx [[thread_position_in_grid]],
919                                            constant IndexConversionParams &options [[buffer(0)]],
920                                            constant uchar *inputU8
921                                            [[buffer(1), function_constant(kUseSourceBufferU8)]],
922                                            constant ushort *inputU16
923                                            [[buffer(1), function_constant(kUseSourceBufferU16)]],
924                                            constant uint *inputU32
925                                            [[buffer(1), function_constant(kUseSourceBufferU32)]],
926                                            device uint *output [[buffer(2)]])
927 {
928     uint totalTargetIndices = options.indexCount + 1;
929     if (idx >= totalTargetIndices) { return; };
930 
931     output[idx] =
932         getIndexU32(options.srcOffset, idx % options.indexCount, inputU8, inputU16, inputU32);
933 }
934 # 4 "temp_master_source.metal" 2
935 # 1 ".\\gen_mipmap.metal" 1
936 
937 
938 
939 
940 
941 
942 
943 
944 using namespace rx::mtl_shader;
945 # 31 ".\\gen_mipmap.metal"
946 struct GenMipParams
947 {
948     uint srcLevel;
949     uint numMipLevelsToGen;
950     bool sRGB;
951 };
952 
953 
954 
955 kernel void generate3DMipmaps(uint lIndex [[thread_index_in_threadgroup]],
956                               ushort3 gIndices [[thread_position_in_grid]],
957                               texture3d<float> srcTexture [[texture(0)]],
958                               texture3d<float, access::write> dstMip1 [[texture(1)]],
959                               texture3d<float, access::write> dstMip2 [[texture(2)]],
960                               texture3d<float, access::write> dstMip3 [[texture(3)]],
961                               texture3d<float, access::write> dstMip4 [[texture(4)]],
962                               constant GenMipParams &options [[buffer(0)]])
963 {
964     ushort3 mipSize = ushort3(dstMip1.get_width(), dstMip1.get_height(), dstMip1.get_depth());
965     bool validThread = gIndices.x < mipSize.x && gIndices.y < mipSize.y && gIndices.z < mipSize.z;
966 
967     constexpr sampler textureSampler(mag_filter::linear, min_filter::linear, mip_filter::linear);
968 
969 
970 
971 
972     threadgroup float sR[(8 * 8 * 8)];
973     threadgroup float sG[(8 * 8 * 8)];
974     threadgroup float sB[(8 * 8 * 8)];
975     threadgroup float sA[(8 * 8 * 8)];
976 
977 
978     float4 texel1;
979     if (validThread)
980     {
981         float3 texCoords = (float3(gIndices) + float3(0.5, 0.5, 0.5)) / float3(mipSize);
982         texel1 = srcTexture.sample(textureSampler, texCoords, level(options.srcLevel));
983 
984 
985         dstMip1.write(texel1, gIndices);
986     }
987     else
988     {
989 
990         lIndex = 0xffffffff;
991     }
992 
993     if (options.numMipLevelsToGen == 1)
994     {
995         return;
996     }
997 
998 
999 
1000 
1001     if (options.sRGB)
1002     {
1003         texel1 = linearToSRGB(texel1);
1004     }
1005     sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;;
1006 
1007     threadgroup_barrier(mem_flags::mem_threadgroup);
1008 
1009 
1010     if ((lIndex & 0x49) == 0)
1011     {
1012         bool3 atEdge = gIndices == (mipSize - ushort3(1));
1013 
1014 
1015 
1016         float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 1], sG[lIndex + 1], sB[lIndex + 1], sA[lIndex + 1]));
1017 
1018         float4 texel3 = (atEdge.y) ? (texel1) : (float4(sR[lIndex + 8], sG[lIndex + 8], sB[lIndex + 8], sA[lIndex + 8]));
1019 
1020         float4 texel4 = (atEdge.z) ? (texel1) : (float4(sR[lIndex + (8 * 8)], sG[lIndex + (8 * 8)], sB[lIndex + (8 * 8)], sA[lIndex + (8 * 8)]));
1021 
1022         float4 texel5 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (8 + 1)], sG[lIndex + (8 + 1)], sB[lIndex + (8 + 1)], sA[lIndex + (8 + 1)]));
1023 
1024 
1025         float4 texel6 = (atEdge.x | atEdge.z) ? (texel2) : (float4(sR[lIndex + ((8 * 8) + 1)], sG[lIndex + ((8 * 8) + 1)], sB[lIndex + ((8 * 8) + 1)], sA[lIndex + ((8 * 8) + 1)]));
1026 
1027 
1028         float4 texel7 = (atEdge.y | atEdge.z) ? (texel3) : (float4(sR[lIndex + ((8 * 8) + 8)], sG[lIndex + ((8 * 8) + 8)], sB[lIndex + ((8 * 8) + 8)], sA[lIndex + ((8 * 8) + 8)]));
1029 
1030 
1031         float4 texel8 =
1032             (atEdge.x | atEdge.y | atEdge.z) ? (texel5) : (float4(sR[lIndex + ((8 * 8) + 8 + 1)], sG[lIndex + ((8 * 8) + 8 + 1)], sB[lIndex + ((8 * 8) + 8 + 1)], sA[lIndex + ((8 * 8) + 8 + 1)]));
1033 
1034 
1035         texel1 = (texel1 + texel2 + texel3 + texel4 + texel5 + texel6 + texel7 + texel8) / 8.0;
1036 
1037         dstMip2.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices >> 1);
1038 
1039 
1040         sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;;
1041     }
1042 
1043     if (options.numMipLevelsToGen == 2)
1044     {
1045         return;
1046     }
1047 
1048 
1049     threadgroup_barrier(mem_flags::mem_threadgroup);
1050 
1051 
1052     if ((lIndex & 0xdb) == 0)
1053     {
1054         mipSize = max(mipSize >> 1, ushort3(1));
1055         bool3 atEdge = (gIndices >> 1) == (mipSize - ushort3(1));
1056 
1057 
1058 
1059         float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 2], sG[lIndex + 2], sB[lIndex + 2], sA[lIndex + 2]));
1060 
1061         float4 texel3 =
1062             (atEdge.y) ? (texel1) : (float4(sR[lIndex + (2 * 8)], sG[lIndex + (2 * 8)], sB[lIndex + (2 * 8)], sA[lIndex + (2 * 8)]));
1063 
1064         float4 texel4 =
1065             (atEdge.z) ? (texel1) : (float4(sR[lIndex + (2 * (8 * 8))], sG[lIndex + (2 * (8 * 8))], sB[lIndex + (2 * (8 * 8))], sA[lIndex + (2 * (8 * 8))]));
1066 
1067         float4 texel5 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (2 * 8 + 2)], sG[lIndex + (2 * 8 + 2)], sB[lIndex + (2 * 8 + 2)], sA[lIndex + (2 * 8 + 2)]));
1068 
1069 
1070         float4 texel6 = (atEdge.x | atEdge.z) ? (texel2) : (float4(sR[lIndex + (2 * (8 * 8) + 2)], sG[lIndex + (2 * (8 * 8) + 2)], sB[lIndex + (2 * (8 * 8) + 2)], sA[lIndex + (2 * (8 * 8) + 2)]));
1071 
1072 
1073         float4 texel7 = (atEdge.y | atEdge.z) ? (texel3) : (float4(sR[lIndex + (2 * (8 * 8) + 2 * 8)], sG[lIndex + (2 * (8 * 8) + 2 * 8)], sB[lIndex + (2 * (8 * 8) + 2 * 8)], sA[lIndex + (2 * (8 * 8) + 2 * 8)]));
1074 
1075 
1076 
1077         float4 texel8 = (atEdge.x | atEdge.y | atEdge.z) ? (texel5) : (float4(sR[lIndex + (2 * (8 * 8) + 2 * 8 + 2)], sG[lIndex + (2 * (8 * 8) + 2 * 8 + 2)], sB[lIndex + (2 * (8 * 8) + 2 * 8 + 2)], sA[lIndex + (2 * (8 * 8) + 2 * 8 + 2)]));
1078 
1079 
1080 
1081         texel1 = (texel1 + texel2 + texel3 + texel4 + texel5 + texel6 + texel7 + texel8) / 8.0;
1082 
1083         dstMip3.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices >> 2);
1084 
1085 
1086         sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;;
1087     }
1088 
1089     if (options.numMipLevelsToGen == 3)
1090     {
1091         return;
1092     }
1093 
1094 
1095     threadgroup_barrier(mem_flags::mem_threadgroup);
1096 
1097 
1098     if ((lIndex & 0x1ff) == 0)
1099     {
1100         mipSize = max(mipSize >> 1, ushort3(1));
1101         bool3 atEdge = (gIndices >> 2) == (mipSize - ushort3(1));
1102 
1103 
1104 
1105         float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 4], sG[lIndex + 4], sB[lIndex + 4], sA[lIndex + 4]));
1106 
1107         float4 texel3 =
1108             (atEdge.y) ? (texel1) : (float4(sR[lIndex + (4 * 8)], sG[lIndex + (4 * 8)], sB[lIndex + (4 * 8)], sA[lIndex + (4 * 8)]));
1109 
1110         float4 texel4 =
1111             (atEdge.z) ? (texel1) : (float4(sR[lIndex + (4 * (8 * 8))], sG[lIndex + (4 * (8 * 8))], sB[lIndex + (4 * (8 * 8))], sA[lIndex + (4 * (8 * 8))]));
1112 
1113         float4 texel5 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (4 * 8 + 4)], sG[lIndex + (4 * 8 + 4)], sB[lIndex + (4 * 8 + 4)], sA[lIndex + (4 * 8 + 4)]));
1114 
1115 
1116         float4 texel6 = (atEdge.x | atEdge.z) ? (texel2) : (float4(sR[lIndex + (4 * (8 * 8) + 4)], sG[lIndex + (4 * (8 * 8) + 4)], sB[lIndex + (4 * (8 * 8) + 4)], sA[lIndex + (4 * (8 * 8) + 4)]));
1117 
1118 
1119         float4 texel7 = (atEdge.y | atEdge.z) ? (texel3) : (float4(sR[lIndex + (4 * (8 * 8) + 4 * 8)], sG[lIndex + (4 * (8 * 8) + 4 * 8)], sB[lIndex + (4 * (8 * 8) + 4 * 8)], sA[lIndex + (4 * (8 * 8) + 4 * 8)]));
1120 
1121 
1122 
1123         float4 texel8 = (atEdge.x | atEdge.y | atEdge.z) ? (texel5) : (float4(sR[lIndex + (4 * (8 * 8) + 4 * 8 + 4)], sG[lIndex + (4 * (8 * 8) + 4 * 8 + 4)], sB[lIndex + (4 * (8 * 8) + 4 * 8 + 4)], sA[lIndex + (4 * (8 * 8) + 4 * 8 + 4)]));
1124 
1125 
1126 
1127         texel1 = (texel1 + texel2 + texel3 + texel4 + texel5 + texel6 + texel7 + texel8) / 8.0;
1128 
1129         dstMip4.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices >> 3);
1130     }
1131 }
1132 
1133 kernel void generate2DMipmaps(uint lIndex [[thread_index_in_threadgroup]],
1134                               ushort2 gIndices [[thread_position_in_grid]],
1135                               texture2d<float> srcTexture [[texture(0)]],
1136                               texture2d<float, access::write> dstMip1 [[texture(1)]],
1137                               texture2d<float, access::write> dstMip2 [[texture(2)]],
1138                               texture2d<float, access::write> dstMip3 [[texture(3)]],
1139                               texture2d<float, access::write> dstMip4 [[texture(4)]],
1140                               constant GenMipParams &options [[buffer(0)]])
1141 {
1142     uint firstMipLevel = options.srcLevel + 1;
1143     ushort2 mipSize =
1144         ushort2(srcTexture.get_width(firstMipLevel), srcTexture.get_height(firstMipLevel));
1145     bool validThread = gIndices.x < mipSize.x && gIndices.y < mipSize.y;
1146 
1147     constexpr sampler textureSampler(mag_filter::linear, min_filter::linear, mip_filter::linear);
1148 
1149 
1150 
1151 
1152     threadgroup float sR[(8 * 8)];
1153     threadgroup float sG[(8 * 8)];
1154     threadgroup float sB[(8 * 8)];
1155     threadgroup float sA[(8 * 8)];
1156 
1157 
1158     float4 texel1;
1159     if (validThread)
1160     {
1161         float2 texCoords = (float2(gIndices) + float2(0.5, 0.5)) / float2(mipSize);
1162         texel1 = srcTexture.sample(textureSampler, texCoords, level(options.srcLevel));
1163 
1164 
1165         dstMip1.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices);
1166     }
1167     else
1168     {
1169 
1170         lIndex = 0xffffffff;
1171     }
1172 
1173     if (options.numMipLevelsToGen == 1)
1174     {
1175         return;
1176     }
1177 
1178 
1179 
1180 
1181     sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;;
1182 
1183     threadgroup_barrier(mem_flags::mem_threadgroup);
1184 
1185 
1186     if ((lIndex & 0x09) == 0)
1187     {
1188         bool2 atEdge = gIndices == (mipSize - ushort2(1));
1189 
1190 
1191 
1192         float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 1], sG[lIndex + 1], sB[lIndex + 1], sA[lIndex + 1]));
1193 
1194         float4 texel3 = (atEdge.y) ? (texel1) : (float4(sR[lIndex + 8], sG[lIndex + 8], sB[lIndex + 8], sA[lIndex + 8]));
1195 
1196         float4 texel4 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (8 + 1)], sG[lIndex + (8 + 1)], sB[lIndex + (8 + 1)], sA[lIndex + (8 + 1)]));
1197 
1198 
1199         texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0;
1200 
1201         dstMip2.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices >> 1);
1202 
1203 
1204         sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;;
1205     }
1206 
1207     if (options.numMipLevelsToGen == 2)
1208     {
1209         return;
1210     }
1211 
1212 
1213     threadgroup_barrier(mem_flags::mem_threadgroup);
1214 
1215 
1216     if ((lIndex & 0x1b) == 0)
1217     {
1218         mipSize = max(mipSize >> 1, ushort2(1));
1219         bool2 atEdge = (gIndices >> 1) == (mipSize - ushort2(1));
1220 
1221 
1222 
1223         float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 2], sG[lIndex + 2], sB[lIndex + 2], sA[lIndex + 2]));
1224 
1225         float4 texel3 =
1226             (atEdge.y) ? (texel1) : (float4(sR[lIndex + 2 * 8], sG[lIndex + 2 * 8], sB[lIndex + 2 * 8], sA[lIndex + 2 * 8]));
1227 
1228         float4 texel4 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (2 * 8 + 2)], sG[lIndex + (2 * 8 + 2)], sB[lIndex + (2 * 8 + 2)], sA[lIndex + (2 * 8 + 2)]));
1229 
1230 
1231         texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0;
1232 
1233         dstMip3.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices >> 2);
1234 
1235 
1236         sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;;
1237     }
1238 
1239     if (options.numMipLevelsToGen == 3)
1240     {
1241         return;
1242     }
1243 
1244 
1245     threadgroup_barrier(mem_flags::mem_threadgroup);
1246 
1247 
1248     if ((lIndex & 0x3f) == 0)
1249     {
1250         mipSize = max(mipSize >> 1, ushort2(1));
1251         bool2 atEdge = (gIndices >> 2) == (mipSize - ushort2(1));
1252 
1253 
1254 
1255         float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 4], sG[lIndex + 4], sB[lIndex + 4], sA[lIndex + 4]));
1256 
1257         float4 texel3 =
1258             (atEdge.y) ? (texel1) : (float4(sR[lIndex + 4 * 8], sG[lIndex + 4 * 8], sB[lIndex + 4 * 8], sA[lIndex + 4 * 8]));
1259 
1260         float4 texel4 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (4 * 8 + 4)], sG[lIndex + (4 * 8 + 4)], sB[lIndex + (4 * 8 + 4)], sA[lIndex + (4 * 8 + 4)]));
1261 
1262 
1263         texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0;
1264 
1265         dstMip4.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices >> 3);
1266     }
1267 }
1268 
1269 template <typename TextureTypeR, typename TextureTypeW>
1270 static __attribute__((always_inline)) void generateCubeOr2DArray2ndAndMoreMipmaps(
1271     uint lIndex,
1272     ushort3 gIndices,
1273     TextureTypeR srcTexture,
1274     TextureTypeW dstMip2,
1275     TextureTypeW dstMip3,
1276     TextureTypeW dstMip4,
1277     ushort2 mip1Size,
1278     float4 mip1Texel,
1279     threadgroup float *sR,
1280     threadgroup float *sG,
1281     threadgroup float *sB,
1282     threadgroup float *sA,
1283     constant GenMipParams &options)
1284 {
1285     ushort2 mipSize = mip1Size;
1286     float4 texel1 = mip1Texel;
1287 
1288 
1289 
1290 
1291     sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;;
1292 
1293     threadgroup_barrier(mem_flags::mem_threadgroup);
1294 
1295 
1296     if ((lIndex & 0x09) == 0)
1297     {
1298         bool2 atEdge = gIndices.xy == (mipSize - ushort2(1));
1299 
1300 
1301 
1302         float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 1], sG[lIndex + 1], sB[lIndex + 1], sA[lIndex + 1]));
1303 
1304         float4 texel3 = (atEdge.y) ? (texel1) : (float4(sR[lIndex + 8], sG[lIndex + 8], sB[lIndex + 8], sA[lIndex + 8]));
1305 
1306         float4 texel4 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (8 + 1)], sG[lIndex + (8 + 1)], sB[lIndex + (8 + 1)], sA[lIndex + (8 + 1)]));
1307 
1308 
1309         texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0;
1310 
1311         dstMip2.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices.xy >> 1, gIndices.z);
1312 
1313 
1314         sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;;
1315     }
1316 
1317     if (options.numMipLevelsToGen == 2)
1318     {
1319         return;
1320     }
1321 
1322 
1323     threadgroup_barrier(mem_flags::mem_threadgroup);
1324 
1325 
1326     if ((lIndex & 0x1b) == 0)
1327     {
1328         mipSize = max(mipSize >> 1, ushort2(1));
1329         bool2 atEdge = (gIndices.xy >> 1) == (mipSize - ushort2(1));
1330 
1331 
1332 
1333         float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 2], sG[lIndex + 2], sB[lIndex + 2], sA[lIndex + 2]));
1334 
1335         float4 texel3 =
1336             (atEdge.y) ? (texel1) : (float4(sR[lIndex + 2 * 8], sG[lIndex + 2 * 8], sB[lIndex + 2 * 8], sA[lIndex + 2 * 8]));
1337 
1338         float4 texel4 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (2 * 8 + 2)], sG[lIndex + (2 * 8 + 2)], sB[lIndex + (2 * 8 + 2)], sA[lIndex + (2 * 8 + 2)]));
1339 
1340 
1341         texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0;
1342 
1343         dstMip3.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices.xy >> 2, gIndices.z);
1344 
1345 
1346         sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;;
1347     }
1348 
1349     if (options.numMipLevelsToGen == 3)
1350     {
1351         return;
1352     }
1353 
1354 
1355     threadgroup_barrier(mem_flags::mem_threadgroup);
1356 
1357 
1358     if ((lIndex & 0x3f) == 0)
1359     {
1360         mipSize = max(mipSize >> 1, ushort2(1));
1361         bool2 atEdge = (gIndices.xy >> 2) == (mipSize - ushort2(1));
1362 
1363 
1364 
1365         float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 4], sG[lIndex + 4], sB[lIndex + 4], sA[lIndex + 4]));
1366 
1367         float4 texel3 =
1368             (atEdge.y) ? (texel1) : (float4(sR[lIndex + 4 * 8], sG[lIndex + 4 * 8], sB[lIndex + 4 * 8], sA[lIndex + 4 * 8]));
1369 
1370         float4 texel4 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (4 * 8 + 4)], sG[lIndex + (4 * 8 + 4)], sB[lIndex + (4 * 8 + 4)], sA[lIndex + (4 * 8 + 4)]));
1371 
1372 
1373         texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0;
1374 
1375         dstMip4.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices.xy >> 3, gIndices.z);
1376     }
1377 }
1378 
1379 kernel void generateCubeMipmaps(uint lIndex [[thread_index_in_threadgroup]],
1380                                 ushort3 gIndices [[thread_position_in_grid]],
1381                                 texturecube<float> srcTexture [[texture(0)]],
1382                                 texturecube<float, access::write> dstMip1 [[texture(1)]],
1383                                 texturecube<float, access::write> dstMip2 [[texture(2)]],
1384                                 texturecube<float, access::write> dstMip3 [[texture(3)]],
1385                                 texturecube<float, access::write> dstMip4 [[texture(4)]],
1386                                 constant GenMipParams &options [[buffer(0)]])
1387 {
1388     uint firstMipLevel = options.srcLevel + 1;
1389     ushort2 mip1Size =
1390         ushort2(srcTexture.get_width(firstMipLevel), srcTexture.get_height(firstMipLevel));
1391     bool validThread = gIndices.x < mip1Size.x && gIndices.y < mip1Size.y;
1392 
1393     constexpr sampler textureSampler(mag_filter::linear, min_filter::linear, mip_filter::linear);
1394 
1395 
1396     float4 mip1Texel;
1397     if (validThread)
1398     {
1399         float2 texCoords = (float2(gIndices.xy) + float2(0.5, 0.5)) / float2(mip1Size);
1400         mip1Texel = srcTexture.sample(textureSampler, cubeTexcoords(texCoords, int(gIndices.z)),
1401                                       level(options.srcLevel));
1402 
1403 
1404         dstMip1.write((options.sRGB ? sRGBtoLinear(mip1Texel) : mip1Texel), gIndices.xy, gIndices.z);
1405     }
1406     else
1407     {
1408 
1409         lIndex = 0xffffffff;
1410     }
1411 
1412     if (options.numMipLevelsToGen == 1)
1413     {
1414         return;
1415     }
1416 
1417 
1418     threadgroup float sR[(8 * 8)];
1419     threadgroup float sG[(8 * 8)];
1420     threadgroup float sB[(8 * 8)];
1421     threadgroup float sA[(8 * 8)];
1422 
1423     generateCubeOr2DArray2ndAndMoreMipmaps(lIndex, gIndices, srcTexture, dstMip2, dstMip3, dstMip4,
1424                                            mip1Size, mip1Texel, sR, sG, sB, sA, options);
1425 }
1426 
1427 kernel void generate2DArrayMipmaps(uint lIndex [[thread_index_in_threadgroup]],
1428                                    ushort3 gIndices [[thread_position_in_grid]],
1429                                    texture2d_array<float> srcTexture [[texture(0)]],
1430                                    texture2d_array<float, access::write> dstMip1 [[texture(1)]],
1431                                    texture2d_array<float, access::write> dstMip2 [[texture(2)]],
1432                                    texture2d_array<float, access::write> dstMip3 [[texture(3)]],
1433                                    texture2d_array<float, access::write> dstMip4 [[texture(4)]],
1434                                    constant GenMipParams &options [[buffer(0)]])
1435 {
1436     uint firstMipLevel = options.srcLevel + 1;
1437     ushort2 mip1Size =
1438         ushort2(srcTexture.get_width(firstMipLevel), srcTexture.get_height(firstMipLevel));
1439     bool validThread = gIndices.x < mip1Size.x && gIndices.y < mip1Size.y;
1440 
1441     constexpr sampler textureSampler(mag_filter::linear, min_filter::linear, mip_filter::linear);
1442 
1443 
1444     float4 mip1Texel;
1445     if (validThread)
1446     {
1447         float2 texCoords = (float2(gIndices.xy) + float2(0.5, 0.5)) / float2(mip1Size);
1448         mip1Texel =
1449             srcTexture.sample(textureSampler, texCoords, gIndices.z, level(options.srcLevel));
1450 
1451 
1452         dstMip1.write((options.sRGB ? sRGBtoLinear(mip1Texel) : mip1Texel), gIndices.xy, gIndices.z);
1453     }
1454     else
1455     {
1456 
1457         lIndex = 0xffffffff;
1458     }
1459 
1460     if (options.numMipLevelsToGen == 1)
1461     {
1462         return;
1463     }
1464 
1465 
1466     threadgroup float sR[(8 * 8)];
1467     threadgroup float sG[(8 * 8)];
1468     threadgroup float sB[(8 * 8)];
1469     threadgroup float sA[(8 * 8)];
1470 
1471     generateCubeOr2DArray2ndAndMoreMipmaps(lIndex, gIndices, srcTexture, dstMip2, dstMip3, dstMip4,
1472                                            mip1Size, mip1Texel, sR, sG, sB, sA, options);
1473 }
1474 # 5 "temp_master_source.metal" 2
1475 # 1 ".\\copy_buffer.metal" 1
1476 # 12 ".\\copy_buffer.metal"
1477 #include <metal_pack>
1478 
1479 
1480 # 1 ".\\format_autogen.h" 1
1481 
1482 
1483 
1484 namespace rx
1485 {
1486 namespace mtl_shader
1487 {
1488 
1489 namespace FormatID
1490 {
1491 enum
1492 {
1493     NONE,
1494     D16_UNORM,
1495     D24_UNORM_S8_UINT,
1496     D24_UNORM_X8_UINT,
1497     D32_FLOAT,
1498     D32_FLOAT_S8X24_UINT,
1499     D32_UNORM,
1500     S8_UINT,
1501     A16_FLOAT,
1502     A1R5G5B5_UNORM,
1503     A2R10G10B10_SINT_VERTEX,
1504     A2R10G10B10_SNORM_VERTEX,
1505     A2R10G10B10_SSCALED_VERTEX,
1506     A2R10G10B10_UINT_VERTEX,
1507     A2R10G10B10_UNORM_VERTEX,
1508     A2R10G10B10_USCALED_VERTEX,
1509     A32_FLOAT,
1510     A8_UNORM,
1511     ASTC_10x10_SRGB_BLOCK,
1512     ASTC_10x10_UNORM_BLOCK,
1513     ASTC_10x5_SRGB_BLOCK,
1514     ASTC_10x5_UNORM_BLOCK,
1515     ASTC_10x6_SRGB_BLOCK,
1516     ASTC_10x6_UNORM_BLOCK,
1517     ASTC_10x8_SRGB_BLOCK,
1518     ASTC_10x8_UNORM_BLOCK,
1519     ASTC_12x10_SRGB_BLOCK,
1520     ASTC_12x10_UNORM_BLOCK,
1521     ASTC_12x12_SRGB_BLOCK,
1522     ASTC_12x12_UNORM_BLOCK,
1523     ASTC_3x3x3_UNORM_BLOCK,
1524     ASTC_3x3x3_UNORM_SRGB_BLOCK,
1525     ASTC_4x3x3_UNORM_BLOCK,
1526     ASTC_4x3x3_UNORM_SRGB_BLOCK,
1527     ASTC_4x4_SRGB_BLOCK,
1528     ASTC_4x4_UNORM_BLOCK,
1529     ASTC_4x4x3_UNORM_BLOCK,
1530     ASTC_4x4x3_UNORM_SRGB_BLOCK,
1531     ASTC_4x4x4_UNORM_BLOCK,
1532     ASTC_4x4x4_UNORM_SRGB_BLOCK,
1533     ASTC_5x4_SRGB_BLOCK,
1534     ASTC_5x4_UNORM_BLOCK,
1535     ASTC_5x4x4_UNORM_BLOCK,
1536     ASTC_5x4x4_UNORM_SRGB_BLOCK,
1537     ASTC_5x5_SRGB_BLOCK,
1538     ASTC_5x5_UNORM_BLOCK,
1539     ASTC_5x5x4_UNORM_BLOCK,
1540     ASTC_5x5x4_UNORM_SRGB_BLOCK,
1541     ASTC_5x5x5_UNORM_BLOCK,
1542     ASTC_5x5x5_UNORM_SRGB_BLOCK,
1543     ASTC_6x5_SRGB_BLOCK,
1544     ASTC_6x5_UNORM_BLOCK,
1545     ASTC_6x5x5_UNORM_BLOCK,
1546     ASTC_6x5x5_UNORM_SRGB_BLOCK,
1547     ASTC_6x6_SRGB_BLOCK,
1548     ASTC_6x6_UNORM_BLOCK,
1549     ASTC_6x6x5_UNORM_BLOCK,
1550     ASTC_6x6x5_UNORM_SRGB_BLOCK,
1551     ASTC_6x6x6_UNORM_BLOCK,
1552     ASTC_6x6x6_UNORM_SRGB_BLOCK,
1553     ASTC_8x5_SRGB_BLOCK,
1554     ASTC_8x5_UNORM_BLOCK,
1555     ASTC_8x6_SRGB_BLOCK,
1556     ASTC_8x6_UNORM_BLOCK,
1557     ASTC_8x8_SRGB_BLOCK,
1558     ASTC_8x8_UNORM_BLOCK,
1559     B10G10R10A2_UNORM,
1560     B4G4R4A4_UNORM,
1561     B5G5R5A1_UNORM,
1562     B5G6R5_UNORM,
1563     B8G8R8A8_TYPELESS,
1564     B8G8R8A8_TYPELESS_SRGB,
1565     B8G8R8A8_UNORM,
1566     B8G8R8A8_UNORM_SRGB,
1567     B8G8R8X8_UNORM,
1568     B8G8R8X8_UNORM_SRGB,
1569     BC1_RGBA_UNORM_BLOCK,
1570     BC1_RGBA_UNORM_SRGB_BLOCK,
1571     BC1_RGB_UNORM_BLOCK,
1572     BC1_RGB_UNORM_SRGB_BLOCK,
1573     BC2_RGBA_UNORM_BLOCK,
1574     BC2_RGBA_UNORM_SRGB_BLOCK,
1575     BC3_RGBA_UNORM_BLOCK,
1576     BC3_RGBA_UNORM_SRGB_BLOCK,
1577     BC4_RED_SNORM_BLOCK,
1578     BC4_RED_UNORM_BLOCK,
1579     BC5_RG_SNORM_BLOCK,
1580     BC5_RG_UNORM_BLOCK,
1581     BC6H_RGB_SFLOAT_BLOCK,
1582     BC6H_RGB_UFLOAT_BLOCK,
1583     BC7_RGBA_UNORM_BLOCK,
1584     BC7_RGBA_UNORM_SRGB_BLOCK,
1585     EAC_R11G11_SNORM_BLOCK,
1586     EAC_R11G11_UNORM_BLOCK,
1587     EAC_R11_SNORM_BLOCK,
1588     EAC_R11_UNORM_BLOCK,
1589     ETC1_LOSSY_DECODE_R8G8B8_UNORM_BLOCK,
1590     ETC1_R8G8B8_UNORM_BLOCK,
1591     ETC2_R8G8B8A1_SRGB_BLOCK,
1592     ETC2_R8G8B8A1_UNORM_BLOCK,
1593     ETC2_R8G8B8A8_SRGB_BLOCK,
1594     ETC2_R8G8B8A8_UNORM_BLOCK,
1595     ETC2_R8G8B8_SRGB_BLOCK,
1596     ETC2_R8G8B8_UNORM_BLOCK,
1597     G8_B8R8_2PLANE_420_UNORM,
1598     G8_B8_R8_3PLANE_420_UNORM,
1599     L16A16_FLOAT,
1600     L16_FLOAT,
1601     L32A32_FLOAT,
1602     L32_FLOAT,
1603     L4A4_UNORM,
1604     L8A8_UNORM,
1605     L8_UNORM,
1606     PALETTE4_R4G4B4A4_UNORM,
1607     PALETTE4_R5G5B5A1_UNORM,
1608     PALETTE4_R5G6B5_UNORM,
1609     PALETTE4_R8G8B8A8_UNORM,
1610     PALETTE4_R8G8B8_UNORM,
1611     PALETTE8_R4G4B4A4_UNORM,
1612     PALETTE8_R5G5B5A1_UNORM,
1613     PALETTE8_R5G6B5_UNORM,
1614     PALETTE8_R8G8B8A8_UNORM,
1615     PALETTE8_R8G8B8_UNORM,
1616     PVRTC1_RGBA_2BPP_UNORM_BLOCK,
1617     PVRTC1_RGBA_2BPP_UNORM_SRGB_BLOCK,
1618     PVRTC1_RGBA_4BPP_UNORM_BLOCK,
1619     PVRTC1_RGBA_4BPP_UNORM_SRGB_BLOCK,
1620     PVRTC1_RGB_2BPP_UNORM_BLOCK,
1621     PVRTC1_RGB_2BPP_UNORM_SRGB_BLOCK,
1622     PVRTC1_RGB_4BPP_UNORM_BLOCK,
1623     PVRTC1_RGB_4BPP_UNORM_SRGB_BLOCK,
1624     R10G10B10A2_SINT,
1625     R10G10B10A2_SNORM,
1626     R10G10B10A2_SSCALED,
1627     R10G10B10A2_UINT,
1628     R10G10B10A2_UNORM,
1629     R10G10B10A2_USCALED,
1630     R10G10B10X2_UNORM,
1631     R11G11B10_FLOAT,
1632     R16G16B16A16_FLOAT,
1633     R16G16B16A16_SINT,
1634     R16G16B16A16_SNORM,
1635     R16G16B16A16_SSCALED,
1636     R16G16B16A16_UINT,
1637     R16G16B16A16_UNORM,
1638     R16G16B16A16_USCALED,
1639     R16G16B16_FLOAT,
1640     R16G16B16_SINT,
1641     R16G16B16_SNORM,
1642     R16G16B16_SSCALED,
1643     R16G16B16_UINT,
1644     R16G16B16_UNORM,
1645     R16G16B16_USCALED,
1646     R16G16_FLOAT,
1647     R16G16_SINT,
1648     R16G16_SNORM,
1649     R16G16_SSCALED,
1650     R16G16_UINT,
1651     R16G16_UNORM,
1652     R16G16_USCALED,
1653     R16_FLOAT,
1654     R16_SINT,
1655     R16_SNORM,
1656     R16_SSCALED,
1657     R16_UINT,
1658     R16_UNORM,
1659     R16_USCALED,
1660     R32G32B32A32_FIXED,
1661     R32G32B32A32_FLOAT,
1662     R32G32B32A32_SINT,
1663     R32G32B32A32_SNORM,
1664     R32G32B32A32_SSCALED,
1665     R32G32B32A32_UINT,
1666     R32G32B32A32_UNORM,
1667     R32G32B32A32_USCALED,
1668     R32G32B32_FIXED,
1669     R32G32B32_FLOAT,
1670     R32G32B32_SINT,
1671     R32G32B32_SNORM,
1672     R32G32B32_SSCALED,
1673     R32G32B32_UINT,
1674     R32G32B32_UNORM,
1675     R32G32B32_USCALED,
1676     R32G32_FIXED,
1677     R32G32_FLOAT,
1678     R32G32_SINT,
1679     R32G32_SNORM,
1680     R32G32_SSCALED,
1681     R32G32_UINT,
1682     R32G32_UNORM,
1683     R32G32_USCALED,
1684     R32_FIXED,
1685     R32_FLOAT,
1686     R32_SINT,
1687     R32_SNORM,
1688     R32_SSCALED,
1689     R32_UINT,
1690     R32_UNORM,
1691     R32_USCALED,
1692     R4G4B4A4_UNORM,
1693     R5G5B5A1_UNORM,
1694     R5G6B5_UNORM,
1695     R8G8B8A8_SINT,
1696     R8G8B8A8_SNORM,
1697     R8G8B8A8_SSCALED,
1698     R8G8B8A8_TYPELESS,
1699     R8G8B8A8_TYPELESS_SRGB,
1700     R8G8B8A8_UINT,
1701     R8G8B8A8_UNORM,
1702     R8G8B8A8_UNORM_SRGB,
1703     R8G8B8A8_USCALED,
1704     R8G8B8X8_UNORM,
1705     R8G8B8X8_UNORM_SRGB,
1706     R8G8B8_SINT,
1707     R8G8B8_SNORM,
1708     R8G8B8_SSCALED,
1709     R8G8B8_UINT,
1710     R8G8B8_UNORM,
1711     R8G8B8_UNORM_SRGB,
1712     R8G8B8_USCALED,
1713     R8G8_SINT,
1714     R8G8_SNORM,
1715     R8G8_SSCALED,
1716     R8G8_UINT,
1717     R8G8_UNORM,
1718     R8G8_UNORM_SRGB,
1719     R8G8_USCALED,
1720     R8_SINT,
1721     R8_SNORM,
1722     R8_SSCALED,
1723     R8_UINT,
1724     R8_UNORM,
1725     R8_UNORM_SRGB,
1726     R8_USCALED,
1727     R9G9B9E5_SHAREDEXP,
1728     X2R10G10B10_SINT_VERTEX,
1729     X2R10G10B10_SNORM_VERTEX,
1730     X2R10G10B10_SSCALED_VERTEX,
1731     X2R10G10B10_UINT_VERTEX,
1732     X2R10G10B10_UNORM_VERTEX,
1733     X2R10G10B10_USCALED_VERTEX,
1734     EXTERNAL0,
1735     EXTERNAL1,
1736     EXTERNAL2,
1737     EXTERNAL3,
1738     EXTERNAL4,
1739     EXTERNAL5,
1740     EXTERNAL6,
1741     EXTERNAL7
1742 };
1743 
1744 }
1745 
1746 }
1747 }
1748 # 16 ".\\copy_buffer.metal" 2
1749 
1750 using namespace rx::mtl_shader;
1751 
1752 constant int kCopyFormatType [[function_constant(10)]];
1753 
1754 
1755 constant int kCopyTextureType [[function_constant(20)]];
1756 constant bool kCopyTextureType2D = kCopyTextureType == kTextureType2D;
1757 constant bool kCopyTextureType2DArray = kCopyTextureType == kTextureType2DArray;
1758 constant bool kCopyTextureType2DMS = kCopyTextureType == kTextureType2DMultisample;
1759 constant bool kCopyTextureTypeCube = kCopyTextureType == kTextureTypeCube;
1760 constant bool kCopyTextureType3D = kCopyTextureType == kTextureType3D;
1761 
1762 struct CopyPixelParams
1763 {
1764     uint3 copySize;
1765     uint3 textureOffset;
1766 
1767     uint bufferStartOffset;
1768     uint pixelSize;
1769     uint bufferRowPitch;
1770     uint bufferDepthPitch;
1771 };
1772 
1773 struct WritePixelParams
1774 {
1775     uint2 copySize;
1776     uint2 textureOffset;
1777 
1778     uint bufferStartOffset;
1779 
1780     uint pixelSize;
1781     uint bufferRowPitch;
1782 
1783     uint textureLevel;
1784     uint textureLayer;
1785 
1786     bool reverseTextureRowOrder;
1787 };
1788 # 120 ".\\copy_buffer.metal"
1789 template <typename T>
1790 static inline void textureWrite(ushort3 gIndices,
1791                                 constant CopyPixelParams &options,
1792                                 vec<T, 4> color,
1793                                 texture2d<T, access::write> dstTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<T, access::write> dstTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<T, access::write> dstTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<T, access::write> dstTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]])
1794 {
1795     uint3 writeIndices = options.textureOffset + uint3(gIndices);
1796     switch (kCopyTextureType)
1797     {
1798         case kTextureType2D:
1799             dstTexture2d.write(color, writeIndices.xy);
1800             break;
1801         case kTextureType2DArray:
1802             dstTexture2dArray.write(color, writeIndices.xy, writeIndices.z);
1803             break;
1804         case kTextureType3D:
1805             dstTexture3d.write(color, writeIndices);
1806             break;
1807         case kTextureTypeCube:
1808             dstTextureCube.write(color, writeIndices.xy, writeIndices.z);
1809             break;
1810     }
1811 }
1812 
1813 
1814 template <typename T>
1815 static inline vec<T, 4> textureRead(ushort2 gIndices,
1816                                     constant WritePixelParams &options,
1817                                     texture2d<T, access::read> srcTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<T, access::read> srcTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<T, access::read> srcTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<T, access::read> srcTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]], texture2d_ms<T, access::read> srcTexture2dMS [[texture(0), function_constant(kCopyTextureType2DMS)]])
1818 {
1819     vec<T, 4> color;
1820     uint2 coords = uint2(gIndices);
1821     if (options.reverseTextureRowOrder)
1822     {
1823         coords.y = options.copySize.y - 1 - gIndices.y;
1824     }
1825     coords += options.textureOffset;
1826     switch (kCopyTextureType)
1827     {
1828         case kTextureType2D:
1829             color = srcTexture2d.read(coords.xy, options.textureLevel);
1830             break;
1831         case kTextureType2DArray:
1832             color = srcTexture2dArray.read(coords.xy, options.textureLayer, options.textureLevel);
1833             break;
1834         case kTextureType2DMultisample:
1835             color = resolveTextureMS(srcTexture2dMS, coords.xy);
1836             break;
1837         case kTextureType3D:
1838             color = srcTexture3d.read(uint3(coords, options.textureLayer), options.textureLevel);
1839             break;
1840         case kTextureTypeCube:
1841             color = srcTextureCube.read(coords.xy, options.textureLayer, options.textureLevel);
1842             break;
1843     }
1844     return color;
1845 }
1846 # 215 ".\\copy_buffer.metal"
1847 static inline float4 readR5G6B5_UNORM(uint bufferOffset, constant uchar *buffer)
1848 {
1849     float4 color;
1850     ushort src = bytesToShort<ushort>(buffer, bufferOffset);
1851 
1852     color.r = normalizedToFloat<5>(getShiftedData<5, 11>(src));
1853     color.g = normalizedToFloat<6>(getShiftedData<6, 5>(src));
1854     color.b = normalizedToFloat<5>(getShiftedData<5, 0>(src));
1855     color.a = 1.0;
1856     return color;
1857 }
1858 static inline void writeR5G6B5_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
1859 {
1860     ushort dst = shiftData<5, 11>(floatToNormalized<5, ushort>(color.r)) |
1861                  shiftData<6, 5>(floatToNormalized<6, ushort>(color.g)) |
1862                  shiftData<5, 0>(floatToNormalized<5, ushort>(color.b));
1863 
1864     shortToBytes(dst, bufferOffset, buffer);
1865 }
1866 
1867 
1868 static inline float4 readR4G4B4A4_UNORM(uint bufferOffset, constant uchar *buffer)
1869 {
1870     float4 color;
1871     ushort src = bytesToShort<ushort>(buffer, bufferOffset);
1872 
1873     color.r = normalizedToFloat<4>(getShiftedData<4, 12>(src));
1874     color.g = normalizedToFloat<4>(getShiftedData<4, 8>(src));
1875     color.b = normalizedToFloat<4>(getShiftedData<4, 4>(src));
1876     color.a = normalizedToFloat<4>(getShiftedData<4, 0>(src));
1877     return color;
1878 }
1879 static inline void writeR4G4B4A4_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
1880 {
1881     ushort dst = shiftData<4, 12>(floatToNormalized<4, ushort>(color.r)) |
1882                  shiftData<4, 8>(floatToNormalized<4, ushort>(color.g)) |
1883                  shiftData<4, 4>(floatToNormalized<4, ushort>(color.b)) |
1884                  shiftData<4, 0>(floatToNormalized<4, ushort>(color.a));
1885     ;
1886 
1887     shortToBytes(dst, bufferOffset, buffer);
1888 }
1889 
1890 
1891 static inline float4 readR5G5B5A1_UNORM(uint bufferOffset, constant uchar *buffer)
1892 {
1893     float4 color;
1894     ushort src = bytesToShort<ushort>(buffer, bufferOffset);
1895 
1896     color.r = normalizedToFloat<5>(getShiftedData<5, 11>(src));
1897     color.g = normalizedToFloat<5>(getShiftedData<5, 6>(src));
1898     color.b = normalizedToFloat<5>(getShiftedData<5, 1>(src));
1899     color.a = normalizedToFloat<1>(getShiftedData<1, 0>(src));
1900     return color;
1901 }
1902 static inline void writeR5G5B5A1_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
1903 {
1904     ushort dst = shiftData<5, 11>(floatToNormalized<5, ushort>(color.r)) |
1905                  shiftData<5, 6>(floatToNormalized<5, ushort>(color.g)) |
1906                  shiftData<5, 1>(floatToNormalized<5, ushort>(color.b)) |
1907                  shiftData<1, 0>(floatToNormalized<1, ushort>(color.a));
1908     ;
1909 
1910     shortToBytes(dst, bufferOffset, buffer);
1911 }
1912 
1913 
1914 static inline int4 readR10G10B10A2_SINT(uint bufferOffset, constant uchar *buffer)
1915 {
1916     int4 color;
1917     int src = bytesToInt<int>(buffer, bufferOffset);
1918 
1919     constexpr int3 rgbSignMask(0x200);
1920     constexpr int3 negativeMask(0xfffffc00);
1921     constexpr int alphaSignMask = 0x2;
1922     constexpr int alphaNegMask = 0xfffffffc;
1923 
1924     color.r = getShiftedData<10, 0>(src);
1925     color.g = getShiftedData<10, 10>(src);
1926     color.b = getShiftedData<10, 20>(src);
1927 
1928     int3 isRgbNegative = (color.rgb & rgbSignMask) >> 9;
1929     color.rgb = (isRgbNegative * negativeMask) | color.rgb;
1930 
1931     color.a = getShiftedData<2, 30>(src);
1932     int isAlphaNegative = color.a & alphaSignMask >> 1;
1933     color.a = (isAlphaNegative * alphaNegMask) | color.a;
1934     return color;
1935 }
1936 
1937 static inline uint4 readR10G10B10A2_UINT(uint bufferOffset, constant uchar *buffer)
1938 {
1939     uint4 color;
1940     uint src = bytesToInt<uint>(buffer, bufferOffset);
1941 
1942     color.r = getShiftedData<10, 0>(src);
1943     color.g = getShiftedData<10, 10>(src);
1944     color.b = getShiftedData<10, 20>(src);
1945     color.a = getShiftedData<2, 30>(src);
1946     return color;
1947 }
1948 
1949 
1950 static inline float4 readR8G8B8A8(uint bufferOffset, constant uchar *buffer, bool isSRGB)
1951 {
1952     float4 color;
1953     uint src = bytesToInt<uint>(buffer, bufferOffset);
1954 
1955     if (isSRGB)
1956     {
1957         color = unpack_unorm4x8_srgb_to_float(src);
1958     }
1959     else
1960     {
1961         color = unpack_unorm4x8_to_float(src);
1962     }
1963     return color;
1964 }
1965 static inline void writeR8G8B8A8(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer, bool isSRGB)
1966 {
1967     uint dst;
1968 
1969     if (isSRGB)
1970     {
1971         dst = pack_float_to_srgb_unorm4x8(color);
1972     }
1973     else
1974     {
1975         dst = pack_float_to_unorm4x8(color);
1976     }
1977 
1978     intToBytes(dst, bufferOffset, buffer);
1979 }
1980 
1981 static inline float4 readR8G8B8(uint bufferOffset, constant uchar *buffer, bool isSRGB)
1982 {
1983     float4 color;
1984     color.r = normalizedToFloat<uchar>(buffer[bufferOffset]);
1985     color.g = normalizedToFloat<uchar>(buffer[bufferOffset + 1]);
1986     color.b = normalizedToFloat<uchar>(buffer[bufferOffset + 2]);
1987     color.a = 1.0;
1988 
1989     if (isSRGB)
1990     {
1991         color = sRGBtoLinear(color);
1992     }
1993     return color;
1994 }
1995 static inline void writeR8G8B8(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer, bool isSRGB)
1996 {
1997     color.a = 1.0;
1998     uint dst;
1999 
2000     if (isSRGB)
2001     {
2002         dst = pack_float_to_srgb_unorm4x8(color);
2003     }
2004     else
2005     {
2006         dst = pack_float_to_unorm4x8(color);
2007     }
2008     int24bitToBytes(dst, bufferOffset, buffer);
2009 }
2010 
2011 
2012 static inline float4 readR8G8B8A8_SNORM(uint bufferOffset, constant uchar *buffer)
2013 {
2014     float4 color;
2015     uint src = bytesToInt<uint>(buffer, bufferOffset);
2016 
2017     color = unpack_snorm4x8_to_float(src);
2018 
2019     return color;
2020 }
2021 static inline void writeR8G8B8A8_SNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2022 {
2023     uint dst = pack_float_to_snorm4x8(color);
2024 
2025     intToBytes(dst, bufferOffset, buffer);
2026 }
2027 
2028 
2029 static inline float4 readR8G8B8_SNORM(uint bufferOffset, constant uchar *buffer)
2030 {
2031     float4 color;
2032     color.r = normalizedToFloat<7, char>(buffer[bufferOffset]);
2033     color.g = normalizedToFloat<7, char>(buffer[bufferOffset + 1]);
2034     color.b = normalizedToFloat<7, char>(buffer[bufferOffset + 2]);
2035     color.a = 1.0;
2036 
2037     return color;
2038 }
2039 static inline void writeR8G8B8_SNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2040 {
2041     uint dst = pack_float_to_snorm4x8(color);
2042 
2043     int24bitToBytes(dst, bufferOffset, buffer);
2044 }
2045 
2046 
2047 static inline float4 readR8G8B8A8_UNORM(uint bufferOffset, constant uchar *buffer)
2048 {
2049     return readR8G8B8A8(bufferOffset, buffer, false);
2050 }
2051 static inline void writeR8G8B8A8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2052 {
2053     return writeR8G8B8A8(gIndices, options, bufferOffset, color, buffer, false);
2054 }
2055 
2056 static inline float4 readR8G8B8A8_UNORM_SRGB(uint bufferOffset, constant uchar *buffer)
2057 {
2058     return readR8G8B8A8(bufferOffset, buffer, true);
2059 }
2060 static inline void writeR8G8B8A8_UNORM_SRGB(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2061 {
2062     return writeR8G8B8A8(gIndices, options, bufferOffset, color, buffer, true);
2063 }
2064 
2065 
2066 static inline float4 readB8G8R8A8_UNORM(uint bufferOffset, constant uchar *buffer)
2067 {
2068     return readR8G8B8A8(bufferOffset, buffer, false).bgra;
2069 }
2070 static inline void writeB8G8R8A8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2071 {
2072     color.rgba = color.bgra;
2073     return writeR8G8B8A8(gIndices, options, bufferOffset, color, buffer, false);
2074 }
2075 
2076 static inline float4 readB8G8R8A8_UNORM_SRGB(uint bufferOffset, constant uchar *buffer)
2077 {
2078     return readR8G8B8A8(bufferOffset, buffer, true).bgra;
2079 }
2080 static inline void writeB8G8R8A8_UNORM_SRGB(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2081 {
2082     color.rgba = color.bgra;
2083     return writeR8G8B8A8(gIndices, options, bufferOffset, color, buffer, true);
2084 }
2085 
2086 
2087 static inline float4 readR8G8B8_UNORM(uint bufferOffset, constant uchar *buffer)
2088 {
2089     return readR8G8B8(bufferOffset, buffer, false);
2090 }
2091 static inline void writeR8G8B8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2092 {
2093     return writeR8G8B8(gIndices, options, bufferOffset, color, buffer, false);
2094 }
2095 
2096 static inline float4 readR8G8B8_UNORM_SRGB(uint bufferOffset, constant uchar *buffer)
2097 {
2098     return readR8G8B8(bufferOffset, buffer, true);
2099 }
2100 static inline void writeR8G8B8_UNORM_SRGB(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2101 {
2102     return writeR8G8B8(gIndices, options, bufferOffset, color, buffer, true);
2103 }
2104 
2105 
2106 static inline float4 readL8_UNORM(uint bufferOffset, constant uchar *buffer)
2107 {
2108     float4 color;
2109     color.rgb = float3(normalizedToFloat<uchar>(buffer[bufferOffset]));
2110     color.a = 1.0;
2111     return color;
2112 }
2113 static inline void writeL8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2114 {
2115     buffer[bufferOffset] = floatToNormalized<uchar>(color.r);
2116 }
2117 
2118 
2119 static inline void writeA8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2120 {
2121     buffer[bufferOffset] = floatToNormalized<uchar>(color.a);
2122 }
2123 
2124 
2125 static inline float4 readL8A8_UNORM(uint bufferOffset, constant uchar *buffer)
2126 {
2127     float4 color;
2128     color.rgb = float3(normalizedToFloat<uchar>(buffer[bufferOffset]));
2129     color.a = normalizedToFloat<uchar>(buffer[bufferOffset + 1]);
2130     return color;
2131 }
2132 static inline void writeL8A8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2133 {
2134     buffer[bufferOffset] = floatToNormalized<uchar>(color.r);
2135     buffer[bufferOffset + 1] = floatToNormalized<uchar>(color.a);
2136 }
2137 
2138 
2139 static inline float4 readR8_UNORM(uint bufferOffset, constant uchar *buffer)
2140 {
2141     float4 color;
2142     color.r = normalizedToFloat<uchar>(buffer[bufferOffset]);
2143     color.g = color.b = 0.0;
2144     color.a = 1.0;
2145     return color;
2146 }
2147 static inline void writeR8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2148 {
2149     buffer[bufferOffset] = floatToNormalized<uchar>(color.r);
2150 }
2151 
2152 static inline float4 readR8_SNORM(uint bufferOffset, constant uchar *buffer)
2153 {
2154     float4 color;
2155     color.r = normalizedToFloat<7, char>(buffer[bufferOffset]);
2156     color.g = color.b = 0.0;
2157     color.a = 1.0;
2158     return color;
2159 }
2160 static inline void writeR8_SNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2161 {
2162     buffer[bufferOffset] = as_type<uchar>(floatToNormalized<char>(color.r));
2163 }
2164 
2165 
2166 static inline int4 readR8_SINT(uint bufferOffset, constant uchar *buffer)
2167 {
2168     int4 color;
2169     color.r = as_type<char>(buffer[bufferOffset]);
2170     color.g = color.b = 0;
2171     color.a = 1;
2172     return color;
2173 }
2174 static inline void writeR8_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer)
2175 {
2176     buffer[bufferOffset] = static_cast<uchar>(color.r);
2177 }
2178 
2179 
2180 static inline uint4 readR8_UINT(uint bufferOffset, constant uchar *buffer)
2181 {
2182     uint4 color;
2183     color.r = as_type<uchar>(buffer[bufferOffset]);
2184     color.g = color.b = 0;
2185     color.a = 1;
2186     return color;
2187 }
2188 static inline void writeR8_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer)
2189 {
2190     buffer[bufferOffset] = static_cast<uchar>(color.r);
2191 }
2192 
2193 
2194 static inline float4 readR8G8_UNORM(uint bufferOffset, constant uchar *buffer)
2195 {
2196     float4 color;
2197     color.r = normalizedToFloat<uchar>(buffer[bufferOffset]);
2198     color.g = normalizedToFloat<uchar>(buffer[bufferOffset + 1]);
2199     color.b = 0.0;
2200     color.a = 1.0;
2201     return color;
2202 }
2203 static inline void writeR8G8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2204 {
2205     buffer[bufferOffset] = floatToNormalized<uchar>(color.r);
2206     buffer[bufferOffset + 1] = floatToNormalized<uchar>(color.g);
2207 }
2208 
2209 static inline float4 readR8G8_SNORM(uint bufferOffset, constant uchar *buffer)
2210 {
2211     float4 color;
2212     color.r = normalizedToFloat<7, char>(buffer[bufferOffset]);
2213     color.g = normalizedToFloat<7, char>(buffer[bufferOffset + 1]);
2214     color.b = 0.0;
2215     color.a = 1.0;
2216     return color;
2217 }
2218 static inline void writeR8G8_SNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2219 {
2220     buffer[bufferOffset] = as_type<uchar>(floatToNormalized<char>(color.r));
2221     buffer[bufferOffset + 1] = as_type<uchar>(floatToNormalized<char>(color.g));
2222 }
2223 
2224 
2225 static inline int4 readR8G8_SINT(uint bufferOffset, constant uchar *buffer)
2226 {
2227     int4 color;
2228     color.r = as_type<char>(buffer[bufferOffset]);
2229     color.g = as_type<char>(buffer[bufferOffset + 1]);
2230     color.b = 0;
2231     color.a = 1;
2232     return color;
2233 }
2234 static inline void writeR8G8_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer)
2235 {
2236     buffer[bufferOffset] = static_cast<uchar>(color.r);
2237     buffer[bufferOffset + 1] = static_cast<uchar>(color.g);
2238 }
2239 
2240 
2241 static inline uint4 readR8G8_UINT(uint bufferOffset, constant uchar *buffer)
2242 {
2243     uint4 color;
2244     color.r = as_type<uchar>(buffer[bufferOffset]);
2245     color.g = as_type<uchar>(buffer[bufferOffset + 1]);
2246     color.b = 0;
2247     color.a = 1;
2248     return color;
2249 }
2250 static inline void writeR8G8_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer)
2251 {
2252     buffer[bufferOffset] = static_cast<uchar>(color.r);
2253     buffer[bufferOffset + 1] = static_cast<uchar>(color.g);
2254 }
2255 
2256 
2257 static inline int4 readR8G8B8_SINT(uint bufferOffset, constant uchar *buffer)
2258 {
2259     int4 color;
2260     color.r = as_type<char>(buffer[bufferOffset]);
2261     color.g = as_type<char>(buffer[bufferOffset + 1]);
2262     color.b = as_type<char>(buffer[bufferOffset + 2]);
2263     color.a = 1;
2264     return color;
2265 }
2266 
2267 
2268 static inline uint4 readR8G8B8_UINT(uint bufferOffset, constant uchar *buffer)
2269 {
2270     uint4 color;
2271     color.r = as_type<uchar>(buffer[bufferOffset]);
2272     color.g = as_type<uchar>(buffer[bufferOffset + 1]);
2273     color.b = as_type<uchar>(buffer[bufferOffset + 2]);
2274     color.a = 1;
2275     return color;
2276 }
2277 
2278 
2279 static inline int4 readR8G8B8A8_SINT(uint bufferOffset, constant uchar *buffer)
2280 {
2281     int4 color;
2282     color.r = as_type<char>(buffer[bufferOffset]);
2283     color.g = as_type<char>(buffer[bufferOffset + 1]);
2284     color.b = as_type<char>(buffer[bufferOffset + 2]);
2285     color.a = as_type<char>(buffer[bufferOffset + 3]);
2286     return color;
2287 }
2288 static inline void writeR8G8B8A8_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer)
2289 {
2290     buffer[bufferOffset] = static_cast<uchar>(color.r);
2291     buffer[bufferOffset + 1] = static_cast<uchar>(color.g);
2292     buffer[bufferOffset + 2] = static_cast<uchar>(color.b);
2293     buffer[bufferOffset + 3] = static_cast<uchar>(color.a);
2294 }
2295 
2296 
2297 static inline uint4 readR8G8B8A8_UINT(uint bufferOffset, constant uchar *buffer)
2298 {
2299     uint4 color;
2300     color.r = as_type<uchar>(buffer[bufferOffset]);
2301     color.g = as_type<uchar>(buffer[bufferOffset + 1]);
2302     color.b = as_type<uchar>(buffer[bufferOffset + 2]);
2303     color.a = as_type<uchar>(buffer[bufferOffset + 3]);
2304     return color;
2305 }
2306 static inline void writeR8G8B8A8_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer)
2307 {
2308     buffer[bufferOffset] = static_cast<uchar>(color.r);
2309     buffer[bufferOffset + 1] = static_cast<uchar>(color.g);
2310     buffer[bufferOffset + 2] = static_cast<uchar>(color.b);
2311     buffer[bufferOffset + 3] = static_cast<uchar>(color.a);
2312 }
2313 
2314 
2315 static inline float4 readR16_FLOAT(uint bufferOffset, constant uchar *buffer)
2316 {
2317     float4 color;
2318     color.r = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset));
2319     color.g = color.b = 0.0;
2320     color.a = 1.0;
2321     return color;
2322 }
2323 static inline void writeR16_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2324 {
2325     shortToBytes(as_type<ushort>(static_cast<half>(color.r)), bufferOffset, buffer);
2326 }
2327 
2328 template <typename ShortType>
2329 static inline float4 readR16_NORM(uint bufferOffset, constant uchar *buffer)
2330 {
2331     float4 color;
2332     color.r = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset));
2333     color.g = color.b = 0.0;
2334     color.a = 1.0;
2335     return color;
2336 }
2337 
2338 
2339 
2340 template<typename ShortType>
2341 static inline void writeR16_NORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2342 {
2343     shortToBytes(floatToNormalized<ShortType>(color.r), bufferOffset, buffer);
2344 }
2345 
2346 
2347 
2348 
2349 static inline int4 readR16_SINT(uint bufferOffset, constant uchar *buffer)
2350 {
2351     int4 color;
2352     color.r = bytesToShort<short>(buffer, bufferOffset);
2353     color.g = color.b = 0;
2354     color.a = 1;
2355     return color;
2356 }
2357 static inline void writeR16_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer)
2358 {
2359     shortToBytes(static_cast<short>(color.r), bufferOffset, buffer);
2360 }
2361 
2362 
2363 static inline uint4 readR16_UINT(uint bufferOffset, constant uchar *buffer)
2364 {
2365     uint4 color;
2366     color.r = bytesToShort<ushort>(buffer, bufferOffset);
2367     color.g = color.b = 0;
2368     color.a = 1;
2369     return color;
2370 }
2371 static inline void writeR16_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer)
2372 {
2373     shortToBytes(static_cast<ushort>(color.r), bufferOffset, buffer);
2374 }
2375 
2376 
2377 static inline float4 readA16_FLOAT(uint bufferOffset, constant uchar *buffer)
2378 {
2379     float4 color;
2380     color.a = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset));
2381     color.rgb = 0.0;
2382     return color;
2383 }
2384 static inline void writeA16_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2385 {
2386     shortToBytes(as_type<ushort>(static_cast<half>(color.a)), bufferOffset, buffer);
2387 }
2388 
2389 
2390 static inline float4 readL16_FLOAT(uint bufferOffset, constant uchar *buffer)
2391 {
2392     float4 color;
2393     color.rgb = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset));
2394     color.a = 1.0;
2395     return color;
2396 }
2397 static inline void writeL16_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2398 {
2399     shortToBytes(as_type<ushort>(static_cast<half>(color.r)), bufferOffset, buffer);
2400 }
2401 
2402 
2403 static inline float4 readL16A16_FLOAT(uint bufferOffset, constant uchar *buffer)
2404 {
2405     float4 color;
2406     color.rgb = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset));
2407     color.a = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 2));
2408     return color;
2409 }
2410 static inline void writeL16A16_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2411 {
2412     shortToBytes(as_type<ushort>(static_cast<half>(color.r)), bufferOffset, buffer);
2413     shortToBytes(as_type<ushort>(static_cast<half>(color.a)), bufferOffset + 2, buffer);
2414 }
2415 
2416 
2417 static inline float4 readR16G16_FLOAT(uint bufferOffset, constant uchar *buffer)
2418 {
2419     float4 color;
2420     color.r = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset));
2421     color.g = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 2));
2422     color.b = 0.0;
2423     color.a = 1.0;
2424     return color;
2425 }
2426 static inline void writeR16G16_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2427 {
2428     shortToBytes(as_type<ushort>(static_cast<half>(color.r)), bufferOffset, buffer);
2429     shortToBytes(as_type<ushort>(static_cast<half>(color.g)), bufferOffset + 2, buffer);
2430 }
2431 
2432 
2433 template <typename ShortType>
2434 static inline float4 readR16G16_NORM(uint bufferOffset, constant uchar *buffer)
2435 {
2436     float4 color;
2437     color.r = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset));
2438     color.g = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 2));
2439     color.b = 0.0;
2440     color.a = 1.0;
2441     return color;
2442 }
2443 
2444 
2445 
2446 template<typename ShortType>
2447 static inline void writeR16G16_NORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2448 {
2449     shortToBytes(floatToNormalized<ShortType>(color.r), bufferOffset, buffer);
2450     shortToBytes(floatToNormalized<ShortType>(color.g), bufferOffset + 2, buffer);
2451 }
2452 
2453 
2454 
2455 
2456 static inline int4 readR16G16_SINT(uint bufferOffset, constant uchar *buffer)
2457 {
2458     int4 color;
2459     color.r = bytesToShort<short>(buffer, bufferOffset);
2460     color.g = bytesToShort<short>(buffer, bufferOffset + 2);
2461     color.b = 0;
2462     color.a = 1;
2463     return color;
2464 }
2465 static inline void writeR16G16_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer)
2466 {
2467     shortToBytes(static_cast<short>(color.r), bufferOffset, buffer);
2468     shortToBytes(static_cast<short>(color.g), bufferOffset + 2, buffer);
2469 }
2470 
2471 
2472 static inline uint4 readR16G16_UINT(uint bufferOffset, constant uchar *buffer)
2473 {
2474     uint4 color;
2475     color.r = bytesToShort<ushort>(buffer, bufferOffset);
2476     color.g = bytesToShort<ushort>(buffer, bufferOffset + 2);
2477     color.b = 0;
2478     color.a = 1;
2479     return color;
2480 }
2481 static inline void writeR16G16_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer)
2482 {
2483     shortToBytes(static_cast<ushort>(color.r), bufferOffset, buffer);
2484     shortToBytes(static_cast<ushort>(color.g), bufferOffset + 2, buffer);
2485 }
2486 
2487 
2488 static inline float4 readR16G16B16_FLOAT(uint bufferOffset, constant uchar *buffer)
2489 {
2490     float4 color;
2491     color.r = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset));
2492     color.g = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 2));
2493     color.b = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 4));
2494     color.a = 1.0;
2495     return color;
2496 }
2497 
2498 
2499 template <typename ShortType>
2500 static inline float4 readR16G16B16_NORM(uint bufferOffset, constant uchar *buffer)
2501 {
2502     float4 color;
2503     color.r = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset));
2504     color.g = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 2));
2505     color.b = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 4));
2506     color.a = 1.0;
2507     return color;
2508 }
2509 
2510 
2511 
2512 static inline int4 readR16G16B16_SINT(uint bufferOffset, constant uchar *buffer)
2513 {
2514     int4 color;
2515     color.r = bytesToShort<short>(buffer, bufferOffset);
2516     color.g = bytesToShort<short>(buffer, bufferOffset + 2);
2517     color.b = bytesToShort<short>(buffer, bufferOffset + 4);
2518     color.a = 1;
2519     return color;
2520 }
2521 
2522 
2523 static inline uint4 readR16G16B16_UINT(uint bufferOffset, constant uchar *buffer)
2524 {
2525     uint4 color;
2526     color.r = bytesToShort<ushort>(buffer, bufferOffset);
2527     color.g = bytesToShort<ushort>(buffer, bufferOffset + 2);
2528     color.b = bytesToShort<ushort>(buffer, bufferOffset + 4);
2529     color.a = 1;
2530     return color;
2531 }
2532 
2533 
2534 static inline float4 readR16G16B16A16_FLOAT(uint bufferOffset, constant uchar *buffer)
2535 {
2536     float4 color;
2537     color.r = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset));
2538     color.g = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 2));
2539     color.b = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 4));
2540     color.a = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 6));
2541     return color;
2542 }
2543 static inline void writeR16G16B16A16_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2544 {
2545     shortToBytes(as_type<ushort>(static_cast<half>(color.r)), bufferOffset, buffer);
2546     shortToBytes(as_type<ushort>(static_cast<half>(color.g)), bufferOffset + 2, buffer);
2547     shortToBytes(as_type<ushort>(static_cast<half>(color.b)), bufferOffset + 4, buffer);
2548     shortToBytes(as_type<ushort>(static_cast<half>(color.a)), bufferOffset + 6, buffer);
2549 }
2550 
2551 
2552 template <typename ShortType>
2553 static inline float4 readR16G16B16A16_NORM(uint bufferOffset, constant uchar *buffer)
2554 {
2555     float4 color;
2556     color.r = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset));
2557     color.g = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 2));
2558     color.b = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 4));
2559     color.a = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 6));
2560     return color;
2561 }
2562 
2563 
2564 
2565 template<typename ShortType>
2566 static inline void writeR16G16B16A16_NORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2567 {
2568     shortToBytes(floatToNormalized<ShortType>(color.r), bufferOffset, buffer);
2569     shortToBytes(floatToNormalized<ShortType>(color.g), bufferOffset + 2, buffer);
2570     shortToBytes(floatToNormalized<ShortType>(color.b), bufferOffset + 4, buffer);
2571     shortToBytes(floatToNormalized<ShortType>(color.a), bufferOffset + 6, buffer);
2572 }
2573 
2574 
2575 
2576 
2577 static inline int4 readR16G16B16A16_SINT(uint bufferOffset, constant uchar *buffer)
2578 {
2579     int4 color;
2580     color.r = bytesToShort<short>(buffer, bufferOffset);
2581     color.g = bytesToShort<short>(buffer, bufferOffset + 2);
2582     color.b = bytesToShort<short>(buffer, bufferOffset + 4);
2583     color.a = bytesToShort<short>(buffer, bufferOffset + 6);
2584     return color;
2585 }
2586 static inline void writeR16G16B16A16_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer)
2587 {
2588     shortToBytes(static_cast<short>(color.r), bufferOffset, buffer);
2589     shortToBytes(static_cast<short>(color.g), bufferOffset + 2, buffer);
2590     shortToBytes(static_cast<short>(color.b), bufferOffset + 4, buffer);
2591     shortToBytes(static_cast<short>(color.a), bufferOffset + 6, buffer);
2592 }
2593 
2594 
2595 static inline uint4 readR16G16B16A16_UINT(uint bufferOffset, constant uchar *buffer)
2596 {
2597     uint4 color;
2598     color.r = bytesToShort<ushort>(buffer, bufferOffset);
2599     color.g = bytesToShort<ushort>(buffer, bufferOffset + 2);
2600     color.b = bytesToShort<ushort>(buffer, bufferOffset + 4);
2601     color.a = bytesToShort<ushort>(buffer, bufferOffset + 6);
2602     return color;
2603 }
2604 static inline void writeR16G16B16A16_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer)
2605 {
2606     shortToBytes(static_cast<ushort>(color.r), bufferOffset, buffer);
2607     shortToBytes(static_cast<ushort>(color.g), bufferOffset + 2, buffer);
2608     shortToBytes(static_cast<ushort>(color.b), bufferOffset + 4, buffer);
2609     shortToBytes(static_cast<ushort>(color.a), bufferOffset + 6, buffer);
2610 }
2611 
2612 
2613 static inline float4 readR32_FLOAT(uint bufferOffset, constant uchar *buffer)
2614 {
2615     float4 color;
2616     color.r = as_type<float>(bytesToInt<uint>(buffer, bufferOffset));
2617     color.g = color.b = 0.0;
2618     color.a = 1.0;
2619     return color;
2620 }
2621 static inline void writeR32_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2622 {
2623     intToBytes(as_type<uint>(color.r), bufferOffset, buffer);
2624 }
2625 
2626 
2627 template <typename IntType>
2628 static inline float4 readR32_NORM(uint bufferOffset, constant uchar *buffer)
2629 {
2630     float4 color;
2631     color.r = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset));
2632     color.g = color.b = 0.0;
2633     color.a = 1.0;
2634     return color;
2635 }
2636 
2637 
2638 
2639 
2640 static inline float4 readA32_FLOAT(uint bufferOffset, constant uchar *buffer)
2641 {
2642     float4 color;
2643     color.a = as_type<float>(bytesToInt<uint>(buffer, bufferOffset));
2644     color.rgb = 0.0;
2645     return color;
2646 }
2647 static inline void writeA32_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2648 {
2649     intToBytes(as_type<uint>(color.a), bufferOffset, buffer);
2650 }
2651 
2652 
2653 static inline float4 readL32_FLOAT(uint bufferOffset, constant uchar *buffer)
2654 {
2655     float4 color;
2656     color.rgb = as_type<float>(bytesToInt<uint>(buffer, bufferOffset));
2657     color.a = 1.0;
2658     return color;
2659 }
2660 static inline void writeL32_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2661 {
2662     intToBytes(as_type<uint>(color.r), bufferOffset, buffer);
2663 }
2664 
2665 
2666 static inline int4 readR32_SINT(uint bufferOffset, constant uchar *buffer)
2667 {
2668     int4 color;
2669     color.r = bytesToInt<int>(buffer, bufferOffset);
2670     color.g = color.b = 0;
2671     color.a = 1;
2672     return color;
2673 }
2674 static inline void writeR32_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer)
2675 {
2676     intToBytes(color.r, bufferOffset, buffer);
2677 }
2678 
2679 
2680 static inline float4 readR32_FIXED(uint bufferOffset, constant uchar *buffer)
2681 {
2682     float4 color;
2683     constexpr float kDivisor = 1.0f / (1 << 16);
2684     color.r = bytesToInt<int>(buffer, bufferOffset) * kDivisor;
2685     color.g = color.b = 0.0;
2686     color.a = 1.0;
2687     return color;
2688 }
2689 
2690 
2691 static inline uint4 readR32_UINT(uint bufferOffset, constant uchar *buffer)
2692 {
2693     uint4 color;
2694     color.r = bytesToInt<uint>(buffer, bufferOffset);
2695     color.g = color.b = 0;
2696     color.a = 1;
2697     return color;
2698 }
2699 static inline void writeR32_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer)
2700 {
2701     intToBytes(color.r, bufferOffset, buffer);
2702 }
2703 
2704 
2705 static inline float4 readL32A32_FLOAT(uint bufferOffset, constant uchar *buffer)
2706 {
2707     float4 color;
2708     color.rgb = as_type<float>(bytesToInt<uint>(buffer, bufferOffset));
2709     color.a = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 4));
2710     return color;
2711 }
2712 static inline void writeL32A32_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2713 {
2714     intToBytes(as_type<uint>(color.r), bufferOffset, buffer);
2715     intToBytes(as_type<uint>(color.a), bufferOffset + 4, buffer);
2716 }
2717 
2718 
2719 static inline float4 readR32G32_FLOAT(uint bufferOffset, constant uchar *buffer)
2720 {
2721     float4 color;
2722     color.r = as_type<float>(bytesToInt<uint>(buffer, bufferOffset));
2723     color.g = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 4));
2724     color.b = 0.0;
2725     color.a = 1.0;
2726     return color;
2727 }
2728 static inline void writeR32G32_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2729 {
2730     intToBytes(as_type<uint>(color.r), bufferOffset, buffer);
2731     intToBytes(as_type<uint>(color.g), bufferOffset + 4, buffer);
2732 }
2733 
2734 
2735 template <typename IntType>
2736 static inline float4 readR32G32_NORM(uint bufferOffset, constant uchar *buffer)
2737 {
2738     float4 color;
2739     color.r = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset));
2740     color.g = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset + 4));
2741     color.b = 0.0;
2742     color.a = 1.0;
2743     return color;
2744 }
2745 
2746 
2747 
2748 
2749 static inline int4 readR32G32_SINT(uint bufferOffset, constant uchar *buffer)
2750 {
2751     int4 color;
2752     color.r = bytesToInt<int>(buffer, bufferOffset);
2753     color.g = bytesToInt<int>(buffer, bufferOffset + 4);
2754     color.b = 0;
2755     color.a = 1;
2756     return color;
2757 }
2758 static inline void writeR32G32_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer)
2759 {
2760     intToBytes(color.r, bufferOffset, buffer);
2761     intToBytes(color.g, bufferOffset + 4, buffer);
2762 }
2763 
2764 
2765 static inline float4 readR32G32_FIXED(uint bufferOffset, constant uchar *buffer)
2766 {
2767     float4 color;
2768     constexpr float kDivisor = 1.0f / (1 << 16);
2769     color.r = bytesToInt<int>(buffer, bufferOffset) * kDivisor;
2770     color.g = bytesToInt<int>(buffer, bufferOffset + 4) * kDivisor;
2771     color.b = 0.0;
2772     color.a = 1.0;
2773     return color;
2774 }
2775 
2776 
2777 static inline uint4 readR32G32_UINT(uint bufferOffset, constant uchar *buffer)
2778 {
2779     uint4 color;
2780     color.r = bytesToInt<uint>(buffer, bufferOffset);
2781     color.g = bytesToInt<uint>(buffer, bufferOffset + 4);
2782     color.b = 0;
2783     color.a = 1;
2784     return color;
2785 }
2786 static inline void writeR32G32_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer)
2787 {
2788     intToBytes(color.r, bufferOffset, buffer);
2789     intToBytes(color.g, bufferOffset + 4, buffer);
2790 }
2791 
2792 
2793 static inline float4 readR32G32B32_FLOAT(uint bufferOffset, constant uchar *buffer)
2794 {
2795     float4 color;
2796     color.r = as_type<float>(bytesToInt<uint>(buffer, bufferOffset));
2797     color.g = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 4));
2798     color.b = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 8));
2799     color.a = 1.0;
2800     return color;
2801 }
2802 
2803 
2804 template <typename IntType>
2805 static inline float4 readR32G32B32_NORM(uint bufferOffset, constant uchar *buffer)
2806 {
2807     float4 color;
2808     color.r = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset));
2809     color.g = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset + 4));
2810     color.b = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset + 8));
2811     color.a = 1.0;
2812     return color;
2813 }
2814 
2815 
2816 
2817 
2818 static inline int4 readR32G32B32_SINT(uint bufferOffset, constant uchar *buffer)
2819 {
2820     int4 color;
2821     color.r = bytesToInt<int>(buffer, bufferOffset);
2822     color.g = bytesToInt<int>(buffer, bufferOffset + 4);
2823     color.b = bytesToInt<int>(buffer, bufferOffset + 8);
2824     color.a = 1;
2825     return color;
2826 }
2827 
2828 
2829 static inline float4 readR32G32B32_FIXED(uint bufferOffset, constant uchar *buffer)
2830 {
2831     float4 color;
2832     constexpr float kDivisor = 1.0f / (1 << 16);
2833     color.r = bytesToInt<int>(buffer, bufferOffset) * kDivisor;
2834     color.g = bytesToInt<int>(buffer, bufferOffset + 4) * kDivisor;
2835     color.b = bytesToInt<int>(buffer, bufferOffset + 8) * kDivisor;
2836     color.a = 1.0;
2837     return color;
2838 }
2839 
2840 
2841 static inline uint4 readR32G32B32_UINT(uint bufferOffset, constant uchar *buffer)
2842 {
2843     uint4 color;
2844     color.r = bytesToInt<uint>(buffer, bufferOffset);
2845     color.g = bytesToInt<uint>(buffer, bufferOffset + 4);
2846     color.b = bytesToInt<uint>(buffer, bufferOffset + 8);
2847     color.a = 1;
2848     return color;
2849 }
2850 
2851 
2852 static inline float4 readR32G32B32A32_FLOAT(uint bufferOffset, constant uchar *buffer)
2853 {
2854     float4 color;
2855     color.r = as_type<float>(bytesToInt<uint>(buffer, bufferOffset));
2856     color.g = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 4));
2857     color.b = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 8));
2858     color.a = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 12));
2859     return color;
2860 }
2861 static inline void writeR32G32B32A32_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2862 {
2863     intToBytes(as_type<uint>(color.r), bufferOffset, buffer);
2864     intToBytes(as_type<uint>(color.g), bufferOffset + 4, buffer);
2865     intToBytes(as_type<uint>(color.b), bufferOffset + 8, buffer);
2866     intToBytes(as_type<uint>(color.a), bufferOffset + 12, buffer);
2867 }
2868 
2869 
2870 template <typename IntType>
2871 static inline float4 readR32G32B32A32_NORM(uint bufferOffset, constant uchar *buffer)
2872 {
2873     float4 color;
2874     color.r = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset));
2875     color.g = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset + 4));
2876     color.b = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset + 8));
2877     color.a = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset + 12));
2878     return color;
2879 }
2880 
2881 
2882 
2883 
2884 static inline int4 readR32G32B32A32_SINT(uint bufferOffset, constant uchar *buffer)
2885 {
2886     int4 color;
2887     color.r = bytesToInt<int>(buffer, bufferOffset);
2888     color.g = bytesToInt<int>(buffer, bufferOffset + 4);
2889     color.b = bytesToInt<int>(buffer, bufferOffset + 8);
2890     color.a = bytesToInt<int>(buffer, bufferOffset + 12);
2891     return color;
2892 }
2893 static inline void writeR32G32B32A32_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer)
2894 {
2895     intToBytes(color.r, bufferOffset, buffer);
2896     intToBytes(color.g, bufferOffset + 4, buffer);
2897     intToBytes(color.b, bufferOffset + 8, buffer);
2898     intToBytes(color.a, bufferOffset + 12, buffer);
2899 }
2900 
2901 static inline float4 readR32G32B32A32_FIXED(uint bufferOffset, constant uchar *buffer)
2902 {
2903     float4 color;
2904     constexpr float kDivisor = 1.0f / (1 << 16);
2905     color.r = bytesToInt<int>(buffer, bufferOffset) * kDivisor;
2906     color.g = bytesToInt<int>(buffer, bufferOffset + 4) * kDivisor;
2907     color.b = bytesToInt<int>(buffer, bufferOffset + 8) * kDivisor;
2908     color.a = bytesToInt<int>(buffer, bufferOffset + 12) * kDivisor;
2909     return color;
2910 }
2911 
2912 
2913 static inline uint4 readR32G32B32A32_UINT(uint bufferOffset, constant uchar *buffer)
2914 {
2915     uint4 color;
2916     color.r = bytesToInt<uint>(buffer, bufferOffset);
2917     color.g = bytesToInt<uint>(buffer, bufferOffset + 4);
2918     color.b = bytesToInt<uint>(buffer, bufferOffset + 8);
2919     color.a = bytesToInt<uint>(buffer, bufferOffset + 12);
2920     return color;
2921 }
2922 static inline void writeR32G32B32A32_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer)
2923 {
2924     intToBytes(color.r, bufferOffset, buffer);
2925     intToBytes(color.g, bufferOffset + 4, buffer);
2926     intToBytes(color.b, bufferOffset + 8, buffer);
2927     intToBytes(color.a, bufferOffset + 12, buffer);
2928 }
2929 # 1320 ".\\copy_buffer.metal"
2930 static inline int4 readR8_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR8_SINT(bufferOffset, buffer); } static inline uint4 readR8_USCALED(uint bufferOffset, constant uchar *buffer) { return readR8_UINT(bufferOffset, buffer); } static inline int4 readR8G8_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR8G8_SINT(bufferOffset, buffer); } static inline uint4 readR8G8_USCALED(uint bufferOffset, constant uchar *buffer) { return readR8G8_UINT(bufferOffset, buffer); } static inline int4 readR8G8B8_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR8G8B8_SINT(bufferOffset, buffer); } static inline uint4 readR8G8B8_USCALED(uint bufferOffset, constant uchar *buffer) { return readR8G8B8_UINT(bufferOffset, buffer); } static inline int4 readR8G8B8A8_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR8G8B8A8_SINT(bufferOffset, buffer); } static inline uint4 readR8G8B8A8_USCALED(uint bufferOffset, constant uchar *buffer) { return readR8G8B8A8_UINT(bufferOffset, buffer); }
2931 static inline int4 readR16_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR16_SINT(bufferOffset, buffer); } static inline uint4 readR16_USCALED(uint bufferOffset, constant uchar *buffer) { return readR16_UINT(bufferOffset, buffer); } static inline int4 readR16G16_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR16G16_SINT(bufferOffset, buffer); } static inline uint4 readR16G16_USCALED(uint bufferOffset, constant uchar *buffer) { return readR16G16_UINT(bufferOffset, buffer); } static inline int4 readR16G16B16_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR16G16B16_SINT(bufferOffset, buffer); } static inline uint4 readR16G16B16_USCALED(uint bufferOffset, constant uchar *buffer) { return readR16G16B16_UINT(bufferOffset, buffer); } static inline int4 readR16G16B16A16_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR16G16B16A16_SINT(bufferOffset, buffer); } static inline uint4 readR16G16B16A16_USCALED(uint bufferOffset, constant uchar *buffer) { return readR16G16B16A16_UINT(bufferOffset, buffer); }
2932 static inline int4 readR32_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR32_SINT(bufferOffset, buffer); } static inline uint4 readR32_USCALED(uint bufferOffset, constant uchar *buffer) { return readR32_UINT(bufferOffset, buffer); } static inline int4 readR32G32_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR32G32_SINT(bufferOffset, buffer); } static inline uint4 readR32G32_USCALED(uint bufferOffset, constant uchar *buffer) { return readR32G32_UINT(bufferOffset, buffer); } static inline int4 readR32G32B32_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR32G32B32_SINT(bufferOffset, buffer); } static inline uint4 readR32G32B32_USCALED(uint bufferOffset, constant uchar *buffer) { return readR32G32B32_UINT(bufferOffset, buffer); } static inline int4 readR32G32B32A32_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR32G32B32A32_SINT(bufferOffset, buffer); } static inline uint4 readR32G32B32A32_USCALED(uint bufferOffset, constant uchar *buffer) { return readR32G32B32A32_UINT(bufferOffset, buffer); }
2933 
2934 static inline int4 readR10G10B10A2_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR10G10B10A2_SINT(bufferOffset, buffer); } static inline uint4 readR10G10B10A2_USCALED(uint bufferOffset, constant uchar *buffer) { return readR10G10B10A2_UINT(bufferOffset, buffer); }
2935 
2936 
2937 kernel void readFromBufferToFloatTexture(ushort3 gIndices [[thread_position_in_grid]], constant CopyPixelParams &options[[buffer(0)]], constant uchar *buffer [[buffer(1)]], texture2d<float, access::write> dstTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<float, access::write> dstTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<float, access::write> dstTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<float, access::write> dstTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]])
2938 {
2939     if (gIndices.x >= options.copySize.x || gIndices.y >= options.copySize.y || gIndices.z >= options.copySize.z) { return; }
2940 # 1372 ".\\copy_buffer.metal"
2941     uint bufferOffset = options.bufferStartOffset + (gIndices.z * options.bufferDepthPitch + gIndices.y * options.bufferRowPitch + gIndices.x * options.pixelSize);
2942 
2943     switch (kCopyFormatType)
2944     {
2945         case FormatID::R5G6B5_UNORM: { auto color = readR5G6B5_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8A8_UNORM: { auto color = readR8G8B8A8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8A8_UNORM_SRGB: { auto color = readR8G8B8A8_UNORM_SRGB(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8A8_SNORM: { auto color = readR8G8B8A8_SNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::B8G8R8A8_UNORM: { auto color = readB8G8R8A8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::B8G8R8A8_UNORM_SRGB: { auto color = readB8G8R8A8_UNORM_SRGB(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8_UNORM: { auto color = readR8G8B8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8_UNORM_SRGB: { auto color = readR8G8B8_UNORM_SRGB(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8_SNORM: { auto color = readR8G8B8_SNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::L8_UNORM: { auto color = readL8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::L8A8_UNORM: { auto color = readL8A8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R5G5B5A1_UNORM: { auto color = readR5G5B5A1_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R4G4B4A4_UNORM: { auto color = readR4G4B4A4_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8_UNORM: { auto color = readR8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8_SNORM: { auto color = readR8_SNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8_UNORM: { auto color = readR8G8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8_SNORM: { auto color = readR8G8_SNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16_FLOAT: { auto color = readR16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16_SNORM: { auto color = readR16_NORM<short>(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16_UNORM: { auto color = readR16_NORM<ushort>(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::A16_FLOAT: { auto color = readA16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::L16_FLOAT: { auto color = readL16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::L16A16_FLOAT: { auto color = readL16A16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16_FLOAT: { auto color = readR16G16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16_SNORM: { auto color = readR16G16_NORM<short>(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16_UNORM: { auto color = readR16G16_NORM<ushort>(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16_FLOAT: { auto color = readR16G16B16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16_SNORM: { auto color = readR16G16B16_NORM<short>(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16_UNORM: { auto color = readR16G16B16_NORM<ushort>(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16A16_FLOAT: { auto color = readR16G16B16A16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16A16_SNORM: { auto color = readR16G16B16A16_NORM<short>(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16A16_UNORM: { auto color = readR16G16B16A16_NORM<ushort>(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32_FLOAT: { auto color = readR32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::A32_FLOAT: { auto color = readA32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::L32_FLOAT: { auto color = readL32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::L32A32_FLOAT: { auto color = readL32A32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32_FLOAT: { auto color = readR32G32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32B32_FLOAT: { auto color = readR32G32B32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32B32A32_FLOAT: { auto color = readR32G32B32A32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break;
2946     }
2947 
2948 
2949 }
2950 
2951 kernel void readFromBufferToIntTexture(ushort3 gIndices [[thread_position_in_grid]], constant CopyPixelParams &options[[buffer(0)]], constant uchar *buffer [[buffer(1)]], texture2d<int, access::write> dstTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<int, access::write> dstTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<int, access::write> dstTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<int, access::write> dstTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]])
2952 {
2953     if (gIndices.x >= options.copySize.x || gIndices.y >= options.copySize.y || gIndices.z >= options.copySize.z) { return; }
2954 # 1400 ".\\copy_buffer.metal"
2955     uint bufferOffset = options.bufferStartOffset + (gIndices.z * options.bufferDepthPitch + gIndices.y * options.bufferRowPitch + gIndices.x * options.pixelSize);
2956 
2957     switch (kCopyFormatType)
2958     {
2959         case FormatID::R8_SINT: { auto color = readR8_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8_SINT: { auto color = readR8G8_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8_SINT: { auto color = readR8G8B8_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8A8_SINT: { auto color = readR8G8B8A8_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16_SINT: { auto color = readR16_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16_SINT: { auto color = readR16G16_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16_SINT: { auto color = readR16G16B16_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16A16_SINT: { auto color = readR16G16B16A16_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32_SINT: { auto color = readR32_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32_SINT: { auto color = readR32G32_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32B32_SINT: { auto color = readR32G32B32_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32B32A32_SINT: { auto color = readR32G32B32A32_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break;
2960     }
2961 
2962 
2963 }
2964 
2965 kernel void readFromBufferToUIntTexture(ushort3 gIndices [[thread_position_in_grid]], constant CopyPixelParams &options[[buffer(0)]], constant uchar *buffer [[buffer(1)]], texture2d<uint, access::write> dstTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<uint, access::write> dstTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<uint, access::write> dstTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<uint, access::write> dstTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]])
2966 {
2967     if (gIndices.x >= options.copySize.x || gIndices.y >= options.copySize.y || gIndices.z >= options.copySize.z) { return; }
2968 # 1428 ".\\copy_buffer.metal"
2969     uint bufferOffset = options.bufferStartOffset + (gIndices.z * options.bufferDepthPitch + gIndices.y * options.bufferRowPitch + gIndices.x * options.pixelSize);
2970 
2971     switch (kCopyFormatType)
2972     {
2973         case FormatID::R8_UINT: { auto color = readR8_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8_UINT: { auto color = readR8G8_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8_UINT: { auto color = readR8G8B8_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8A8_UINT: { auto color = readR8G8B8A8_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16_UINT: { auto color = readR16_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16_UINT: { auto color = readR16G16_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16_UINT: { auto color = readR16G16B16_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16A16_UINT: { auto color = readR16G16B16A16_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32_UINT: { auto color = readR32_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32_UINT: { auto color = readR32G32_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32B32_UINT: { auto color = readR32G32B32_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32B32A32_UINT: { auto color = readR32G32B32A32_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break;
2974     }
2975 
2976 
2977 }
2978 
2979 
2980 kernel void writeFromFloatTextureToBuffer(ushort2 gIndices [[thread_position_in_grid]], constant WritePixelParams &options[[buffer(0)]], texture2d<float, access::read> srcTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<float, access::read> srcTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<float, access::read> srcTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<float, access::read> srcTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]], texture2d_ms<float, access::read> srcTexture2dMS [[texture(0), function_constant(kCopyTextureType2DMS)]], device uchar *buffer [[buffer(1)]])
2981 {
2982     if (gIndices.x >= options.copySize.x || gIndices.y >= options.copySize.y) { return; }
2983 # 1481 ".\\copy_buffer.metal"
2984     uint bufferOffset = options.bufferStartOffset + (gIndices.y * options.bufferRowPitch + gIndices.x * options.pixelSize);
2985 
2986     switch (kCopyFormatType)
2987     {
2988         case FormatID::R5G6B5_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR5G6B5_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8A8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8A8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8A8_UNORM_SRGB: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8A8_UNORM_SRGB(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8A8_SNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8A8_SNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::B8G8R8A8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeB8G8R8A8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::B8G8R8A8_UNORM_SRGB: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeB8G8R8A8_UNORM_SRGB(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8_UNORM_SRGB: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8_UNORM_SRGB(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8_SNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8_SNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::L8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeL8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::A8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeA8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::L8A8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeL8A8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R5G5B5A1_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR5G5B5A1_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R4G4B4A4_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR4G4B4A4_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8_SNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8_SNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8_SNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8_SNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16_SNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16_NORM<short>(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16_NORM<ushort>(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::A16_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeA16_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::L16_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeL16_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::L16A16_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeL16A16_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16_SNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16_NORM<short>(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16_NORM<ushort>(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16B16A16_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16B16A16_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16B16A16_SNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16B16A16_NORM<short>(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16B16A16_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16B16A16_NORM<ushort>(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::A32_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeA32_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::L32_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeL32_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::L32A32_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeL32A32_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32G32_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32G32_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32G32B32A32_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32G32B32A32_FLOAT(gIndices, options, bufferOffset, color, buffer); } break;
2989     }
2990 
2991 
2992 }
2993 
2994 kernel void writeFromIntTextureToBuffer(ushort2 gIndices [[thread_position_in_grid]], constant WritePixelParams &options[[buffer(0)]], texture2d<int, access::read> srcTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<int, access::read> srcTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<int, access::read> srcTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<int, access::read> srcTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]], texture2d_ms<int, access::read> srcTexture2dMS [[texture(0), function_constant(kCopyTextureType2DMS)]], device uchar *buffer [[buffer(1)]])
2995 {
2996     if (gIndices.x >= options.copySize.x || gIndices.y >= options.copySize.y) { return; }
2997 # 1506 ".\\copy_buffer.metal"
2998     uint bufferOffset = options.bufferStartOffset + (gIndices.y * options.bufferRowPitch + gIndices.x * options.pixelSize);
2999 
3000     switch (kCopyFormatType)
3001     {
3002         case FormatID::R8_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8A8_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8A8_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16B16A16_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16B16A16_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32G32_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32G32_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32G32B32A32_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32G32B32A32_SINT(gIndices, options, bufferOffset, color, buffer); } break;
3003     }
3004 
3005 
3006 }
3007 
3008 kernel void writeFromUIntTextureToBuffer(ushort2 gIndices [[thread_position_in_grid]], constant WritePixelParams &options[[buffer(0)]], texture2d<uint, access::read> srcTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<uint, access::read> srcTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<uint, access::read> srcTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<uint, access::read> srcTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]], texture2d_ms<uint, access::read> srcTexture2dMS [[texture(0), function_constant(kCopyTextureType2DMS)]], device uchar *buffer [[buffer(1)]])
3009 {
3010     if (gIndices.x >= options.copySize.x || gIndices.y >= options.copySize.y) { return; }
3011 # 1531 ".\\copy_buffer.metal"
3012     uint bufferOffset = options.bufferStartOffset + (gIndices.y * options.bufferRowPitch + gIndices.x * options.pixelSize);
3013 
3014     switch (kCopyFormatType)
3015     {
3016         case FormatID::R8_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8A8_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8A8_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16B16A16_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16B16A16_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32G32_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32G32_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32G32B32A32_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32G32B32A32_UINT(gIndices, options, bufferOffset, color, buffer); } break;
3017     }
3018 
3019 
3020 }
3021 
3022 
3023 struct CopyVertexParams
3024 {
3025     uint srcBufferStartOffset;
3026     uint srcStride;
3027     uint srcComponentBytes;
3028     uint srcComponents;
3029 
3030 
3031 
3032     uchar4 srcDefaultAlphaData;
3033 
3034     uint dstBufferStartOffset;
3035     uint dstStride;
3036     uint dstComponents;
3037 
3038     uint vertexCount;
3039 };
3040 # 1581 ".\\copy_buffer.metal"
3041 template <typename IntType>
3042 static inline void writeFloatVertex(constant CopyVertexParams &options,
3043                                     uint idx,
3044                                     vec<IntType, 4> data,
3045                                     device uchar *dst)
3046 {
3047     uint dstOffset = idx * options.dstStride + options.dstBufferStartOffset;
3048 
3049     for (uint component = 0; component < options.dstComponents; ++component, dstOffset += 4)
3050     {
3051         floatToBytes(static_cast<float>(data[component]), dstOffset, dst);
3052     }
3053 }
3054 
3055 template <>
3056 inline void writeFloatVertex(constant CopyVertexParams &options,
3057                              uint idx,
3058                              vec<float, 4> data,
3059                              device uchar *dst)
3060 {
3061     uint dstOffset = idx * options.dstStride + options.dstBufferStartOffset;
3062 
3063     for (uint component = 0; component < options.dstComponents; ++component, dstOffset += 4)
3064     {
3065         floatToBytes(data[component], dstOffset, dst);
3066     }
3067 }
3068 
3069 
3070 static inline void convertToFloatVertexFormat(uint index,
3071                                               constant CopyVertexParams &options,
3072                                               constant uchar *srcBuffer,
3073                                               device uchar *dstBuffer)
3074 {
3075 # 1627 ".\\copy_buffer.metal"
3076     uint bufferOffset = options.srcBufferStartOffset + options.srcStride * index;
3077 # 1636 ".\\copy_buffer.metal"
3078     switch (kCopyFormatType)
3079     {
3080         case FormatID::R8_UNORM: { auto data = readR8_UNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8_SNORM: { auto data = readR8_SNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8_UINT: { auto data = readR8_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8_SINT: { auto data = readR8_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8_USCALED: { auto data = readR8_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8_SSCALED: { auto data = readR8_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_UNORM: { auto data = readR8G8_UNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_SNORM: { auto data = readR8G8_SNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_UINT: { auto data = readR8G8_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_SINT: { auto data = readR8G8_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_USCALED: { auto data = readR8G8_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_SSCALED: { auto data = readR8G8_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_UNORM: { auto data = readR8G8B8_UNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_SNORM: { auto data = readR8G8B8_SNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_UINT: { auto data = readR8G8B8_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_SINT: { auto data = readR8G8B8_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_USCALED: { auto data = readR8G8B8_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_SSCALED: { auto data = readR8G8B8_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_UNORM: { auto data = readR8G8B8A8_UNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_SNORM: { auto data = readR8G8B8A8_SNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_UINT: { auto data = readR8G8B8A8_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_SINT: { auto data = readR8G8B8A8_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_USCALED: { auto data = readR8G8B8A8_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_SSCALED: { auto data = readR8G8B8A8_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_UNORM: { auto data = readR16_NORM<ushort>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_SNORM: { auto data = readR16_NORM<short>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_UINT: { auto data = readR16_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_SINT: { auto data = readR16_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_USCALED: { auto data = readR16_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_SSCALED: { auto data = readR16_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_UNORM: { auto data = readR16G16_NORM<ushort>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_SNORM: { auto data = readR16G16_NORM<short>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_UINT: { auto data = readR16G16_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_SINT: { auto data = readR16G16_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_USCALED: { auto data = readR16G16_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_SSCALED: { auto data = readR16G16_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_UNORM: { auto data = readR16G16B16_NORM<ushort>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_SNORM: { auto data = readR16G16B16_NORM<short>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_UINT: { auto data = readR16G16B16_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_SINT: { auto data = readR16G16B16_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_USCALED: { auto data = readR16G16B16_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_SSCALED: { auto data = readR16G16B16_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_UNORM: { auto data = readR16G16B16A16_NORM<ushort>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_SNORM: { auto data = readR16G16B16A16_NORM<short>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_UINT: { auto data = readR16G16B16A16_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_SINT: { auto data = readR16G16B16A16_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_USCALED: { auto data = readR16G16B16A16_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_SSCALED: { auto data = readR16G16B16A16_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_UNORM: { auto data = readR32_NORM<uint>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_SNORM: { auto data = readR32_NORM<int>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_UINT: { auto data = readR32_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_SINT: { auto data = readR32_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_USCALED: { auto data = readR32_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_SSCALED: { auto data = readR32_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_UNORM: { auto data = readR32G32_NORM<uint>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_SNORM: { auto data = readR32G32_NORM<int>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_UINT: { auto data = readR32G32_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_SINT: { auto data = readR32G32_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_USCALED: { auto data = readR32G32_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_SSCALED: { auto data = readR32G32_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_UNORM: { auto data = readR32G32B32_NORM<uint>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_SNORM: { auto data = readR32G32B32_NORM<int>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_UINT: { auto data = readR32G32B32_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_SINT: { auto data = readR32G32B32_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_USCALED: { auto data = readR32G32B32_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_SSCALED: { auto data = readR32G32B32_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_UNORM: { auto data = readR32G32B32A32_NORM<uint>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_SNORM: { auto data = readR32G32B32A32_NORM<int>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_UINT: { auto data = readR32G32B32A32_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_SINT: { auto data = readR32G32B32A32_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_USCALED: { auto data = readR32G32B32A32_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_SSCALED: { auto data = readR32G32B32A32_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_FLOAT: { auto data = readR16_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_FLOAT: { auto data = readR16G16_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_FLOAT: { auto data = readR16G16B16_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_FLOAT: { auto data = readR16G16B16A16_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_FLOAT: { auto data = readR32_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_FLOAT: { auto data = readR32G32_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_FLOAT: { auto data = readR32G32B32_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_FLOAT: { auto data = readR32G32B32A32_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_FIXED: { auto data = readR32_FIXED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_FIXED: { auto data = readR32G32_FIXED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_FIXED: { auto data = readR32G32B32_FIXED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_FIXED: { auto data = readR32G32B32A32_FIXED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R10G10B10A2_SINT: { auto data = readR10G10B10A2_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R10G10B10A2_UINT: { auto data = readR10G10B10A2_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R10G10B10A2_SSCALED: { auto data = readR10G10B10A2_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R10G10B10A2_USCALED: { auto data = readR10G10B10A2_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break;
3081     }
3082 
3083 
3084 }
3085 
3086 
3087 kernel void convertToFloatVertexFormatCS(uint index [[thread_position_in_grid]],
3088                                          constant CopyVertexParams &options [[buffer(0)]],
3089                                          constant uchar *srcBuffer [[buffer(1)]],
3090                                          device uchar *dstBuffer [[buffer(2)]])
3091 {
3092     if (index >= options.vertexCount) { return; };
3093     convertToFloatVertexFormat(index, options, srcBuffer, dstBuffer);
3094 }
3095 
3096 
3097 vertex void convertToFloatVertexFormatVS(uint index [[vertex_id]],
3098                                          constant CopyVertexParams &options [[buffer(0)]],
3099                                          constant uchar *srcBuffer [[buffer(1)]],
3100                                          device uchar *dstBuffer [[buffer(2)]])
3101 {
3102     convertToFloatVertexFormat(index, options, srcBuffer, dstBuffer);
3103 }
3104 
3105 
3106 static inline void expandVertexFormatComponents(uint index,
3107                                                 constant CopyVertexParams &options,
3108                                                 constant uchar *srcBuffer,
3109                                                 device uchar *dstBuffer)
3110 {
3111     uint srcOffset = options.srcBufferStartOffset + options.srcStride * index;
3112     uint dstOffset = options.dstBufferStartOffset + options.dstStride * index;
3113 
3114     uint dstComponentsBeforeAlpha = min(options.dstComponents, 3u);
3115     uint component;
3116     for (component = 0; component < options.srcComponents; ++component,
3117         srcOffset += options.srcComponentBytes, dstOffset += options.srcComponentBytes)
3118     {
3119         for (uint byte = 0; byte < options.srcComponentBytes; ++byte)
3120         {
3121             dstBuffer[dstOffset + byte] = srcBuffer[srcOffset + byte];
3122         }
3123     }
3124 
3125     for (; component < dstComponentsBeforeAlpha;
3126          ++component, dstOffset += options.srcComponentBytes)
3127     {
3128         for (uint byte = 0; byte < options.srcComponentBytes; ++byte)
3129         {
3130             dstBuffer[dstOffset + byte] = 0;
3131         }
3132     }
3133 
3134     if (component < options.dstComponents)
3135     {
3136 
3137         for (uint byte = 0; byte < options.srcComponentBytes; ++byte)
3138         {
3139             dstBuffer[dstOffset + byte] = options.srcDefaultAlphaData[byte];
3140         }
3141     }
3142 }
3143 
3144 
3145 kernel void expandVertexFormatComponentsCS(uint index [[thread_position_in_grid]],
3146                                            constant CopyVertexParams &options [[buffer(0)]],
3147                                            constant uchar *srcBuffer [[buffer(1)]],
3148                                            device uchar *dstBuffer [[buffer(2)]])
3149 {
3150     if (index >= options.vertexCount) { return; };
3151 
3152     expandVertexFormatComponents(index, options, srcBuffer, dstBuffer);
3153 }
3154 
3155 
3156 vertex void expandVertexFormatComponentsVS(uint index [[vertex_id]],
3157                                            constant CopyVertexParams &options [[buffer(0)]],
3158                                            constant uchar *srcBuffer [[buffer(1)]],
3159                                            device uchar *dstBuffer [[buffer(2)]])
3160 {
3161     expandVertexFormatComponents(index, options, srcBuffer, dstBuffer);
3162 }
3163 
3164 
3165 kernel void linearizeBlocks(ushort2 position [[thread_position_in_grid]],
3166                             constant uint2 *dimensions [[buffer(0)]],
3167                             constant uint2 *srcBuffer [[buffer(1)]],
3168                             device uint2 *dstBuffer [[buffer(2)]])
3169 {
3170     if (any(uint2(position) >= *dimensions))
3171     {
3172         return;
3173     }
3174     uint2 t = uint2(position);
3175     t = (t | (t << 8)) & 0x00FF00FF;
3176     t = (t | (t << 4)) & 0x0F0F0F0F;
3177     t = (t | (t << 2)) & 0x33333333;
3178     t = (t | (t << 1)) & 0x55555555;
3179     dstBuffer[position.y * (*dimensions).x + position.x] = srcBuffer[(t.x << 1) | t.y];
3180 }
3181 
3182 
3183 kernel void saturateDepth(uint2 position [[thread_position_in_grid]],
3184                           constant uint3 *dimensions [[buffer(0)]],
3185                           device float *srcBuffer [[buffer(1)]],
3186                           device float *dstBuffer [[buffer(2)]])
3187 {
3188     if (any(position >= (*dimensions).xy))
3189     {
3190         return;
3191     }
3192     const uint srcOffset = position.y * (*dimensions).z + position.x;
3193     const uint dstOffset = position.y * (*dimensions).x + position.x;
3194     dstBuffer[dstOffset] = saturate(srcBuffer[srcOffset]);
3195 }
3196 # 6 "temp_master_source.metal" 2
3197 # 1 ".\\visibility.metal" 1
3198 
3199 
3200 
3201 
3202 
3203 
3204 
3205 
3206 constant bool kCombineWithExistingResult [[function_constant(1000)]];
3207 
3208 
3209 
3210 struct CombineVisibilityResultOptions
3211 {
3212 
3213     uint startOffset;
3214 
3215     uint numOffsets;
3216 };
3217 
3218 kernel void combineVisibilityResult(uint idx [[thread_position_in_grid]],
3219                                     constant CombineVisibilityResultOptions &options [[buffer(0)]],
3220                                     constant ushort4 *renderpassVisibilityResult [[buffer(1)]],
3221                                     device ushort4 *finalResults [[buffer(2)]])
3222 {
3223     if (idx > 0)
3224     {
3225 
3226 
3227 
3228         return;
3229     }
3230     ushort4 finalResult16x4;
3231 
3232     if (kCombineWithExistingResult)
3233     {
3234         finalResult16x4 = finalResults[0];
3235     }
3236     else
3237     {
3238         finalResult16x4 = ushort4(0, 0, 0, 0);
3239     }
3240 
3241     for (uint i = 0; i < options.numOffsets; ++i)
3242     {
3243         uint offset = options.startOffset + i;
3244         ushort4 renderpassResult = renderpassVisibilityResult[offset];
3245 
3246 
3247         finalResult16x4 = finalResult16x4 | renderpassResult;
3248     }
3249     finalResults[0] = finalResult16x4;
3250 }
3251 # 7 "temp_master_source.metal" 2
3252 # 1 ".\\rewrite_indices.metal" 1
3253 # 11 ".\\rewrite_indices.metal"
3254 # 1 ".\\rewrite_indices_shared.h" 1
3255 # 12 ".\\rewrite_indices.metal" 2
3256 using namespace metal;
3257 
3258 constant uint fixIndexBufferKey [[ function_constant(2000) ]];
3259 constant bool indexBufferIsUint16 = (((fixIndexBufferKey >> 0U) & 0x03U) == 2U);
3260 constant bool indexBufferIsUint32 = (((fixIndexBufferKey >> 0U) & 0x03U) == 3U);
3261 constant bool outIndexBufferIsUint16 = (((fixIndexBufferKey >> 2U) & 0x03U) == 2U);
3262 constant bool outIndexBufferIsUint32 = (((fixIndexBufferKey >> 2U) & 0x03U) == 3U);
3263 constant bool doPrimRestart = (fixIndexBufferKey & 0x00100U);
3264 constant uint fixIndexBufferMode = (fixIndexBufferKey >> 4U) & 0x0FU;
3265 
3266 
3267 static inline uint readIdx(
3268                            const device ushort *indexBufferUint16,
3269                            const device uint *indexBufferUint32,
3270                            const uint restartIndex,
3271                            const uint indexCount,
3272                            uint idx,
3273                            thread bool &foundRestart,
3274                            thread uint &indexThatRestartedFirst
3275                            )
3276 {
3277     uint inIndex = idx;
3278     if(inIndex < indexCount)
3279     {
3280         if(indexBufferIsUint16)
3281         {
3282             inIndex = indexBufferUint16[inIndex];
3283         }
3284         else if(indexBufferIsUint32)
3285         {
3286             inIndex = indexBufferUint32[inIndex];
3287         }
3288     }
3289     else
3290     {
3291         foundRestart = true;
3292         indexThatRestartedFirst = idx;
3293     }
3294     if(doPrimRestart && !foundRestart && inIndex == restartIndex)
3295     {
3296         foundRestart = true;
3297         indexThatRestartedFirst = idx;
3298     }
3299     return inIndex;
3300 }
3301 
3302 static inline void outputPrimitive(
3303                                    const device ushort *indexBufferUint16,
3304                                    const device uint *indexBufferUint32,
3305                                    device ushort *outIndexBufferUint16,
3306                                    device uint *outIndexBufferUint32,
3307                                    const uint restartIndex,
3308                                    const uint indexCount,
3309                                    thread uint &baseIndex,
3310                                    uint onIndex,
3311                                    thread uint &onOutIndex
3312                                    )
3313 {
3314     if(baseIndex > onIndex) return;
3315     bool foundRestart = false;
3316     uint indexThatRestartedFirst = 0;
3317 # 86 ".\\rewrite_indices.metal"
3318     switch(fixIndexBufferMode)
3319     {
3320         case 0x00U:
3321         {
3322             auto tmpIndex = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex, foundRestart, indexThatRestartedFirst);
3323             if(foundRestart)
3324             {
3325                 baseIndex = indexThatRestartedFirst + 1;
3326                 return;
3327             }
3328 
3329             ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex; } onOutIndex++; });
3330         }
3331         break;
3332         case 0x01U:
3333         {
3334             auto tmpIndex0 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 0, foundRestart, indexThatRestartedFirst);
3335             auto tmpIndex1 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 1, foundRestart, indexThatRestartedFirst);
3336             if(foundRestart)
3337             {
3338                 baseIndex = indexThatRestartedFirst + 1;
3339                 return;
3340             }
3341             if((onIndex - baseIndex) & 1) return;
3342 
3343             if(fixIndexBufferKey & 0x00200U)
3344             {
3345                 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; });
3346                 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; });
3347             }
3348             else
3349             {
3350                 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; });
3351                 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; });
3352             }
3353         }
3354         break;
3355         case 0x03U:
3356         {
3357             auto tmpIndex0 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 0, foundRestart, indexThatRestartedFirst);
3358             auto tmpIndex1 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 1, foundRestart, indexThatRestartedFirst);
3359             if(foundRestart)
3360             {
3361                 baseIndex = indexThatRestartedFirst + 1;
3362                 return;
3363             }
3364 
3365             if(fixIndexBufferKey & 0x00200U)
3366             {
3367                 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; });
3368                 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; });
3369             }
3370             else
3371             {
3372                 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; });
3373                 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; });
3374             }
3375         }
3376         break;
3377         case 0x04U:
3378         {
3379             auto tmpIndex0 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 0, foundRestart, indexThatRestartedFirst);
3380             auto tmpIndex1 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 1, foundRestart, indexThatRestartedFirst);
3381             auto tmpIndex2 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 2, foundRestart, indexThatRestartedFirst);
3382             if(foundRestart)
3383             {
3384                 baseIndex = indexThatRestartedFirst + 1;
3385                 return;
3386             }
3387             if(((onIndex - baseIndex) % 3) != 0) return;
3388 
3389             if(fixIndexBufferKey & 0x00200U)
3390             {
3391                 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2; } onOutIndex++; });
3392                 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; });
3393                 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; });
3394             }
3395             else
3396             {
3397                 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; });
3398                 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; });
3399                 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2; } onOutIndex++; });
3400             }
3401         }
3402         break;
3403         case 0x05U:
3404         {
3405             uint isOdd = ((onIndex - baseIndex) & 1);
3406             auto tmpIndex0 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 0 + isOdd, foundRestart, indexThatRestartedFirst);
3407             auto tmpIndex1 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 1 - isOdd, foundRestart, indexThatRestartedFirst);
3408             auto tmpIndex2 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 2, foundRestart, indexThatRestartedFirst);
3409             if(foundRestart)
3410             {
3411                 baseIndex = indexThatRestartedFirst + 1;
3412                 return;
3413             }
3414 
3415             if(fixIndexBufferKey & 0x00200U)
3416             {
3417                 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2; } onOutIndex++; });
3418                 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; });
3419                 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; });
3420             }
3421             else
3422             {
3423 
3424                 if(isOdd)
3425                 {
3426                     ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; });
3427                     ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2; } onOutIndex++; });
3428                     ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; });
3429                 }
3430                 else
3431                 {
3432                     ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; });
3433                     ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; });
3434                     ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2; } onOutIndex++; });
3435                 }
3436             }
3437 
3438             assert(onOutIndex <= (onIndex + 1) * 3);
3439             assert(onOutIndex <= (indexCount - 2) * 3);
3440         }
3441         break;
3442 
3443     }
3444 
3445 
3446 }
3447 
3448 kernel void fixIndexBuffer(
3449                            const device ushort *indexBufferUint16 [[ buffer(0), function_constant(indexBufferIsUint16) ]],
3450                            const device uint *indexBufferUint32 [[ buffer(0), function_constant(indexBufferIsUint32) ]],
3451                            device ushort *outIndexBufferUint16 [[ buffer(1), function_constant(outIndexBufferIsUint16) ]],
3452                            device uint *outIndexBufferUint32 [[ buffer(1), function_constant(outIndexBufferIsUint32) ]],
3453                            constant uint &indexCount [[ buffer(2) ]],
3454                            constant uint &primCount [[ buffer(3) ]],
3455                            uint prim [[thread_position_in_grid]])
3456 {
3457     constexpr uint restartIndex = 0xFFFFFFFF;
3458     uint baseIndex = 0;
3459     uint onIndex = onIndex;
3460     uint onOutIndex = onOutIndex;
3461     if(prim < primCount)
3462     {
3463         switch(fixIndexBufferMode)
3464         {
3465             case 0x00U:
3466                 onIndex = prim;
3467                 onOutIndex = prim;
3468                 break;
3469             case 0x01U:
3470                 onIndex = prim * 2;
3471                 onOutIndex = prim * 2;
3472                 break;
3473             case 0x03U:
3474                 onIndex = prim;
3475                 onOutIndex = prim * 2;
3476                 break;
3477             case 0x04U:
3478                 onIndex = prim * 3;
3479                 onOutIndex = prim * 3;
3480                 break;
3481             case 0x05U:
3482                 onIndex = prim;
3483                 onOutIndex = prim * 3;
3484                 break;
3485         }
3486         outputPrimitive(indexBufferUint16, indexBufferUint32, outIndexBufferUint16, outIndexBufferUint32, restartIndex, indexCount, baseIndex, onIndex, onOutIndex);
3487     }
3488 }
3489 
3490 
3491 
3492 static inline void generatePrimitive(
3493                                    device ushort *outIndexBufferUint16,
3494                                    device uint *outIndexBufferUint32,
3495                                    const uint firstVertex,
3496                                    const uint indexCount,
3497                                    thread uint &baseIndex,
3498                                    uint onIndex,
3499                                    uint primCount,
3500                                    thread uint &onOutIndex
3501                                    )
3502 {
3503     if(baseIndex > onIndex) return;
3504 # 284 ".\\rewrite_indices.metal"
3505     switch(fixIndexBufferMode)
3506     {
3507         case 0x00U:
3508         {
3509             ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = onIndex + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = onIndex + firstVertex; } onOutIndex++; });
3510         }
3511         break;
3512         case 0x01U:
3513         {
3514             auto tmpIndex0 = onIndex + 0;
3515             auto tmpIndex1 = onIndex + 1;
3516             if(fixIndexBufferKey & 0x00200U)
3517             {
3518                 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; });
3519                 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; });
3520             }
3521             else
3522             {
3523                 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; });
3524                 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; });
3525             }
3526         }
3527         break;
3528         case 0x02U:
3529         {
3530             auto tmpIndex0 = onIndex + 0;
3531             auto tmpIndex1 = (onIndex + 1) % primCount;
3532             if(fixIndexBufferKey & 0x00200U)
3533             {
3534                 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; });
3535                 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; });
3536             }
3537             else
3538             {
3539                 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; });
3540                 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; });
3541             }
3542         }
3543         break;
3544         case 0x03U:
3545         {
3546             auto tmpIndex0 = onIndex + 0;
3547             auto tmpIndex1 = onIndex + 1;
3548             if(fixIndexBufferKey & 0x00200U)
3549             {
3550                 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; });
3551                 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; });
3552             }
3553             else
3554             {
3555                 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; });
3556                 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; });
3557             }
3558         }
3559         break;
3560         case 0x04U:
3561         {
3562             auto tmpIndex0 = onIndex + 0;
3563             auto tmpIndex1 = onIndex + 1;
3564             auto tmpIndex2 = onIndex + 2;
3565             if(fixIndexBufferKey & 0x00200U)
3566             {
3567                 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2 + firstVertex; } onOutIndex++; });
3568                 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; });
3569                 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; });
3570             }
3571             else
3572             {
3573                 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; });
3574                 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; });
3575                 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2 + firstVertex; } onOutIndex++; });
3576             }
3577         }
3578         break;
3579         case 0x05U:
3580         {
3581             uint isOdd = ((onIndex - baseIndex) & 1);
3582             auto tmpIndex0 = onIndex + 0 + isOdd;
3583             auto tmpIndex1 = onIndex + 1 - isOdd;
3584             auto tmpIndex2 = onIndex + 2;
3585             if(fixIndexBufferKey & 0x00200U)
3586             {
3587                 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2 + firstVertex; } onOutIndex++; });
3588                 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; });
3589                 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; });
3590             }
3591             else
3592             {
3593                 if(isOdd)
3594                 {
3595                     ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; });
3596                     ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2 + firstVertex; } onOutIndex++; });
3597                     ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; });
3598                 }
3599                 else
3600                 {
3601                     ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; });
3602                     ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; });
3603                     ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2 + firstVertex; } onOutIndex++; });
3604                 }
3605             }
3606 
3607             assert(onOutIndex <= (onIndex + 1) * 3);
3608             assert(onOutIndex <= (indexCount - 2) * 3);
3609             break;
3610         }
3611         case 0x06U:
3612         {
3613             auto tmpIndex0 = 0;
3614             auto tmpIndex1 = onIndex + 1;
3615             auto tmpIndex2 = onIndex + 2;
3616 
3617             if(fixIndexBufferKey & 0x00200U)
3618             {
3619                 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2 + firstVertex; } onOutIndex++; });
3620                 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; });
3621                 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; });
3622             }
3623             else
3624             {
3625                 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; });
3626                 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2 + firstVertex; } onOutIndex++; });
3627                 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; });
3628             }
3629         }
3630         break;
3631 
3632     }
3633 
3634 }
3635 
3636 
3637 
3638 kernel void genIndexBuffer(
3639                            device ushort *outIndexBufferUint16 [[ buffer(1), function_constant(outIndexBufferIsUint16) ]],
3640                            device uint *outIndexBufferUint32 [[ buffer(1), function_constant(outIndexBufferIsUint32) ]],
3641                            constant uint &indexCount [[ buffer(2) ]],
3642                            constant uint &primCount [[ buffer(3) ]],
3643                            constant uint &firstVertex [[ buffer(4) ]],
3644                            uint prim [[thread_position_in_grid]])
3645 {
3646     uint baseIndex = 0;
3647     uint onIndex = onIndex;
3648     uint onOutIndex = onOutIndex;
3649     if(prim < primCount)
3650     {
3651         switch(fixIndexBufferMode)
3652         {
3653             case 0x00U:
3654                 onIndex = prim;
3655                 onOutIndex = prim;
3656                 break;
3657             case 0x01U:
3658                 onIndex = prim * 2;
3659                 onOutIndex = prim * 2;
3660                 break;
3661             case 0x03U:
3662                 onIndex = prim;
3663                 onOutIndex = prim * 2;
3664                 break;
3665             case 0x02U:
3666                 onIndex = prim;
3667                 onOutIndex = prim * 2;
3668                 break;
3669             case 0x04U:
3670                 onIndex = prim * 3;
3671                 onOutIndex = prim * 3;
3672                 break;
3673             case 0x05U:
3674                 onIndex = prim;
3675                 onOutIndex = prim * 3;
3676                 break;
3677             case 0x06U:
3678                 onIndex = prim;
3679                 onOutIndex = prim * 3;
3680                 break;
3681         }
3682         generatePrimitive(outIndexBufferUint16, outIndexBufferUint32, firstVertex, indexCount, baseIndex, onIndex, primCount, onOutIndex);
3683     }
3684 }
3685 # 8 "temp_master_source.metal" 2
3686 
3687 
3688 )";
3689