1 // GENERATED FILE - DO NOT EDIT. 2 // Generated by gen_mtl_internal_shaders.py 3 // 4 // Copyright 2020 The ANGLE Project Authors. All rights reserved. 5 // Use of this source code is governed by a BSD-style license that can be 6 // found in the LICENSE file. 7 // 8 9 // C++ string version of combined Metal default shaders. 10 11 static char gDefaultMetallibSrc[] = R"( 12 # 1 "temp_master_source.metal" 13 # 1 "<built-in>" 1 14 # 1 "<built-in>" 3 15 # 435 "<built-in>" 3 16 # 1 "<command line>" 1 17 # 1 "<built-in>" 2 18 # 1 "temp_master_source.metal" 2 19 # 1 ".\\blit.metal" 1 20 21 22 23 24 25 26 27 # 1 ".\\common.h" 1 28 # 13 ".\\common.h" 29 # include <simd/simd.h> 30 # include <metal_stdlib> 31 32 33 # 1 ".\\constants.h" 1 34 # 11 ".\\constants.h" 35 namespace rx 36 { 37 namespace mtl_shader 38 { 39 40 enum 41 { 42 kTextureType2D = 0, 43 kTextureType2DMultisample = 1, 44 kTextureType2DArray = 2, 45 kTextureTypeCube = 3, 46 kTextureType3D = 4, 47 kTextureTypeCount = 5, 48 }; 49 50 51 52 53 } 54 } 55 # 18 ".\\common.h" 2 56 57 58 59 60 61 62 63 using namespace metal; 64 65 66 67 constant uint32_t kNumColorOutputs [[function_constant(0)]]; 68 constant bool kColorOutputAvailable0 = kNumColorOutputs > 0; 69 constant bool kColorOutputAvailable1 = kNumColorOutputs > 1; 70 constant bool kColorOutputAvailable2 = kNumColorOutputs > 2; 71 constant bool kColorOutputAvailable3 = kNumColorOutputs > 3; 72 constant bool kColorOutputAvailable4 = kNumColorOutputs > 4; 73 constant bool kColorOutputAvailable5 = kNumColorOutputs > 5; 74 constant bool kColorOutputAvailable6 = kNumColorOutputs > 6; 75 constant bool kColorOutputAvailable7 = kNumColorOutputs > 7; 76 77 namespace rx 78 { 79 namespace mtl_shader 80 { 81 82 83 constant float2 gCorners[3] = {float2(-1.0f, -1.0f), float2(3.0f, -1.0f), float2(-1.0f, 3.0f)}; 84 85 template <typename T> 86 struct MultipleColorOutputs 87 { 88 vec<T, 4> color0 [[color(0), function_constant(kColorOutputAvailable0)]]; 89 vec<T, 4> color1 [[color(1), function_constant(kColorOutputAvailable1)]]; 90 vec<T, 4> color2 [[color(2), function_constant(kColorOutputAvailable2)]]; 91 vec<T, 4> color3 [[color(3), function_constant(kColorOutputAvailable3)]]; 92 vec<T, 4> color4 [[color(4), function_constant(kColorOutputAvailable4)]]; 93 vec<T, 4> color5 [[color(5), function_constant(kColorOutputAvailable5)]]; 94 vec<T, 4> color6 [[color(6), function_constant(kColorOutputAvailable6)]]; 95 vec<T, 4> color7 [[color(7), function_constant(kColorOutputAvailable7)]]; 96 }; 97 # 69 ".\\common.h" 98 template <typename T> 99 static inline MultipleColorOutputs<T> toMultipleColorOutputs(vec<T, 4> color) 100 { 101 MultipleColorOutputs<T> re; 102 103 do { if (kColorOutputAvailable0) { re.color0 = color; } } while (0); 104 do { if (kColorOutputAvailable1) { re.color1 = color; } } while (0); 105 do { if (kColorOutputAvailable2) { re.color2 = color; } } while (0); 106 do { if (kColorOutputAvailable3) { re.color3 = color; } } while (0); 107 do { if (kColorOutputAvailable4) { re.color4 = color; } } while (0); 108 do { if (kColorOutputAvailable5) { re.color5 = color; } } while (0); 109 do { if (kColorOutputAvailable6) { re.color6 = color; } } while (0); 110 do { if (kColorOutputAvailable7) { re.color7 = color; } } while (0); 111 112 return re; 113 } 114 115 static inline float3 cubeTexcoords(float2 texcoords, int face) 116 { 117 texcoords = 2.0 * texcoords - 1.0; 118 switch (face) 119 { 120 case 0: 121 return float3(1.0, -texcoords.y, -texcoords.x); 122 case 1: 123 return float3(-1.0, -texcoords.y, texcoords.x); 124 case 2: 125 return float3(texcoords.x, 1.0, texcoords.y); 126 case 3: 127 return float3(texcoords.x, -1.0, -texcoords.y); 128 case 4: 129 return float3(texcoords.x, -texcoords.y, 1.0); 130 case 5: 131 return float3(-texcoords.x, -texcoords.y, -1.0); 132 } 133 return float3(texcoords, 0); 134 } 135 136 template <typename T> 137 static inline vec<T, 4> resolveTextureMS(texture2d_ms<T> srcTexture, uint2 coords) 138 { 139 uint samples = srcTexture.get_num_samples(); 140 141 vec<T, 4> output(0); 142 143 for (uint sample = 0; sample < samples; ++sample) 144 { 145 output += srcTexture.read(coords, sample); 146 } 147 148 output = output / samples; 149 150 return output; 151 } 152 153 static inline float4 sRGBtoLinear(float4 color) 154 { 155 float3 linear1 = color.rgb / 12.92; 156 float3 linear2 = powr((color.rgb + float3(0.055)) / 1.055, 2.4); 157 float3 factor = float3(color.rgb <= float3(0.04045)); 158 float4 linear = float4(factor * linear1 + float3(1.0 - factor) * linear2, color.a); 159 160 return linear; 161 } 162 163 static inline float linearToSRGB(float color) 164 { 165 if (color <= 0.0f) 166 return 0.0f; 167 if (color < 0.0031308f) 168 return 12.92f * color; 169 if (color < 1.0f) 170 return 1.055f * powr(color, 0.41666f) - 0.055f; 171 return 1.0f; 172 } 173 174 static inline float4 linearToSRGB(float4 color) 175 { 176 return float4(linearToSRGB(color.r), linearToSRGB(color.g), linearToSRGB(color.b), color.a); 177 } 178 179 template <typename Short> 180 static inline Short bytesToShort(constant uchar *input, uint offset) 181 { 182 Short inputLo = input[offset]; 183 Short inputHi = input[offset + 1]; 184 185 return inputLo | (inputHi << 8); 186 } 187 188 template <typename Int> 189 static inline Int bytesToInt(constant uchar *input, uint offset) 190 { 191 Int input0 = input[offset]; 192 Int input1 = input[offset + 1]; 193 Int input2 = input[offset + 2]; 194 Int input3 = input[offset + 3]; 195 196 return input0 | (input1 << 8) | (input2 << 16) | (input3 << 24); 197 } 198 199 template <typename Short> 200 static inline void shortToBytes(Short val, uint offset, device uchar *output) 201 { 202 ushort valUnsigned = as_type<ushort>(val); 203 output[offset] = valUnsigned & 0xff; 204 output[offset + 1] = (valUnsigned >> 8) & 0xff; 205 } 206 207 template <typename Int> 208 static inline void intToBytes(Int val, uint offset, device uchar *output) 209 { 210 uint valUnsigned = as_type<uint>(val); 211 output[offset] = valUnsigned & 0xff; 212 output[offset + 1] = (valUnsigned >> 8) & 0xff; 213 output[offset + 2] = (valUnsigned >> 16) & 0xff; 214 output[offset + 3] = (valUnsigned >> 24) & 0xff; 215 } 216 217 static inline void floatToBytes(float val, uint offset, device uchar *output) 218 { 219 intToBytes(as_type<uint>(val), offset, output); 220 } 221 222 static inline void int24bitToBytes(uint val, uint offset, device uchar *output) 223 { 224 output[offset] = val & 0xff; 225 output[offset + 1] = (val >> 8) & 0xff; 226 output[offset + 2] = (val >> 16) & 0xff; 227 } 228 229 template <unsigned int inputBitCount, unsigned int inputBitStart, typename T> 230 static inline T getShiftedData(T input) 231 { 232 static_assert(inputBitCount + inputBitStart <= (sizeof(T) * 8), 233 "T must have at least as many bits as inputBitCount + inputBitStart."); 234 const T mask = (1 << inputBitCount) - 1; 235 return (input >> inputBitStart) & mask; 236 } 237 238 template <unsigned int inputBitCount, unsigned int inputBitStart, typename T> 239 static inline T shiftData(T input) 240 { 241 static_assert(inputBitCount + inputBitStart <= (sizeof(T) * 8), 242 "T must have at least as many bits as inputBitCount + inputBitStart."); 243 const T mask = (1 << inputBitCount) - 1; 244 return (input & mask) << inputBitStart; 245 } 246 247 template <unsigned int inputBitCount, typename T> 248 static inline float normalizedToFloat(T input) 249 { 250 static_assert(inputBitCount <= (sizeof(T) * 8), 251 "T must have more bits than or same bits as inputBitCount."); 252 static_assert(inputBitCount <= 23, "Only single precision is supported"); 253 254 constexpr float inverseMax = 1.0f / ((1 << inputBitCount) - 1); 255 return input * inverseMax; 256 } 257 258 template <typename T> 259 static inline float normalizedToFloat(T input) 260 { 261 return normalizedToFloat<sizeof(T) * 8, T>(input); 262 } 263 264 template <> 265 inline float normalizedToFloat(short input) 266 { 267 constexpr float inverseMax = 1.0f / 0x7fff; 268 return static_cast<float>(input) * inverseMax; 269 } 270 271 template <> 272 inline float normalizedToFloat(int input) 273 { 274 constexpr float inverseMax = 1.0f / 0x7fffffff; 275 return static_cast<float>(input) * inverseMax; 276 } 277 278 template <> 279 inline float normalizedToFloat(uint input) 280 { 281 constexpr float inverseMax = 1.0f / 0xffffffff; 282 return static_cast<float>(input) * inverseMax; 283 } 284 285 template <unsigned int outputBitCount, typename T> 286 static inline T floatToNormalized(float input) 287 { 288 static_assert(outputBitCount <= (sizeof(T) * 8), 289 "T must have more bits than or same bits as inputBitCount."); 290 static_assert(outputBitCount > (metal::is_unsigned<T>::value ? 0 : 1), 291 "outputBitCount must be at least 1 not counting the sign bit."); 292 constexpr unsigned int bits = 293 metal::is_unsigned<T>::value ? outputBitCount : outputBitCount - 1; 294 static_assert(bits <= 23, "Only single precision is supported"); 295 296 return static_cast<T>(metal::round(((1 << bits) - 1) * input)); 297 } 298 299 template <typename T> 300 static inline T floatToNormalized(float input) 301 { 302 return floatToNormalized<sizeof(T) * 8, T>(input); 303 } 304 305 } 306 } 307 # 9 ".\\blit.metal" 2 308 309 using namespace rx::mtl_shader; 310 311 312 constant bool kPremultiplyAlpha [[function_constant(1)]]; 313 constant bool kUnmultiplyAlpha [[function_constant(2)]]; 314 constant bool kTransformLinearToSrgb [[function_constant(3)]]; 315 constant int kSourceTextureType [[function_constant(4)]]; 316 constant int kSourceTexture2Type [[function_constant(5)]]; 317 318 constant bool kSourceTextureType2D = kSourceTextureType == kTextureType2D; 319 constant bool kSourceTextureType2DArray = kSourceTextureType == kTextureType2DArray; 320 constant bool kSourceTextureType2DMS = kSourceTextureType == kTextureType2DMultisample; 321 constant bool kSourceTextureTypeCube = kSourceTextureType == kTextureTypeCube; 322 constant bool kSourceTextureType3D = kSourceTextureType == kTextureType3D; 323 324 constant bool kSourceTexture2Type2D = kSourceTexture2Type == kTextureType2D; 325 constant bool kSourceTexture2Type2DArray = kSourceTexture2Type == kTextureType2DArray; 326 constant bool kSourceTexture2Type2DMS = kSourceTexture2Type == kTextureType2DMultisample; 327 constant bool kSourceTexture2TypeCube = kSourceTexture2Type == kTextureTypeCube; 328 329 struct BlitParams 330 { 331 332 float4 srcTexCoords; 333 int srcLevel; 334 int srcLayer; 335 bool dstLuminance; 336 uint8_t padding[7]; 337 }; 338 339 struct BlitVSOut 340 { 341 float4 position [[position]]; 342 float2 texCoords [[center_no_perspective, user(locn1)]]; 343 }; 344 345 vertex BlitVSOut blitVS(unsigned int vid [[vertex_id]], constant BlitParams &options [[buffer(0)]]) 346 { 347 BlitVSOut output; 348 output.position.xy = select(float2(-1.0f), float2(1.0f), bool2(vid & uint2(2, 1))); 349 output.position.zw = float2(0.0, 1.0); 350 output.texCoords = select(options.srcTexCoords.xy, options.srcTexCoords.zw, bool2(vid & uint2(2, 1))); 351 352 return output; 353 } 354 355 template <typename SrcTexture2d> 356 static uint2 getImageCoords(SrcTexture2d srcTexture, float2 texCoords) 357 { 358 uint2 dimens(srcTexture.get_width(), srcTexture.get_height()); 359 uint2 coords = uint2(texCoords * float2(dimens)); 360 361 return coords; 362 } 363 364 template <typename T> 365 static inline vec<T, 4> blitSampleTextureMS(texture2d_ms<T> srcTexture, float2 texCoords) 366 { 367 uint2 coords = getImageCoords(srcTexture, texCoords); 368 return resolveTextureMS(srcTexture, coords); 369 } 370 371 template <typename T> 372 static inline vec<T, 4> blitSampleTexture3D(texture3d<T> srcTexture, 373 sampler textureSampler, 374 float2 texCoords, 375 constant BlitParams &options) 376 { 377 uint depth = srcTexture.get_depth(options.srcLevel); 378 float zCoord = (float(options.srcLayer) + 0.5) / float(depth); 379 380 return srcTexture.sample(textureSampler, float3(texCoords, zCoord), level(options.srcLevel)); 381 } 382 # 101 ".\\blit.metal" 383 template <typename T> 384 static inline vec<T, 4> blitReadTexture(BlitVSOut input [[stage_in]], texture2d<T> srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], texture2d_array<T> srcTexture2dArray [[texture(0), function_constant(kSourceTextureType2DArray)]], texture2d_ms<T> srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]], texturecube<T> srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]], texture3d<T> srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]], sampler textureSampler [[sampler(0)]], constant BlitParams &options [[buffer(0)]]) 385 { 386 vec<T, 4> output; 387 388 switch (kSourceTextureType) 389 { 390 case kTextureType2D: 391 output = srcTexture2d.sample(textureSampler, input.texCoords, level(options.srcLevel)); 392 break; 393 case kTextureType2DArray: 394 output = srcTexture2dArray.sample(textureSampler, input.texCoords, options.srcLayer, 395 level(options.srcLevel)); 396 break; 397 case kTextureType2DMultisample: 398 output = blitSampleTextureMS(srcTexture2dMS, input.texCoords); 399 break; 400 case kTextureTypeCube: 401 output = srcTextureCube.sample(textureSampler, 402 cubeTexcoords(input.texCoords, options.srcLayer), 403 level(options.srcLevel)); 404 break; 405 case kTextureType3D: 406 output = blitSampleTexture3D(srcTexture3d, textureSampler, input.texCoords, options); 407 break; 408 } 409 410 if (kTransformLinearToSrgb) { 411 output.x = linearToSRGB(output.x); 412 output.y = linearToSRGB(output.y); 413 output.z = linearToSRGB(output.z); 414 } 415 if (kUnmultiplyAlpha) 416 { 417 if (output.a != 0.0) 418 { 419 output.xyz /= output.a; 420 } 421 } 422 if (kPremultiplyAlpha) 423 { 424 output.xyz *= output.a; 425 } 426 427 if (options.dstLuminance) 428 { 429 output.g = output.b = output.r; 430 } 431 432 return output; 433 } 434 435 template <typename T> 436 static inline MultipleColorOutputs<T> blitFS(BlitVSOut input [[stage_in]], texture2d<T> srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], texture2d_array<T> srcTexture2dArray [[texture(0), function_constant(kSourceTextureType2DArray)]], texture2d_ms<T> srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]], texturecube<T> srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]], texture3d<T> srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]], sampler textureSampler [[sampler(0)]], constant BlitParams &options [[buffer(0)]]) 437 { 438 vec<T, 4> output = blitReadTexture(input, srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, srcTexture3d, textureSampler, options); 439 440 return toMultipleColorOutputs(output); 441 } 442 443 fragment MultipleColorOutputs<float> blitFloatFS(BlitVSOut input [[stage_in]], texture2d<float> srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], texture2d_array<float> srcTexture2dArray [[texture(0), function_constant(kSourceTextureType2DArray)]], texture2d_ms<float> srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]], texturecube<float> srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]], texture3d<float> srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]], sampler textureSampler [[sampler(0)]], constant BlitParams &options [[buffer(0)]]) 444 { 445 return blitFS(input, srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, srcTexture3d, textureSampler, options); 446 } 447 fragment MultipleColorOutputs<int> blitIntFS(BlitVSOut input [[stage_in]], texture2d<int> srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], texture2d_array<int> srcTexture2dArray [[texture(0), function_constant(kSourceTextureType2DArray)]], texture2d_ms<int> srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]], texturecube<int> srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]], texture3d<int> srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]], sampler textureSampler [[sampler(0)]], constant BlitParams &options [[buffer(0)]]) 448 { 449 return blitFS(input, srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, srcTexture3d, textureSampler, options); 450 } 451 fragment MultipleColorOutputs<uint> blitUIntFS(BlitVSOut input [[stage_in]], texture2d<uint> srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], texture2d_array<uint> srcTexture2dArray [[texture(0), function_constant(kSourceTextureType2DArray)]], texture2d_ms<uint> srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]], texturecube<uint> srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]], texture3d<uint> srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]], sampler textureSampler [[sampler(0)]], constant BlitParams &options [[buffer(0)]]) 452 { 453 return blitFS(input, srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, srcTexture3d, textureSampler, options); 454 } 455 456 fragment MultipleColorOutputs<uint> copyTextureFloatToUIntFS(BlitVSOut input [[stage_in]], texture2d<float> srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], texture2d_array<float> srcTexture2dArray [[texture(0), function_constant(kSourceTextureType2DArray)]], texture2d_ms<float> srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]], texturecube<float> srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]], texture3d<float> srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]], sampler textureSampler [[sampler(0)]], constant BlitParams &options [[buffer(0)]]) 457 { 458 float4 inputColor = blitReadTexture<>(input, srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, srcTexture3d, textureSampler, options); 459 uint4 output = uint4(inputColor * float4(255.0)); 460 461 return toMultipleColorOutputs(output); 462 } 463 464 465 struct FragmentDepthOut 466 { 467 float depth [[depth(any)]]; 468 }; 469 470 static inline float sampleDepth( 471 texture2d<float> srcTexture2d [[function_constant(kSourceTextureType2D)]], 472 texture2d_array<float> srcTexture2dArray [[function_constant(kSourceTextureType2DArray)]], 473 texture2d_ms<float> srcTexture2dMS [[function_constant(kSourceTextureType2DMS)]], 474 texturecube<float> srcTextureCube [[function_constant(kSourceTextureTypeCube)]], 475 float2 texCoords, 476 constant BlitParams &options) 477 { 478 float4 output; 479 480 constexpr sampler textureSampler(mag_filter::nearest, min_filter::nearest); 481 482 switch (kSourceTextureType) 483 { 484 case kTextureType2D: 485 output = srcTexture2d.sample(textureSampler, texCoords, level(options.srcLevel)); 486 break; 487 case kTextureType2DArray: 488 output = srcTexture2dArray.sample(textureSampler, texCoords, options.srcLayer, 489 level(options.srcLevel)); 490 break; 491 case kTextureType2DMultisample: 492 493 output = srcTexture2dMS.read(getImageCoords(srcTexture2dMS, texCoords), 0); 494 break; 495 case kTextureTypeCube: 496 output = 497 srcTextureCube.sample(textureSampler, cubeTexcoords(texCoords, options.srcLayer), 498 level(options.srcLevel)); 499 break; 500 } 501 502 return output.r; 503 } 504 505 fragment FragmentDepthOut blitDepthFS(BlitVSOut input [[stage_in]], 506 texture2d<float> srcTexture2d 507 [[texture(0), function_constant(kSourceTextureType2D)]], 508 texture2d_array<float> srcTexture2dArray 509 [[texture(0), function_constant(kSourceTextureType2DArray)]], 510 texture2d_ms<float> srcTexture2dMS 511 [[texture(0), function_constant(kSourceTextureType2DMS)]], 512 texturecube<float> srcTextureCube 513 [[texture(0), function_constant(kSourceTextureTypeCube)]], 514 constant BlitParams &options [[buffer(0)]]) 515 { 516 FragmentDepthOut re; 517 518 re.depth = sampleDepth(srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, 519 input.texCoords, options); 520 521 return re; 522 } 523 524 static inline uint32_t sampleStencil( 525 texture2d<uint32_t> srcTexture2d [[function_constant(kSourceTexture2Type2D)]], 526 texture2d_array<uint32_t> srcTexture2dArray [[function_constant(kSourceTexture2Type2DArray)]], 527 texture2d_ms<uint32_t> srcTexture2dMS [[function_constant(kSourceTexture2Type2DMS)]], 528 texturecube<uint32_t> srcTextureCube [[function_constant(kSourceTexture2TypeCube)]], 529 float2 texCoords, 530 int srcLevel, 531 int srcLayer) 532 { 533 uint4 output; 534 constexpr sampler textureSampler(mag_filter::nearest, min_filter::nearest); 535 536 switch (kSourceTexture2Type) 537 { 538 case kTextureType2D: 539 output = srcTexture2d.sample(textureSampler, texCoords, level(srcLevel)); 540 break; 541 case kTextureType2DArray: 542 output = srcTexture2dArray.sample(textureSampler, texCoords, srcLayer, level(srcLevel)); 543 break; 544 case kTextureType2DMultisample: 545 546 output = srcTexture2dMS.read(getImageCoords(srcTexture2dMS, texCoords), 0); 547 break; 548 case kTextureTypeCube: 549 output = srcTextureCube.sample(textureSampler, cubeTexcoords(texCoords, srcLayer), 550 level(srcLevel)); 551 break; 552 } 553 554 return output.r; 555 } 556 557 558 struct BlitStencilToBufferParams 559 { 560 float2 srcStartTexCoords; 561 float2 srcTexCoordSteps; 562 int srcLevel; 563 int srcLayer; 564 565 uint2 dstSize; 566 uint dstBufferRowPitch; 567 568 bool resolveMS; 569 }; 570 571 kernel void blitStencilToBufferCS(ushort2 gIndices [[thread_position_in_grid]], 572 texture2d<uint32_t> srcTexture2d 573 [[texture(1), function_constant(kSourceTexture2Type2D)]], 574 texture2d_array<uint32_t> srcTexture2dArray 575 [[texture(1), function_constant(kSourceTexture2Type2DArray)]], 576 texture2d_ms<uint32_t> srcTexture2dMS 577 [[texture(1), function_constant(kSourceTexture2Type2DMS)]], 578 texturecube<uint32_t> srcTextureCube 579 [[texture(1), function_constant(kSourceTexture2TypeCube)]], 580 constant BlitStencilToBufferParams &options [[buffer(0)]], 581 device uchar *buffer [[buffer(1)]]) 582 { 583 if (gIndices.x >= options.dstSize.x || gIndices.y >= options.dstSize.y) 584 { 585 return; 586 } 587 588 float2 srcTexCoords = options.srcStartTexCoords + float2(gIndices) * options.srcTexCoordSteps; 589 590 if (kSourceTexture2Type == kTextureType2DMultisample && !options.resolveMS) 591 { 592 uint samples = srcTexture2dMS.get_num_samples(); 593 uint2 imageCoords = getImageCoords(srcTexture2dMS, srcTexCoords); 594 uint bufferOffset = options.dstBufferRowPitch * gIndices.y + samples * gIndices.x; 595 596 for (uint sample = 0; sample < samples; ++sample) 597 { 598 uint stencilPerSample = srcTexture2dMS.read(imageCoords, sample).r; 599 buffer[bufferOffset + sample] = static_cast<uchar>(stencilPerSample); 600 } 601 } 602 else 603 { 604 uint32_t stencil = 605 sampleStencil(srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, 606 srcTexCoords, options.srcLevel, options.srcLayer); 607 608 buffer[options.dstBufferRowPitch * gIndices.y + gIndices.x] = static_cast<uchar>(stencil); 609 } 610 } 611 612 613 #if __METAL_VERSION__ >= 210 614 615 struct FragmentStencilOut 616 { 617 uint32_t stencil [[stencil]]; 618 }; 619 620 struct FragmentDepthStencilOut 621 { 622 float depth [[depth(any)]]; 623 uint32_t stencil [[stencil]]; 624 }; 625 626 fragment FragmentStencilOut blitStencilFS( 627 BlitVSOut input [[stage_in]], 628 texture2d<uint32_t> srcTexture2d [[texture(1), function_constant(kSourceTexture2Type2D)]], 629 texture2d_array<uint32_t> srcTexture2dArray 630 [[texture(1), function_constant(kSourceTexture2Type2DArray)]], 631 texture2d_ms<uint32_t> srcTexture2dMS 632 [[texture(1), function_constant(kSourceTexture2Type2DMS)]], 633 texturecube<uint32_t> srcTextureCube [[texture(1), function_constant(kSourceTexture2TypeCube)]], 634 constant BlitParams &options [[buffer(0)]]) 635 { 636 FragmentStencilOut re; 637 638 re.stencil = sampleStencil(srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, 639 input.texCoords, options.srcLevel, options.srcLayer); 640 641 return re; 642 } 643 644 fragment FragmentDepthStencilOut blitDepthStencilFS( 645 BlitVSOut input [[stage_in]], 646 647 texture2d<float> srcDepthTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], 648 texture2d_array<float> srcDepthTexture2dArray 649 [[texture(0), function_constant(kSourceTextureType2DArray)]], 650 texture2d_ms<float> srcDepthTexture2dMS 651 [[texture(0), function_constant(kSourceTextureType2DMS)]], 652 texturecube<float> srcDepthTextureCube 653 [[texture(0), function_constant(kSourceTextureTypeCube)]], 654 655 656 texture2d<uint32_t> srcStencilTexture2d 657 [[texture(1), function_constant(kSourceTexture2Type2D)]], 658 texture2d_array<uint32_t> srcStencilTexture2dArray 659 [[texture(1), function_constant(kSourceTexture2Type2DArray)]], 660 texture2d_ms<uint32_t> srcStencilTexture2dMS 661 [[texture(1), function_constant(kSourceTexture2Type2DMS)]], 662 texturecube<uint32_t> srcStencilTextureCube 663 [[texture(1), function_constant(kSourceTexture2TypeCube)]], 664 665 constant BlitParams &options [[buffer(0)]]) 666 { 667 FragmentDepthStencilOut re; 668 669 re.depth = sampleDepth(srcDepthTexture2d, srcDepthTexture2dArray, srcDepthTexture2dMS, 670 srcDepthTextureCube, input.texCoords, options); 671 re.stencil = 672 sampleStencil(srcStencilTexture2d, srcStencilTexture2dArray, srcStencilTexture2dMS, 673 srcStencilTextureCube, input.texCoords, options.srcLevel, options.srcLayer); 674 return re; 675 } 676 #endif 677 # 2 "temp_master_source.metal" 2 678 # 1 ".\\clear.metal" 1 679 # 10 ".\\clear.metal" 680 using namespace rx::mtl_shader; 681 682 struct ClearParams 683 { 684 float4 clearColor; 685 float clearDepth; 686 }; 687 688 vertex float4 clearVS(unsigned int vid [[ vertex_id ]], 689 constant ClearParams &clearParams [[buffer(0)]]) 690 { 691 return float4(gCorners[vid], clearParams.clearDepth, 1.0); 692 } 693 694 fragment MultipleColorOutputs<float> clearFloatFS(constant ClearParams &clearParams [[buffer(0)]]) 695 { 696 return toMultipleColorOutputs(clearParams.clearColor); 697 } 698 699 fragment MultipleColorOutputs<int> clearIntFS(constant ClearParams &clearParams [[buffer(0)]]) 700 { 701 return toMultipleColorOutputs(as_type<int4>(clearParams.clearColor)); 702 } 703 704 fragment MultipleColorOutputs<uint> clearUIntFS(constant ClearParams &clearParams [[buffer(0)]]) 705 { 706 return toMultipleColorOutputs(as_type<uint4>(clearParams.clearColor)); 707 } 708 # 3 "temp_master_source.metal" 2 709 # 1 ".\\gen_indices.metal" 1 710 711 712 713 714 715 716 717 718 using namespace rx::mtl_shader; 719 720 721 constant bool kSourceBufferAligned[[function_constant(100)]]; 722 constant bool kSourceIndexIsU8[[function_constant(200)]]; 723 constant bool kSourceIndexIsU16[[function_constant(300)]]; 724 constant bool kSourceIndexIsU32[[function_constant(400)]]; 725 constant bool kSourceBufferUnaligned = !kSourceBufferAligned; 726 constant bool kUseSourceBufferU8 = kSourceIndexIsU8 || kSourceBufferUnaligned; 727 constant bool kUseSourceBufferU16 = kSourceIndexIsU16 && kSourceBufferAligned; 728 constant bool kUseSourceBufferU32 = kSourceIndexIsU32 && kSourceBufferAligned; 729 730 struct IndexConversionParams 731 { 732 uint32_t srcOffset; 733 uint32_t indexCount; 734 bool primitiveRestartEnabled; 735 }; 736 737 738 739 inline ushort getIndexAligned(constant ushort *inputAligned, uint offset, uint idx) 740 { 741 return inputAligned[offset / 2 + idx]; 742 } 743 inline uint getIndexAligned(constant uint *inputAligned, uint offset, uint idx) 744 { 745 return inputAligned[offset / 4 + idx]; 746 } 747 inline uchar getIndexAligned(constant uchar *input, uint offset, uint idx) 748 { 749 return input[offset + idx]; 750 } 751 inline ushort getIndexUnalignedU16(constant uchar *input, uint offset, uint idx) 752 { 753 ushort inputLo = input[offset + 2 * idx]; 754 ushort inputHi = input[offset + 2 * idx + 1]; 755 756 return inputLo | (inputHi << 8); 757 } 758 inline uint getIndexUnalignedU32(constant uchar *input, uint offset, uint idx) 759 { 760 uint input0 = input[offset + 4 * idx]; 761 uint input1 = input[offset + 4 * idx + 1]; 762 uint input2 = input[offset + 4 * idx + 2]; 763 uint input3 = input[offset + 4 * idx + 3]; 764 765 return input0 | (input1 << 8) | (input2 << 16) | (input3 << 24); 766 } 767 768 kernel void convertIndexU8ToU16(uint idx [[thread_position_in_grid]], 769 constant IndexConversionParams &options [[buffer(0)]], 770 constant uchar *input [[buffer(1)]], 771 device ushort *output [[buffer(2)]]) 772 { 773 if (idx >= options.indexCount) { return; }; 774 775 uchar value = getIndexAligned(input, options.srcOffset, idx); 776 777 if (options.primitiveRestartEnabled && value == 0xff) 778 { 779 output[idx] = 0xffff; 780 } 781 else 782 { 783 output[idx] = value; 784 } 785 } 786 787 kernel void convertIndexU16(uint idx [[thread_position_in_grid]], 788 constant IndexConversionParams &options [[buffer(0)]], 789 constant uchar *input 790 [[buffer(1), function_constant(kSourceBufferUnaligned)]], 791 constant ushort *inputAligned 792 [[buffer(1), function_constant(kSourceBufferAligned)]], 793 device ushort *output [[buffer(2)]]) 794 { 795 if (idx >= options.indexCount) { return; }; 796 797 ushort value; 798 if (kSourceBufferAligned) 799 { 800 value = getIndexAligned(inputAligned, options.srcOffset, idx); 801 } 802 else 803 { 804 value = getIndexUnalignedU16(input, options.srcOffset, idx); 805 } 806 output[idx] = value; 807 } 808 809 kernel void convertIndexU32(uint idx [[thread_position_in_grid]], 810 constant IndexConversionParams &options [[buffer(0)]], 811 constant uchar *input 812 [[buffer(1), function_constant(kSourceBufferUnaligned)]], 813 constant uint *inputAligned 814 [[buffer(1), function_constant(kSourceBufferAligned)]], 815 device uint *output [[buffer(2)]]) 816 { 817 if (idx >= options.indexCount) { return; }; 818 819 uint value; 820 if (kSourceBufferAligned) 821 { 822 value = getIndexAligned(inputAligned, options.srcOffset, idx); 823 } 824 else 825 { 826 value = getIndexUnalignedU32(input, options.srcOffset, idx); 827 } 828 output[idx] = value; 829 } 830 831 struct IndexFromArrayParams 832 { 833 uint firstVertex; 834 835 uint vertexCount; 836 }; 837 838 839 kernel void genTriFanIndicesFromArray(uint idx [[thread_position_in_grid]], 840 constant IndexFromArrayParams &options [[buffer(0)]], 841 device uint *output [[buffer(2)]]) 842 { 843 if (idx >= options.vertexCount) { return; }; 844 845 uint vertexIdx = options.firstVertex + 2 + idx; 846 847 848 849 output[3 * idx ] = vertexIdx - 1; 850 output[3 * idx + 1] = vertexIdx; 851 output[3 * idx + 2] = options.firstVertex; 852 } 853 854 inline uint getIndexU32(uint offset, 855 uint idx, 856 constant uchar *inputU8 [[function_constant(kUseSourceBufferU8)]], 857 constant ushort *inputU16 [[function_constant(kUseSourceBufferU16)]], 858 constant uint *inputU32 [[function_constant(kUseSourceBufferU32)]]) 859 { 860 if (kUseSourceBufferU8) 861 { 862 if (kSourceIndexIsU16) 863 { 864 return getIndexUnalignedU16(inputU8, offset, idx); 865 } 866 else if (kSourceIndexIsU32) 867 { 868 return getIndexUnalignedU32(inputU8, offset, idx); 869 } 870 return getIndexAligned(inputU8, offset, idx); 871 } 872 else if (kUseSourceBufferU16) 873 { 874 return getIndexAligned(inputU16, offset, idx); 875 } 876 else if (kUseSourceBufferU32) 877 { 878 return getIndexAligned(inputU32, offset, idx); 879 } 880 return 0; 881 } 882 883 884 885 886 kernel void genTriFanIndicesFromElements(uint idx [[thread_position_in_grid]], 887 constant IndexConversionParams &options [[buffer(0)]], 888 constant uchar *inputU8 889 [[buffer(1), function_constant(kUseSourceBufferU8)]], 890 constant ushort *inputU16 891 [[buffer(1), function_constant(kUseSourceBufferU16)]], 892 constant uint *inputU32 893 [[buffer(1), function_constant(kUseSourceBufferU32)]], 894 device uint *output [[buffer(2)]]) 895 { 896 if (idx >= options.indexCount) { return; }; 897 898 uint elemIdx = 2 + idx; 899 900 output[3 * idx] = getIndexU32(options.srcOffset, 0, inputU8, inputU16, inputU32); 901 output[3 * idx + 1] = getIndexU32(options.srcOffset, elemIdx - 1, inputU8, inputU16, inputU32); 902 output[3 * idx + 2] = getIndexU32(options.srcOffset, elemIdx, inputU8, inputU16, inputU32); 903 } 904 905 906 kernel void genLineLoopIndicesFromArray(uint idx [[thread_position_in_grid]], 907 constant IndexFromArrayParams &options [[buffer(0)]], 908 device uint *output [[buffer(2)]]) 909 { 910 uint totalIndices = options.vertexCount + 1; 911 if (idx >= totalIndices) { return; }; 912 913 output[idx] = options.firstVertex + idx % options.vertexCount; 914 } 915 916 917 918 kernel void genLineLoopIndicesFromElements(uint idx [[thread_position_in_grid]], 919 constant IndexConversionParams &options [[buffer(0)]], 920 constant uchar *inputU8 921 [[buffer(1), function_constant(kUseSourceBufferU8)]], 922 constant ushort *inputU16 923 [[buffer(1), function_constant(kUseSourceBufferU16)]], 924 constant uint *inputU32 925 [[buffer(1), function_constant(kUseSourceBufferU32)]], 926 device uint *output [[buffer(2)]]) 927 { 928 uint totalTargetIndices = options.indexCount + 1; 929 if (idx >= totalTargetIndices) { return; }; 930 931 output[idx] = 932 getIndexU32(options.srcOffset, idx % options.indexCount, inputU8, inputU16, inputU32); 933 } 934 # 4 "temp_master_source.metal" 2 935 # 1 ".\\gen_mipmap.metal" 1 936 937 938 939 940 941 942 943 944 using namespace rx::mtl_shader; 945 # 31 ".\\gen_mipmap.metal" 946 struct GenMipParams 947 { 948 uint srcLevel; 949 uint numMipLevelsToGen; 950 bool sRGB; 951 }; 952 953 954 955 kernel void generate3DMipmaps(uint lIndex [[thread_index_in_threadgroup]], 956 ushort3 gIndices [[thread_position_in_grid]], 957 texture3d<float> srcTexture [[texture(0)]], 958 texture3d<float, access::write> dstMip1 [[texture(1)]], 959 texture3d<float, access::write> dstMip2 [[texture(2)]], 960 texture3d<float, access::write> dstMip3 [[texture(3)]], 961 texture3d<float, access::write> dstMip4 [[texture(4)]], 962 constant GenMipParams &options [[buffer(0)]]) 963 { 964 ushort3 mipSize = ushort3(dstMip1.get_width(), dstMip1.get_height(), dstMip1.get_depth()); 965 bool validThread = gIndices.x < mipSize.x && gIndices.y < mipSize.y && gIndices.z < mipSize.z; 966 967 constexpr sampler textureSampler(mag_filter::linear, min_filter::linear, mip_filter::linear); 968 969 970 971 972 threadgroup float sR[(8 * 8 * 8)]; 973 threadgroup float sG[(8 * 8 * 8)]; 974 threadgroup float sB[(8 * 8 * 8)]; 975 threadgroup float sA[(8 * 8 * 8)]; 976 977 978 float4 texel1; 979 if (validThread) 980 { 981 float3 texCoords = (float3(gIndices) + float3(0.5, 0.5, 0.5)) / float3(mipSize); 982 texel1 = srcTexture.sample(textureSampler, texCoords, level(options.srcLevel)); 983 984 985 dstMip1.write(texel1, gIndices); 986 } 987 else 988 { 989 990 lIndex = 0xffffffff; 991 } 992 993 if (options.numMipLevelsToGen == 1) 994 { 995 return; 996 } 997 998 999 1000 1001 if (options.sRGB) 1002 { 1003 texel1 = linearToSRGB(texel1); 1004 } 1005 sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;; 1006 1007 threadgroup_barrier(mem_flags::mem_threadgroup); 1008 1009 1010 if ((lIndex & 0x49) == 0) 1011 { 1012 bool3 atEdge = gIndices == (mipSize - ushort3(1)); 1013 1014 1015 1016 float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 1], sG[lIndex + 1], sB[lIndex + 1], sA[lIndex + 1])); 1017 1018 float4 texel3 = (atEdge.y) ? (texel1) : (float4(sR[lIndex + 8], sG[lIndex + 8], sB[lIndex + 8], sA[lIndex + 8])); 1019 1020 float4 texel4 = (atEdge.z) ? (texel1) : (float4(sR[lIndex + (8 * 8)], sG[lIndex + (8 * 8)], sB[lIndex + (8 * 8)], sA[lIndex + (8 * 8)])); 1021 1022 float4 texel5 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (8 + 1)], sG[lIndex + (8 + 1)], sB[lIndex + (8 + 1)], sA[lIndex + (8 + 1)])); 1023 1024 1025 float4 texel6 = (atEdge.x | atEdge.z) ? (texel2) : (float4(sR[lIndex + ((8 * 8) + 1)], sG[lIndex + ((8 * 8) + 1)], sB[lIndex + ((8 * 8) + 1)], sA[lIndex + ((8 * 8) + 1)])); 1026 1027 1028 float4 texel7 = (atEdge.y | atEdge.z) ? (texel3) : (float4(sR[lIndex + ((8 * 8) + 8)], sG[lIndex + ((8 * 8) + 8)], sB[lIndex + ((8 * 8) + 8)], sA[lIndex + ((8 * 8) + 8)])); 1029 1030 1031 float4 texel8 = 1032 (atEdge.x | atEdge.y | atEdge.z) ? (texel5) : (float4(sR[lIndex + ((8 * 8) + 8 + 1)], sG[lIndex + ((8 * 8) + 8 + 1)], sB[lIndex + ((8 * 8) + 8 + 1)], sA[lIndex + ((8 * 8) + 8 + 1)])); 1033 1034 1035 texel1 = (texel1 + texel2 + texel3 + texel4 + texel5 + texel6 + texel7 + texel8) / 8.0; 1036 1037 dstMip2.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices >> 1); 1038 1039 1040 sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;; 1041 } 1042 1043 if (options.numMipLevelsToGen == 2) 1044 { 1045 return; 1046 } 1047 1048 1049 threadgroup_barrier(mem_flags::mem_threadgroup); 1050 1051 1052 if ((lIndex & 0xdb) == 0) 1053 { 1054 mipSize = max(mipSize >> 1, ushort3(1)); 1055 bool3 atEdge = (gIndices >> 1) == (mipSize - ushort3(1)); 1056 1057 1058 1059 float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 2], sG[lIndex + 2], sB[lIndex + 2], sA[lIndex + 2])); 1060 1061 float4 texel3 = 1062 (atEdge.y) ? (texel1) : (float4(sR[lIndex + (2 * 8)], sG[lIndex + (2 * 8)], sB[lIndex + (2 * 8)], sA[lIndex + (2 * 8)])); 1063 1064 float4 texel4 = 1065 (atEdge.z) ? (texel1) : (float4(sR[lIndex + (2 * (8 * 8))], sG[lIndex + (2 * (8 * 8))], sB[lIndex + (2 * (8 * 8))], sA[lIndex + (2 * (8 * 8))])); 1066 1067 float4 texel5 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (2 * 8 + 2)], sG[lIndex + (2 * 8 + 2)], sB[lIndex + (2 * 8 + 2)], sA[lIndex + (2 * 8 + 2)])); 1068 1069 1070 float4 texel6 = (atEdge.x | atEdge.z) ? (texel2) : (float4(sR[lIndex + (2 * (8 * 8) + 2)], sG[lIndex + (2 * (8 * 8) + 2)], sB[lIndex + (2 * (8 * 8) + 2)], sA[lIndex + (2 * (8 * 8) + 2)])); 1071 1072 1073 float4 texel7 = (atEdge.y | atEdge.z) ? (texel3) : (float4(sR[lIndex + (2 * (8 * 8) + 2 * 8)], sG[lIndex + (2 * (8 * 8) + 2 * 8)], sB[lIndex + (2 * (8 * 8) + 2 * 8)], sA[lIndex + (2 * (8 * 8) + 2 * 8)])); 1074 1075 1076 1077 float4 texel8 = (atEdge.x | atEdge.y | atEdge.z) ? (texel5) : (float4(sR[lIndex + (2 * (8 * 8) + 2 * 8 + 2)], sG[lIndex + (2 * (8 * 8) + 2 * 8 + 2)], sB[lIndex + (2 * (8 * 8) + 2 * 8 + 2)], sA[lIndex + (2 * (8 * 8) + 2 * 8 + 2)])); 1078 1079 1080 1081 texel1 = (texel1 + texel2 + texel3 + texel4 + texel5 + texel6 + texel7 + texel8) / 8.0; 1082 1083 dstMip3.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices >> 2); 1084 1085 1086 sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;; 1087 } 1088 1089 if (options.numMipLevelsToGen == 3) 1090 { 1091 return; 1092 } 1093 1094 1095 threadgroup_barrier(mem_flags::mem_threadgroup); 1096 1097 1098 if ((lIndex & 0x1ff) == 0) 1099 { 1100 mipSize = max(mipSize >> 1, ushort3(1)); 1101 bool3 atEdge = (gIndices >> 2) == (mipSize - ushort3(1)); 1102 1103 1104 1105 float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 4], sG[lIndex + 4], sB[lIndex + 4], sA[lIndex + 4])); 1106 1107 float4 texel3 = 1108 (atEdge.y) ? (texel1) : (float4(sR[lIndex + (4 * 8)], sG[lIndex + (4 * 8)], sB[lIndex + (4 * 8)], sA[lIndex + (4 * 8)])); 1109 1110 float4 texel4 = 1111 (atEdge.z) ? (texel1) : (float4(sR[lIndex + (4 * (8 * 8))], sG[lIndex + (4 * (8 * 8))], sB[lIndex + (4 * (8 * 8))], sA[lIndex + (4 * (8 * 8))])); 1112 1113 float4 texel5 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (4 * 8 + 4)], sG[lIndex + (4 * 8 + 4)], sB[lIndex + (4 * 8 + 4)], sA[lIndex + (4 * 8 + 4)])); 1114 1115 1116 float4 texel6 = (atEdge.x | atEdge.z) ? (texel2) : (float4(sR[lIndex + (4 * (8 * 8) + 4)], sG[lIndex + (4 * (8 * 8) + 4)], sB[lIndex + (4 * (8 * 8) + 4)], sA[lIndex + (4 * (8 * 8) + 4)])); 1117 1118 1119 float4 texel7 = (atEdge.y | atEdge.z) ? (texel3) : (float4(sR[lIndex + (4 * (8 * 8) + 4 * 8)], sG[lIndex + (4 * (8 * 8) + 4 * 8)], sB[lIndex + (4 * (8 * 8) + 4 * 8)], sA[lIndex + (4 * (8 * 8) + 4 * 8)])); 1120 1121 1122 1123 float4 texel8 = (atEdge.x | atEdge.y | atEdge.z) ? (texel5) : (float4(sR[lIndex + (4 * (8 * 8) + 4 * 8 + 4)], sG[lIndex + (4 * (8 * 8) + 4 * 8 + 4)], sB[lIndex + (4 * (8 * 8) + 4 * 8 + 4)], sA[lIndex + (4 * (8 * 8) + 4 * 8 + 4)])); 1124 1125 1126 1127 texel1 = (texel1 + texel2 + texel3 + texel4 + texel5 + texel6 + texel7 + texel8) / 8.0; 1128 1129 dstMip4.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices >> 3); 1130 } 1131 } 1132 1133 kernel void generate2DMipmaps(uint lIndex [[thread_index_in_threadgroup]], 1134 ushort2 gIndices [[thread_position_in_grid]], 1135 texture2d<float> srcTexture [[texture(0)]], 1136 texture2d<float, access::write> dstMip1 [[texture(1)]], 1137 texture2d<float, access::write> dstMip2 [[texture(2)]], 1138 texture2d<float, access::write> dstMip3 [[texture(3)]], 1139 texture2d<float, access::write> dstMip4 [[texture(4)]], 1140 constant GenMipParams &options [[buffer(0)]]) 1141 { 1142 uint firstMipLevel = options.srcLevel + 1; 1143 ushort2 mipSize = 1144 ushort2(srcTexture.get_width(firstMipLevel), srcTexture.get_height(firstMipLevel)); 1145 bool validThread = gIndices.x < mipSize.x && gIndices.y < mipSize.y; 1146 1147 constexpr sampler textureSampler(mag_filter::linear, min_filter::linear, mip_filter::linear); 1148 1149 1150 1151 1152 threadgroup float sR[(8 * 8)]; 1153 threadgroup float sG[(8 * 8)]; 1154 threadgroup float sB[(8 * 8)]; 1155 threadgroup float sA[(8 * 8)]; 1156 1157 1158 float4 texel1; 1159 if (validThread) 1160 { 1161 float2 texCoords = (float2(gIndices) + float2(0.5, 0.5)) / float2(mipSize); 1162 texel1 = srcTexture.sample(textureSampler, texCoords, level(options.srcLevel)); 1163 1164 1165 dstMip1.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices); 1166 } 1167 else 1168 { 1169 1170 lIndex = 0xffffffff; 1171 } 1172 1173 if (options.numMipLevelsToGen == 1) 1174 { 1175 return; 1176 } 1177 1178 1179 1180 1181 sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;; 1182 1183 threadgroup_barrier(mem_flags::mem_threadgroup); 1184 1185 1186 if ((lIndex & 0x09) == 0) 1187 { 1188 bool2 atEdge = gIndices == (mipSize - ushort2(1)); 1189 1190 1191 1192 float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 1], sG[lIndex + 1], sB[lIndex + 1], sA[lIndex + 1])); 1193 1194 float4 texel3 = (atEdge.y) ? (texel1) : (float4(sR[lIndex + 8], sG[lIndex + 8], sB[lIndex + 8], sA[lIndex + 8])); 1195 1196 float4 texel4 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (8 + 1)], sG[lIndex + (8 + 1)], sB[lIndex + (8 + 1)], sA[lIndex + (8 + 1)])); 1197 1198 1199 texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0; 1200 1201 dstMip2.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices >> 1); 1202 1203 1204 sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;; 1205 } 1206 1207 if (options.numMipLevelsToGen == 2) 1208 { 1209 return; 1210 } 1211 1212 1213 threadgroup_barrier(mem_flags::mem_threadgroup); 1214 1215 1216 if ((lIndex & 0x1b) == 0) 1217 { 1218 mipSize = max(mipSize >> 1, ushort2(1)); 1219 bool2 atEdge = (gIndices >> 1) == (mipSize - ushort2(1)); 1220 1221 1222 1223 float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 2], sG[lIndex + 2], sB[lIndex + 2], sA[lIndex + 2])); 1224 1225 float4 texel3 = 1226 (atEdge.y) ? (texel1) : (float4(sR[lIndex + 2 * 8], sG[lIndex + 2 * 8], sB[lIndex + 2 * 8], sA[lIndex + 2 * 8])); 1227 1228 float4 texel4 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (2 * 8 + 2)], sG[lIndex + (2 * 8 + 2)], sB[lIndex + (2 * 8 + 2)], sA[lIndex + (2 * 8 + 2)])); 1229 1230 1231 texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0; 1232 1233 dstMip3.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices >> 2); 1234 1235 1236 sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;; 1237 } 1238 1239 if (options.numMipLevelsToGen == 3) 1240 { 1241 return; 1242 } 1243 1244 1245 threadgroup_barrier(mem_flags::mem_threadgroup); 1246 1247 1248 if ((lIndex & 0x3f) == 0) 1249 { 1250 mipSize = max(mipSize >> 1, ushort2(1)); 1251 bool2 atEdge = (gIndices >> 2) == (mipSize - ushort2(1)); 1252 1253 1254 1255 float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 4], sG[lIndex + 4], sB[lIndex + 4], sA[lIndex + 4])); 1256 1257 float4 texel3 = 1258 (atEdge.y) ? (texel1) : (float4(sR[lIndex + 4 * 8], sG[lIndex + 4 * 8], sB[lIndex + 4 * 8], sA[lIndex + 4 * 8])); 1259 1260 float4 texel4 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (4 * 8 + 4)], sG[lIndex + (4 * 8 + 4)], sB[lIndex + (4 * 8 + 4)], sA[lIndex + (4 * 8 + 4)])); 1261 1262 1263 texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0; 1264 1265 dstMip4.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices >> 3); 1266 } 1267 } 1268 1269 template <typename TextureTypeR, typename TextureTypeW> 1270 static __attribute__((always_inline)) void generateCubeOr2DArray2ndAndMoreMipmaps( 1271 uint lIndex, 1272 ushort3 gIndices, 1273 TextureTypeR srcTexture, 1274 TextureTypeW dstMip2, 1275 TextureTypeW dstMip3, 1276 TextureTypeW dstMip4, 1277 ushort2 mip1Size, 1278 float4 mip1Texel, 1279 threadgroup float *sR, 1280 threadgroup float *sG, 1281 threadgroup float *sB, 1282 threadgroup float *sA, 1283 constant GenMipParams &options) 1284 { 1285 ushort2 mipSize = mip1Size; 1286 float4 texel1 = mip1Texel; 1287 1288 1289 1290 1291 sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;; 1292 1293 threadgroup_barrier(mem_flags::mem_threadgroup); 1294 1295 1296 if ((lIndex & 0x09) == 0) 1297 { 1298 bool2 atEdge = gIndices.xy == (mipSize - ushort2(1)); 1299 1300 1301 1302 float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 1], sG[lIndex + 1], sB[lIndex + 1], sA[lIndex + 1])); 1303 1304 float4 texel3 = (atEdge.y) ? (texel1) : (float4(sR[lIndex + 8], sG[lIndex + 8], sB[lIndex + 8], sA[lIndex + 8])); 1305 1306 float4 texel4 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (8 + 1)], sG[lIndex + (8 + 1)], sB[lIndex + (8 + 1)], sA[lIndex + (8 + 1)])); 1307 1308 1309 texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0; 1310 1311 dstMip2.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices.xy >> 1, gIndices.z); 1312 1313 1314 sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;; 1315 } 1316 1317 if (options.numMipLevelsToGen == 2) 1318 { 1319 return; 1320 } 1321 1322 1323 threadgroup_barrier(mem_flags::mem_threadgroup); 1324 1325 1326 if ((lIndex & 0x1b) == 0) 1327 { 1328 mipSize = max(mipSize >> 1, ushort2(1)); 1329 bool2 atEdge = (gIndices.xy >> 1) == (mipSize - ushort2(1)); 1330 1331 1332 1333 float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 2], sG[lIndex + 2], sB[lIndex + 2], sA[lIndex + 2])); 1334 1335 float4 texel3 = 1336 (atEdge.y) ? (texel1) : (float4(sR[lIndex + 2 * 8], sG[lIndex + 2 * 8], sB[lIndex + 2 * 8], sA[lIndex + 2 * 8])); 1337 1338 float4 texel4 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (2 * 8 + 2)], sG[lIndex + (2 * 8 + 2)], sB[lIndex + (2 * 8 + 2)], sA[lIndex + (2 * 8 + 2)])); 1339 1340 1341 texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0; 1342 1343 dstMip3.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices.xy >> 2, gIndices.z); 1344 1345 1346 sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;; 1347 } 1348 1349 if (options.numMipLevelsToGen == 3) 1350 { 1351 return; 1352 } 1353 1354 1355 threadgroup_barrier(mem_flags::mem_threadgroup); 1356 1357 1358 if ((lIndex & 0x3f) == 0) 1359 { 1360 mipSize = max(mipSize >> 1, ushort2(1)); 1361 bool2 atEdge = (gIndices.xy >> 2) == (mipSize - ushort2(1)); 1362 1363 1364 1365 float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 4], sG[lIndex + 4], sB[lIndex + 4], sA[lIndex + 4])); 1366 1367 float4 texel3 = 1368 (atEdge.y) ? (texel1) : (float4(sR[lIndex + 4 * 8], sG[lIndex + 4 * 8], sB[lIndex + 4 * 8], sA[lIndex + 4 * 8])); 1369 1370 float4 texel4 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (4 * 8 + 4)], sG[lIndex + (4 * 8 + 4)], sB[lIndex + (4 * 8 + 4)], sA[lIndex + (4 * 8 + 4)])); 1371 1372 1373 texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0; 1374 1375 dstMip4.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices.xy >> 3, gIndices.z); 1376 } 1377 } 1378 1379 kernel void generateCubeMipmaps(uint lIndex [[thread_index_in_threadgroup]], 1380 ushort3 gIndices [[thread_position_in_grid]], 1381 texturecube<float> srcTexture [[texture(0)]], 1382 texturecube<float, access::write> dstMip1 [[texture(1)]], 1383 texturecube<float, access::write> dstMip2 [[texture(2)]], 1384 texturecube<float, access::write> dstMip3 [[texture(3)]], 1385 texturecube<float, access::write> dstMip4 [[texture(4)]], 1386 constant GenMipParams &options [[buffer(0)]]) 1387 { 1388 uint firstMipLevel = options.srcLevel + 1; 1389 ushort2 mip1Size = 1390 ushort2(srcTexture.get_width(firstMipLevel), srcTexture.get_height(firstMipLevel)); 1391 bool validThread = gIndices.x < mip1Size.x && gIndices.y < mip1Size.y; 1392 1393 constexpr sampler textureSampler(mag_filter::linear, min_filter::linear, mip_filter::linear); 1394 1395 1396 float4 mip1Texel; 1397 if (validThread) 1398 { 1399 float2 texCoords = (float2(gIndices.xy) + float2(0.5, 0.5)) / float2(mip1Size); 1400 mip1Texel = srcTexture.sample(textureSampler, cubeTexcoords(texCoords, int(gIndices.z)), 1401 level(options.srcLevel)); 1402 1403 1404 dstMip1.write((options.sRGB ? sRGBtoLinear(mip1Texel) : mip1Texel), gIndices.xy, gIndices.z); 1405 } 1406 else 1407 { 1408 1409 lIndex = 0xffffffff; 1410 } 1411 1412 if (options.numMipLevelsToGen == 1) 1413 { 1414 return; 1415 } 1416 1417 1418 threadgroup float sR[(8 * 8)]; 1419 threadgroup float sG[(8 * 8)]; 1420 threadgroup float sB[(8 * 8)]; 1421 threadgroup float sA[(8 * 8)]; 1422 1423 generateCubeOr2DArray2ndAndMoreMipmaps(lIndex, gIndices, srcTexture, dstMip2, dstMip3, dstMip4, 1424 mip1Size, mip1Texel, sR, sG, sB, sA, options); 1425 } 1426 1427 kernel void generate2DArrayMipmaps(uint lIndex [[thread_index_in_threadgroup]], 1428 ushort3 gIndices [[thread_position_in_grid]], 1429 texture2d_array<float> srcTexture [[texture(0)]], 1430 texture2d_array<float, access::write> dstMip1 [[texture(1)]], 1431 texture2d_array<float, access::write> dstMip2 [[texture(2)]], 1432 texture2d_array<float, access::write> dstMip3 [[texture(3)]], 1433 texture2d_array<float, access::write> dstMip4 [[texture(4)]], 1434 constant GenMipParams &options [[buffer(0)]]) 1435 { 1436 uint firstMipLevel = options.srcLevel + 1; 1437 ushort2 mip1Size = 1438 ushort2(srcTexture.get_width(firstMipLevel), srcTexture.get_height(firstMipLevel)); 1439 bool validThread = gIndices.x < mip1Size.x && gIndices.y < mip1Size.y; 1440 1441 constexpr sampler textureSampler(mag_filter::linear, min_filter::linear, mip_filter::linear); 1442 1443 1444 float4 mip1Texel; 1445 if (validThread) 1446 { 1447 float2 texCoords = (float2(gIndices.xy) + float2(0.5, 0.5)) / float2(mip1Size); 1448 mip1Texel = 1449 srcTexture.sample(textureSampler, texCoords, gIndices.z, level(options.srcLevel)); 1450 1451 1452 dstMip1.write((options.sRGB ? sRGBtoLinear(mip1Texel) : mip1Texel), gIndices.xy, gIndices.z); 1453 } 1454 else 1455 { 1456 1457 lIndex = 0xffffffff; 1458 } 1459 1460 if (options.numMipLevelsToGen == 1) 1461 { 1462 return; 1463 } 1464 1465 1466 threadgroup float sR[(8 * 8)]; 1467 threadgroup float sG[(8 * 8)]; 1468 threadgroup float sB[(8 * 8)]; 1469 threadgroup float sA[(8 * 8)]; 1470 1471 generateCubeOr2DArray2ndAndMoreMipmaps(lIndex, gIndices, srcTexture, dstMip2, dstMip3, dstMip4, 1472 mip1Size, mip1Texel, sR, sG, sB, sA, options); 1473 } 1474 # 5 "temp_master_source.metal" 2 1475 # 1 ".\\copy_buffer.metal" 1 1476 # 12 ".\\copy_buffer.metal" 1477 #include <metal_pack> 1478 1479 1480 # 1 ".\\format_autogen.h" 1 1481 1482 1483 1484 namespace rx 1485 { 1486 namespace mtl_shader 1487 { 1488 1489 namespace FormatID 1490 { 1491 enum 1492 { 1493 NONE, 1494 D16_UNORM, 1495 D24_UNORM_S8_UINT, 1496 D24_UNORM_X8_UINT, 1497 D32_FLOAT, 1498 D32_FLOAT_S8X24_UINT, 1499 D32_UNORM, 1500 S8_UINT, 1501 A16_FLOAT, 1502 A1R5G5B5_UNORM, 1503 A2R10G10B10_SINT_VERTEX, 1504 A2R10G10B10_SNORM_VERTEX, 1505 A2R10G10B10_SSCALED_VERTEX, 1506 A2R10G10B10_UINT_VERTEX, 1507 A2R10G10B10_UNORM_VERTEX, 1508 A2R10G10B10_USCALED_VERTEX, 1509 A32_FLOAT, 1510 A8_UNORM, 1511 ASTC_10x10_SRGB_BLOCK, 1512 ASTC_10x10_UNORM_BLOCK, 1513 ASTC_10x5_SRGB_BLOCK, 1514 ASTC_10x5_UNORM_BLOCK, 1515 ASTC_10x6_SRGB_BLOCK, 1516 ASTC_10x6_UNORM_BLOCK, 1517 ASTC_10x8_SRGB_BLOCK, 1518 ASTC_10x8_UNORM_BLOCK, 1519 ASTC_12x10_SRGB_BLOCK, 1520 ASTC_12x10_UNORM_BLOCK, 1521 ASTC_12x12_SRGB_BLOCK, 1522 ASTC_12x12_UNORM_BLOCK, 1523 ASTC_3x3x3_UNORM_BLOCK, 1524 ASTC_3x3x3_UNORM_SRGB_BLOCK, 1525 ASTC_4x3x3_UNORM_BLOCK, 1526 ASTC_4x3x3_UNORM_SRGB_BLOCK, 1527 ASTC_4x4_SRGB_BLOCK, 1528 ASTC_4x4_UNORM_BLOCK, 1529 ASTC_4x4x3_UNORM_BLOCK, 1530 ASTC_4x4x3_UNORM_SRGB_BLOCK, 1531 ASTC_4x4x4_UNORM_BLOCK, 1532 ASTC_4x4x4_UNORM_SRGB_BLOCK, 1533 ASTC_5x4_SRGB_BLOCK, 1534 ASTC_5x4_UNORM_BLOCK, 1535 ASTC_5x4x4_UNORM_BLOCK, 1536 ASTC_5x4x4_UNORM_SRGB_BLOCK, 1537 ASTC_5x5_SRGB_BLOCK, 1538 ASTC_5x5_UNORM_BLOCK, 1539 ASTC_5x5x4_UNORM_BLOCK, 1540 ASTC_5x5x4_UNORM_SRGB_BLOCK, 1541 ASTC_5x5x5_UNORM_BLOCK, 1542 ASTC_5x5x5_UNORM_SRGB_BLOCK, 1543 ASTC_6x5_SRGB_BLOCK, 1544 ASTC_6x5_UNORM_BLOCK, 1545 ASTC_6x5x5_UNORM_BLOCK, 1546 ASTC_6x5x5_UNORM_SRGB_BLOCK, 1547 ASTC_6x6_SRGB_BLOCK, 1548 ASTC_6x6_UNORM_BLOCK, 1549 ASTC_6x6x5_UNORM_BLOCK, 1550 ASTC_6x6x5_UNORM_SRGB_BLOCK, 1551 ASTC_6x6x6_UNORM_BLOCK, 1552 ASTC_6x6x6_UNORM_SRGB_BLOCK, 1553 ASTC_8x5_SRGB_BLOCK, 1554 ASTC_8x5_UNORM_BLOCK, 1555 ASTC_8x6_SRGB_BLOCK, 1556 ASTC_8x6_UNORM_BLOCK, 1557 ASTC_8x8_SRGB_BLOCK, 1558 ASTC_8x8_UNORM_BLOCK, 1559 B10G10R10A2_UNORM, 1560 B4G4R4A4_UNORM, 1561 B5G5R5A1_UNORM, 1562 B5G6R5_UNORM, 1563 B8G8R8A8_TYPELESS, 1564 B8G8R8A8_TYPELESS_SRGB, 1565 B8G8R8A8_UNORM, 1566 B8G8R8A8_UNORM_SRGB, 1567 B8G8R8X8_UNORM, 1568 B8G8R8X8_UNORM_SRGB, 1569 BC1_RGBA_UNORM_BLOCK, 1570 BC1_RGBA_UNORM_SRGB_BLOCK, 1571 BC1_RGB_UNORM_BLOCK, 1572 BC1_RGB_UNORM_SRGB_BLOCK, 1573 BC2_RGBA_UNORM_BLOCK, 1574 BC2_RGBA_UNORM_SRGB_BLOCK, 1575 BC3_RGBA_UNORM_BLOCK, 1576 BC3_RGBA_UNORM_SRGB_BLOCK, 1577 BC4_RED_SNORM_BLOCK, 1578 BC4_RED_UNORM_BLOCK, 1579 BC5_RG_SNORM_BLOCK, 1580 BC5_RG_UNORM_BLOCK, 1581 BC6H_RGB_SFLOAT_BLOCK, 1582 BC6H_RGB_UFLOAT_BLOCK, 1583 BC7_RGBA_UNORM_BLOCK, 1584 BC7_RGBA_UNORM_SRGB_BLOCK, 1585 EAC_R11G11_SNORM_BLOCK, 1586 EAC_R11G11_UNORM_BLOCK, 1587 EAC_R11_SNORM_BLOCK, 1588 EAC_R11_UNORM_BLOCK, 1589 ETC1_LOSSY_DECODE_R8G8B8_UNORM_BLOCK, 1590 ETC1_R8G8B8_UNORM_BLOCK, 1591 ETC2_R8G8B8A1_SRGB_BLOCK, 1592 ETC2_R8G8B8A1_UNORM_BLOCK, 1593 ETC2_R8G8B8A8_SRGB_BLOCK, 1594 ETC2_R8G8B8A8_UNORM_BLOCK, 1595 ETC2_R8G8B8_SRGB_BLOCK, 1596 ETC2_R8G8B8_UNORM_BLOCK, 1597 G8_B8R8_2PLANE_420_UNORM, 1598 G8_B8_R8_3PLANE_420_UNORM, 1599 L16A16_FLOAT, 1600 L16_FLOAT, 1601 L32A32_FLOAT, 1602 L32_FLOAT, 1603 L4A4_UNORM, 1604 L8A8_UNORM, 1605 L8_UNORM, 1606 PALETTE4_R4G4B4A4_UNORM, 1607 PALETTE4_R5G5B5A1_UNORM, 1608 PALETTE4_R5G6B5_UNORM, 1609 PALETTE4_R8G8B8A8_UNORM, 1610 PALETTE4_R8G8B8_UNORM, 1611 PALETTE8_R4G4B4A4_UNORM, 1612 PALETTE8_R5G5B5A1_UNORM, 1613 PALETTE8_R5G6B5_UNORM, 1614 PALETTE8_R8G8B8A8_UNORM, 1615 PALETTE8_R8G8B8_UNORM, 1616 PVRTC1_RGBA_2BPP_UNORM_BLOCK, 1617 PVRTC1_RGBA_2BPP_UNORM_SRGB_BLOCK, 1618 PVRTC1_RGBA_4BPP_UNORM_BLOCK, 1619 PVRTC1_RGBA_4BPP_UNORM_SRGB_BLOCK, 1620 PVRTC1_RGB_2BPP_UNORM_BLOCK, 1621 PVRTC1_RGB_2BPP_UNORM_SRGB_BLOCK, 1622 PVRTC1_RGB_4BPP_UNORM_BLOCK, 1623 PVRTC1_RGB_4BPP_UNORM_SRGB_BLOCK, 1624 R10G10B10A2_SINT, 1625 R10G10B10A2_SNORM, 1626 R10G10B10A2_SSCALED, 1627 R10G10B10A2_UINT, 1628 R10G10B10A2_UNORM, 1629 R10G10B10A2_USCALED, 1630 R10G10B10X2_UNORM, 1631 R11G11B10_FLOAT, 1632 R16G16B16A16_FLOAT, 1633 R16G16B16A16_SINT, 1634 R16G16B16A16_SNORM, 1635 R16G16B16A16_SSCALED, 1636 R16G16B16A16_UINT, 1637 R16G16B16A16_UNORM, 1638 R16G16B16A16_USCALED, 1639 R16G16B16_FLOAT, 1640 R16G16B16_SINT, 1641 R16G16B16_SNORM, 1642 R16G16B16_SSCALED, 1643 R16G16B16_UINT, 1644 R16G16B16_UNORM, 1645 R16G16B16_USCALED, 1646 R16G16_FLOAT, 1647 R16G16_SINT, 1648 R16G16_SNORM, 1649 R16G16_SSCALED, 1650 R16G16_UINT, 1651 R16G16_UNORM, 1652 R16G16_USCALED, 1653 R16_FLOAT, 1654 R16_SINT, 1655 R16_SNORM, 1656 R16_SSCALED, 1657 R16_UINT, 1658 R16_UNORM, 1659 R16_USCALED, 1660 R32G32B32A32_FIXED, 1661 R32G32B32A32_FLOAT, 1662 R32G32B32A32_SINT, 1663 R32G32B32A32_SNORM, 1664 R32G32B32A32_SSCALED, 1665 R32G32B32A32_UINT, 1666 R32G32B32A32_UNORM, 1667 R32G32B32A32_USCALED, 1668 R32G32B32_FIXED, 1669 R32G32B32_FLOAT, 1670 R32G32B32_SINT, 1671 R32G32B32_SNORM, 1672 R32G32B32_SSCALED, 1673 R32G32B32_UINT, 1674 R32G32B32_UNORM, 1675 R32G32B32_USCALED, 1676 R32G32_FIXED, 1677 R32G32_FLOAT, 1678 R32G32_SINT, 1679 R32G32_SNORM, 1680 R32G32_SSCALED, 1681 R32G32_UINT, 1682 R32G32_UNORM, 1683 R32G32_USCALED, 1684 R32_FIXED, 1685 R32_FLOAT, 1686 R32_SINT, 1687 R32_SNORM, 1688 R32_SSCALED, 1689 R32_UINT, 1690 R32_UNORM, 1691 R32_USCALED, 1692 R4G4B4A4_UNORM, 1693 R5G5B5A1_UNORM, 1694 R5G6B5_UNORM, 1695 R8G8B8A8_SINT, 1696 R8G8B8A8_SNORM, 1697 R8G8B8A8_SSCALED, 1698 R8G8B8A8_TYPELESS, 1699 R8G8B8A8_TYPELESS_SRGB, 1700 R8G8B8A8_UINT, 1701 R8G8B8A8_UNORM, 1702 R8G8B8A8_UNORM_SRGB, 1703 R8G8B8A8_USCALED, 1704 R8G8B8X8_UNORM, 1705 R8G8B8X8_UNORM_SRGB, 1706 R8G8B8_SINT, 1707 R8G8B8_SNORM, 1708 R8G8B8_SSCALED, 1709 R8G8B8_UINT, 1710 R8G8B8_UNORM, 1711 R8G8B8_UNORM_SRGB, 1712 R8G8B8_USCALED, 1713 R8G8_SINT, 1714 R8G8_SNORM, 1715 R8G8_SSCALED, 1716 R8G8_UINT, 1717 R8G8_UNORM, 1718 R8G8_UNORM_SRGB, 1719 R8G8_USCALED, 1720 R8_SINT, 1721 R8_SNORM, 1722 R8_SSCALED, 1723 R8_UINT, 1724 R8_UNORM, 1725 R8_UNORM_SRGB, 1726 R8_USCALED, 1727 R9G9B9E5_SHAREDEXP, 1728 X2R10G10B10_SINT_VERTEX, 1729 X2R10G10B10_SNORM_VERTEX, 1730 X2R10G10B10_SSCALED_VERTEX, 1731 X2R10G10B10_UINT_VERTEX, 1732 X2R10G10B10_UNORM_VERTEX, 1733 X2R10G10B10_USCALED_VERTEX, 1734 EXTERNAL0, 1735 EXTERNAL1, 1736 EXTERNAL2, 1737 EXTERNAL3, 1738 EXTERNAL4, 1739 EXTERNAL5, 1740 EXTERNAL6, 1741 EXTERNAL7 1742 }; 1743 1744 } 1745 1746 } 1747 } 1748 # 16 ".\\copy_buffer.metal" 2 1749 1750 using namespace rx::mtl_shader; 1751 1752 constant int kCopyFormatType [[function_constant(10)]]; 1753 1754 1755 constant int kCopyTextureType [[function_constant(20)]]; 1756 constant bool kCopyTextureType2D = kCopyTextureType == kTextureType2D; 1757 constant bool kCopyTextureType2DArray = kCopyTextureType == kTextureType2DArray; 1758 constant bool kCopyTextureType2DMS = kCopyTextureType == kTextureType2DMultisample; 1759 constant bool kCopyTextureTypeCube = kCopyTextureType == kTextureTypeCube; 1760 constant bool kCopyTextureType3D = kCopyTextureType == kTextureType3D; 1761 1762 struct CopyPixelParams 1763 { 1764 uint3 copySize; 1765 uint3 textureOffset; 1766 1767 uint bufferStartOffset; 1768 uint pixelSize; 1769 uint bufferRowPitch; 1770 uint bufferDepthPitch; 1771 }; 1772 1773 struct WritePixelParams 1774 { 1775 uint2 copySize; 1776 uint2 textureOffset; 1777 1778 uint bufferStartOffset; 1779 1780 uint pixelSize; 1781 uint bufferRowPitch; 1782 1783 uint textureLevel; 1784 uint textureLayer; 1785 1786 bool reverseTextureRowOrder; 1787 }; 1788 # 120 ".\\copy_buffer.metal" 1789 template <typename T> 1790 static inline void textureWrite(ushort3 gIndices, 1791 constant CopyPixelParams &options, 1792 vec<T, 4> color, 1793 texture2d<T, access::write> dstTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<T, access::write> dstTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<T, access::write> dstTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<T, access::write> dstTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]]) 1794 { 1795 uint3 writeIndices = options.textureOffset + uint3(gIndices); 1796 switch (kCopyTextureType) 1797 { 1798 case kTextureType2D: 1799 dstTexture2d.write(color, writeIndices.xy); 1800 break; 1801 case kTextureType2DArray: 1802 dstTexture2dArray.write(color, writeIndices.xy, writeIndices.z); 1803 break; 1804 case kTextureType3D: 1805 dstTexture3d.write(color, writeIndices); 1806 break; 1807 case kTextureTypeCube: 1808 dstTextureCube.write(color, writeIndices.xy, writeIndices.z); 1809 break; 1810 } 1811 } 1812 1813 1814 template <typename T> 1815 static inline vec<T, 4> textureRead(ushort2 gIndices, 1816 constant WritePixelParams &options, 1817 texture2d<T, access::read> srcTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<T, access::read> srcTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<T, access::read> srcTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<T, access::read> srcTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]], texture2d_ms<T, access::read> srcTexture2dMS [[texture(0), function_constant(kCopyTextureType2DMS)]]) 1818 { 1819 vec<T, 4> color; 1820 uint2 coords = uint2(gIndices); 1821 if (options.reverseTextureRowOrder) 1822 { 1823 coords.y = options.copySize.y - 1 - gIndices.y; 1824 } 1825 coords += options.textureOffset; 1826 switch (kCopyTextureType) 1827 { 1828 case kTextureType2D: 1829 color = srcTexture2d.read(coords.xy, options.textureLevel); 1830 break; 1831 case kTextureType2DArray: 1832 color = srcTexture2dArray.read(coords.xy, options.textureLayer, options.textureLevel); 1833 break; 1834 case kTextureType2DMultisample: 1835 color = resolveTextureMS(srcTexture2dMS, coords.xy); 1836 break; 1837 case kTextureType3D: 1838 color = srcTexture3d.read(uint3(coords, options.textureLayer), options.textureLevel); 1839 break; 1840 case kTextureTypeCube: 1841 color = srcTextureCube.read(coords.xy, options.textureLayer, options.textureLevel); 1842 break; 1843 } 1844 return color; 1845 } 1846 # 215 ".\\copy_buffer.metal" 1847 static inline float4 readR5G6B5_UNORM(uint bufferOffset, constant uchar *buffer) 1848 { 1849 float4 color; 1850 ushort src = bytesToShort<ushort>(buffer, bufferOffset); 1851 1852 color.r = normalizedToFloat<5>(getShiftedData<5, 11>(src)); 1853 color.g = normalizedToFloat<6>(getShiftedData<6, 5>(src)); 1854 color.b = normalizedToFloat<5>(getShiftedData<5, 0>(src)); 1855 color.a = 1.0; 1856 return color; 1857 } 1858 static inline void writeR5G6B5_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 1859 { 1860 ushort dst = shiftData<5, 11>(floatToNormalized<5, ushort>(color.r)) | 1861 shiftData<6, 5>(floatToNormalized<6, ushort>(color.g)) | 1862 shiftData<5, 0>(floatToNormalized<5, ushort>(color.b)); 1863 1864 shortToBytes(dst, bufferOffset, buffer); 1865 } 1866 1867 1868 static inline float4 readR4G4B4A4_UNORM(uint bufferOffset, constant uchar *buffer) 1869 { 1870 float4 color; 1871 ushort src = bytesToShort<ushort>(buffer, bufferOffset); 1872 1873 color.r = normalizedToFloat<4>(getShiftedData<4, 12>(src)); 1874 color.g = normalizedToFloat<4>(getShiftedData<4, 8>(src)); 1875 color.b = normalizedToFloat<4>(getShiftedData<4, 4>(src)); 1876 color.a = normalizedToFloat<4>(getShiftedData<4, 0>(src)); 1877 return color; 1878 } 1879 static inline void writeR4G4B4A4_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 1880 { 1881 ushort dst = shiftData<4, 12>(floatToNormalized<4, ushort>(color.r)) | 1882 shiftData<4, 8>(floatToNormalized<4, ushort>(color.g)) | 1883 shiftData<4, 4>(floatToNormalized<4, ushort>(color.b)) | 1884 shiftData<4, 0>(floatToNormalized<4, ushort>(color.a)); 1885 ; 1886 1887 shortToBytes(dst, bufferOffset, buffer); 1888 } 1889 1890 1891 static inline float4 readR5G5B5A1_UNORM(uint bufferOffset, constant uchar *buffer) 1892 { 1893 float4 color; 1894 ushort src = bytesToShort<ushort>(buffer, bufferOffset); 1895 1896 color.r = normalizedToFloat<5>(getShiftedData<5, 11>(src)); 1897 color.g = normalizedToFloat<5>(getShiftedData<5, 6>(src)); 1898 color.b = normalizedToFloat<5>(getShiftedData<5, 1>(src)); 1899 color.a = normalizedToFloat<1>(getShiftedData<1, 0>(src)); 1900 return color; 1901 } 1902 static inline void writeR5G5B5A1_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 1903 { 1904 ushort dst = shiftData<5, 11>(floatToNormalized<5, ushort>(color.r)) | 1905 shiftData<5, 6>(floatToNormalized<5, ushort>(color.g)) | 1906 shiftData<5, 1>(floatToNormalized<5, ushort>(color.b)) | 1907 shiftData<1, 0>(floatToNormalized<1, ushort>(color.a)); 1908 ; 1909 1910 shortToBytes(dst, bufferOffset, buffer); 1911 } 1912 1913 1914 static inline int4 readR10G10B10A2_SINT(uint bufferOffset, constant uchar *buffer) 1915 { 1916 int4 color; 1917 int src = bytesToInt<int>(buffer, bufferOffset); 1918 1919 constexpr int3 rgbSignMask(0x200); 1920 constexpr int3 negativeMask(0xfffffc00); 1921 constexpr int alphaSignMask = 0x2; 1922 constexpr int alphaNegMask = 0xfffffffc; 1923 1924 color.r = getShiftedData<10, 0>(src); 1925 color.g = getShiftedData<10, 10>(src); 1926 color.b = getShiftedData<10, 20>(src); 1927 1928 int3 isRgbNegative = (color.rgb & rgbSignMask) >> 9; 1929 color.rgb = (isRgbNegative * negativeMask) | color.rgb; 1930 1931 color.a = getShiftedData<2, 30>(src); 1932 int isAlphaNegative = color.a & alphaSignMask >> 1; 1933 color.a = (isAlphaNegative * alphaNegMask) | color.a; 1934 return color; 1935 } 1936 1937 static inline uint4 readR10G10B10A2_UINT(uint bufferOffset, constant uchar *buffer) 1938 { 1939 uint4 color; 1940 uint src = bytesToInt<uint>(buffer, bufferOffset); 1941 1942 color.r = getShiftedData<10, 0>(src); 1943 color.g = getShiftedData<10, 10>(src); 1944 color.b = getShiftedData<10, 20>(src); 1945 color.a = getShiftedData<2, 30>(src); 1946 return color; 1947 } 1948 1949 1950 static inline float4 readR8G8B8A8(uint bufferOffset, constant uchar *buffer, bool isSRGB) 1951 { 1952 float4 color; 1953 uint src = bytesToInt<uint>(buffer, bufferOffset); 1954 1955 if (isSRGB) 1956 { 1957 color = unpack_unorm4x8_srgb_to_float(src); 1958 } 1959 else 1960 { 1961 color = unpack_unorm4x8_to_float(src); 1962 } 1963 return color; 1964 } 1965 static inline void writeR8G8B8A8(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer, bool isSRGB) 1966 { 1967 uint dst; 1968 1969 if (isSRGB) 1970 { 1971 dst = pack_float_to_srgb_unorm4x8(color); 1972 } 1973 else 1974 { 1975 dst = pack_float_to_unorm4x8(color); 1976 } 1977 1978 intToBytes(dst, bufferOffset, buffer); 1979 } 1980 1981 static inline float4 readR8G8B8(uint bufferOffset, constant uchar *buffer, bool isSRGB) 1982 { 1983 float4 color; 1984 color.r = normalizedToFloat<uchar>(buffer[bufferOffset]); 1985 color.g = normalizedToFloat<uchar>(buffer[bufferOffset + 1]); 1986 color.b = normalizedToFloat<uchar>(buffer[bufferOffset + 2]); 1987 color.a = 1.0; 1988 1989 if (isSRGB) 1990 { 1991 color = sRGBtoLinear(color); 1992 } 1993 return color; 1994 } 1995 static inline void writeR8G8B8(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer, bool isSRGB) 1996 { 1997 color.a = 1.0; 1998 uint dst; 1999 2000 if (isSRGB) 2001 { 2002 dst = pack_float_to_srgb_unorm4x8(color); 2003 } 2004 else 2005 { 2006 dst = pack_float_to_unorm4x8(color); 2007 } 2008 int24bitToBytes(dst, bufferOffset, buffer); 2009 } 2010 2011 2012 static inline float4 readR8G8B8A8_SNORM(uint bufferOffset, constant uchar *buffer) 2013 { 2014 float4 color; 2015 uint src = bytesToInt<uint>(buffer, bufferOffset); 2016 2017 color = unpack_snorm4x8_to_float(src); 2018 2019 return color; 2020 } 2021 static inline void writeR8G8B8A8_SNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2022 { 2023 uint dst = pack_float_to_snorm4x8(color); 2024 2025 intToBytes(dst, bufferOffset, buffer); 2026 } 2027 2028 2029 static inline float4 readR8G8B8_SNORM(uint bufferOffset, constant uchar *buffer) 2030 { 2031 float4 color; 2032 color.r = normalizedToFloat<7, char>(buffer[bufferOffset]); 2033 color.g = normalizedToFloat<7, char>(buffer[bufferOffset + 1]); 2034 color.b = normalizedToFloat<7, char>(buffer[bufferOffset + 2]); 2035 color.a = 1.0; 2036 2037 return color; 2038 } 2039 static inline void writeR8G8B8_SNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2040 { 2041 uint dst = pack_float_to_snorm4x8(color); 2042 2043 int24bitToBytes(dst, bufferOffset, buffer); 2044 } 2045 2046 2047 static inline float4 readR8G8B8A8_UNORM(uint bufferOffset, constant uchar *buffer) 2048 { 2049 return readR8G8B8A8(bufferOffset, buffer, false); 2050 } 2051 static inline void writeR8G8B8A8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2052 { 2053 return writeR8G8B8A8(gIndices, options, bufferOffset, color, buffer, false); 2054 } 2055 2056 static inline float4 readR8G8B8A8_UNORM_SRGB(uint bufferOffset, constant uchar *buffer) 2057 { 2058 return readR8G8B8A8(bufferOffset, buffer, true); 2059 } 2060 static inline void writeR8G8B8A8_UNORM_SRGB(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2061 { 2062 return writeR8G8B8A8(gIndices, options, bufferOffset, color, buffer, true); 2063 } 2064 2065 2066 static inline float4 readB8G8R8A8_UNORM(uint bufferOffset, constant uchar *buffer) 2067 { 2068 return readR8G8B8A8(bufferOffset, buffer, false).bgra; 2069 } 2070 static inline void writeB8G8R8A8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2071 { 2072 color.rgba = color.bgra; 2073 return writeR8G8B8A8(gIndices, options, bufferOffset, color, buffer, false); 2074 } 2075 2076 static inline float4 readB8G8R8A8_UNORM_SRGB(uint bufferOffset, constant uchar *buffer) 2077 { 2078 return readR8G8B8A8(bufferOffset, buffer, true).bgra; 2079 } 2080 static inline void writeB8G8R8A8_UNORM_SRGB(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2081 { 2082 color.rgba = color.bgra; 2083 return writeR8G8B8A8(gIndices, options, bufferOffset, color, buffer, true); 2084 } 2085 2086 2087 static inline float4 readR8G8B8_UNORM(uint bufferOffset, constant uchar *buffer) 2088 { 2089 return readR8G8B8(bufferOffset, buffer, false); 2090 } 2091 static inline void writeR8G8B8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2092 { 2093 return writeR8G8B8(gIndices, options, bufferOffset, color, buffer, false); 2094 } 2095 2096 static inline float4 readR8G8B8_UNORM_SRGB(uint bufferOffset, constant uchar *buffer) 2097 { 2098 return readR8G8B8(bufferOffset, buffer, true); 2099 } 2100 static inline void writeR8G8B8_UNORM_SRGB(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2101 { 2102 return writeR8G8B8(gIndices, options, bufferOffset, color, buffer, true); 2103 } 2104 2105 2106 static inline float4 readL8_UNORM(uint bufferOffset, constant uchar *buffer) 2107 { 2108 float4 color; 2109 color.rgb = float3(normalizedToFloat<uchar>(buffer[bufferOffset])); 2110 color.a = 1.0; 2111 return color; 2112 } 2113 static inline void writeL8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2114 { 2115 buffer[bufferOffset] = floatToNormalized<uchar>(color.r); 2116 } 2117 2118 2119 static inline void writeA8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2120 { 2121 buffer[bufferOffset] = floatToNormalized<uchar>(color.a); 2122 } 2123 2124 2125 static inline float4 readL8A8_UNORM(uint bufferOffset, constant uchar *buffer) 2126 { 2127 float4 color; 2128 color.rgb = float3(normalizedToFloat<uchar>(buffer[bufferOffset])); 2129 color.a = normalizedToFloat<uchar>(buffer[bufferOffset + 1]); 2130 return color; 2131 } 2132 static inline void writeL8A8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2133 { 2134 buffer[bufferOffset] = floatToNormalized<uchar>(color.r); 2135 buffer[bufferOffset + 1] = floatToNormalized<uchar>(color.a); 2136 } 2137 2138 2139 static inline float4 readR8_UNORM(uint bufferOffset, constant uchar *buffer) 2140 { 2141 float4 color; 2142 color.r = normalizedToFloat<uchar>(buffer[bufferOffset]); 2143 color.g = color.b = 0.0; 2144 color.a = 1.0; 2145 return color; 2146 } 2147 static inline void writeR8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2148 { 2149 buffer[bufferOffset] = floatToNormalized<uchar>(color.r); 2150 } 2151 2152 static inline float4 readR8_SNORM(uint bufferOffset, constant uchar *buffer) 2153 { 2154 float4 color; 2155 color.r = normalizedToFloat<7, char>(buffer[bufferOffset]); 2156 color.g = color.b = 0.0; 2157 color.a = 1.0; 2158 return color; 2159 } 2160 static inline void writeR8_SNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2161 { 2162 buffer[bufferOffset] = as_type<uchar>(floatToNormalized<char>(color.r)); 2163 } 2164 2165 2166 static inline int4 readR8_SINT(uint bufferOffset, constant uchar *buffer) 2167 { 2168 int4 color; 2169 color.r = as_type<char>(buffer[bufferOffset]); 2170 color.g = color.b = 0; 2171 color.a = 1; 2172 return color; 2173 } 2174 static inline void writeR8_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer) 2175 { 2176 buffer[bufferOffset] = static_cast<uchar>(color.r); 2177 } 2178 2179 2180 static inline uint4 readR8_UINT(uint bufferOffset, constant uchar *buffer) 2181 { 2182 uint4 color; 2183 color.r = as_type<uchar>(buffer[bufferOffset]); 2184 color.g = color.b = 0; 2185 color.a = 1; 2186 return color; 2187 } 2188 static inline void writeR8_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer) 2189 { 2190 buffer[bufferOffset] = static_cast<uchar>(color.r); 2191 } 2192 2193 2194 static inline float4 readR8G8_UNORM(uint bufferOffset, constant uchar *buffer) 2195 { 2196 float4 color; 2197 color.r = normalizedToFloat<uchar>(buffer[bufferOffset]); 2198 color.g = normalizedToFloat<uchar>(buffer[bufferOffset + 1]); 2199 color.b = 0.0; 2200 color.a = 1.0; 2201 return color; 2202 } 2203 static inline void writeR8G8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2204 { 2205 buffer[bufferOffset] = floatToNormalized<uchar>(color.r); 2206 buffer[bufferOffset + 1] = floatToNormalized<uchar>(color.g); 2207 } 2208 2209 static inline float4 readR8G8_SNORM(uint bufferOffset, constant uchar *buffer) 2210 { 2211 float4 color; 2212 color.r = normalizedToFloat<7, char>(buffer[bufferOffset]); 2213 color.g = normalizedToFloat<7, char>(buffer[bufferOffset + 1]); 2214 color.b = 0.0; 2215 color.a = 1.0; 2216 return color; 2217 } 2218 static inline void writeR8G8_SNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2219 { 2220 buffer[bufferOffset] = as_type<uchar>(floatToNormalized<char>(color.r)); 2221 buffer[bufferOffset + 1] = as_type<uchar>(floatToNormalized<char>(color.g)); 2222 } 2223 2224 2225 static inline int4 readR8G8_SINT(uint bufferOffset, constant uchar *buffer) 2226 { 2227 int4 color; 2228 color.r = as_type<char>(buffer[bufferOffset]); 2229 color.g = as_type<char>(buffer[bufferOffset + 1]); 2230 color.b = 0; 2231 color.a = 1; 2232 return color; 2233 } 2234 static inline void writeR8G8_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer) 2235 { 2236 buffer[bufferOffset] = static_cast<uchar>(color.r); 2237 buffer[bufferOffset + 1] = static_cast<uchar>(color.g); 2238 } 2239 2240 2241 static inline uint4 readR8G8_UINT(uint bufferOffset, constant uchar *buffer) 2242 { 2243 uint4 color; 2244 color.r = as_type<uchar>(buffer[bufferOffset]); 2245 color.g = as_type<uchar>(buffer[bufferOffset + 1]); 2246 color.b = 0; 2247 color.a = 1; 2248 return color; 2249 } 2250 static inline void writeR8G8_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer) 2251 { 2252 buffer[bufferOffset] = static_cast<uchar>(color.r); 2253 buffer[bufferOffset + 1] = static_cast<uchar>(color.g); 2254 } 2255 2256 2257 static inline int4 readR8G8B8_SINT(uint bufferOffset, constant uchar *buffer) 2258 { 2259 int4 color; 2260 color.r = as_type<char>(buffer[bufferOffset]); 2261 color.g = as_type<char>(buffer[bufferOffset + 1]); 2262 color.b = as_type<char>(buffer[bufferOffset + 2]); 2263 color.a = 1; 2264 return color; 2265 } 2266 2267 2268 static inline uint4 readR8G8B8_UINT(uint bufferOffset, constant uchar *buffer) 2269 { 2270 uint4 color; 2271 color.r = as_type<uchar>(buffer[bufferOffset]); 2272 color.g = as_type<uchar>(buffer[bufferOffset + 1]); 2273 color.b = as_type<uchar>(buffer[bufferOffset + 2]); 2274 color.a = 1; 2275 return color; 2276 } 2277 2278 2279 static inline int4 readR8G8B8A8_SINT(uint bufferOffset, constant uchar *buffer) 2280 { 2281 int4 color; 2282 color.r = as_type<char>(buffer[bufferOffset]); 2283 color.g = as_type<char>(buffer[bufferOffset + 1]); 2284 color.b = as_type<char>(buffer[bufferOffset + 2]); 2285 color.a = as_type<char>(buffer[bufferOffset + 3]); 2286 return color; 2287 } 2288 static inline void writeR8G8B8A8_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer) 2289 { 2290 buffer[bufferOffset] = static_cast<uchar>(color.r); 2291 buffer[bufferOffset + 1] = static_cast<uchar>(color.g); 2292 buffer[bufferOffset + 2] = static_cast<uchar>(color.b); 2293 buffer[bufferOffset + 3] = static_cast<uchar>(color.a); 2294 } 2295 2296 2297 static inline uint4 readR8G8B8A8_UINT(uint bufferOffset, constant uchar *buffer) 2298 { 2299 uint4 color; 2300 color.r = as_type<uchar>(buffer[bufferOffset]); 2301 color.g = as_type<uchar>(buffer[bufferOffset + 1]); 2302 color.b = as_type<uchar>(buffer[bufferOffset + 2]); 2303 color.a = as_type<uchar>(buffer[bufferOffset + 3]); 2304 return color; 2305 } 2306 static inline void writeR8G8B8A8_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer) 2307 { 2308 buffer[bufferOffset] = static_cast<uchar>(color.r); 2309 buffer[bufferOffset + 1] = static_cast<uchar>(color.g); 2310 buffer[bufferOffset + 2] = static_cast<uchar>(color.b); 2311 buffer[bufferOffset + 3] = static_cast<uchar>(color.a); 2312 } 2313 2314 2315 static inline float4 readR16_FLOAT(uint bufferOffset, constant uchar *buffer) 2316 { 2317 float4 color; 2318 color.r = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset)); 2319 color.g = color.b = 0.0; 2320 color.a = 1.0; 2321 return color; 2322 } 2323 static inline void writeR16_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2324 { 2325 shortToBytes(as_type<ushort>(static_cast<half>(color.r)), bufferOffset, buffer); 2326 } 2327 2328 template <typename ShortType> 2329 static inline float4 readR16_NORM(uint bufferOffset, constant uchar *buffer) 2330 { 2331 float4 color; 2332 color.r = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset)); 2333 color.g = color.b = 0.0; 2334 color.a = 1.0; 2335 return color; 2336 } 2337 2338 2339 2340 template<typename ShortType> 2341 static inline void writeR16_NORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2342 { 2343 shortToBytes(floatToNormalized<ShortType>(color.r), bufferOffset, buffer); 2344 } 2345 2346 2347 2348 2349 static inline int4 readR16_SINT(uint bufferOffset, constant uchar *buffer) 2350 { 2351 int4 color; 2352 color.r = bytesToShort<short>(buffer, bufferOffset); 2353 color.g = color.b = 0; 2354 color.a = 1; 2355 return color; 2356 } 2357 static inline void writeR16_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer) 2358 { 2359 shortToBytes(static_cast<short>(color.r), bufferOffset, buffer); 2360 } 2361 2362 2363 static inline uint4 readR16_UINT(uint bufferOffset, constant uchar *buffer) 2364 { 2365 uint4 color; 2366 color.r = bytesToShort<ushort>(buffer, bufferOffset); 2367 color.g = color.b = 0; 2368 color.a = 1; 2369 return color; 2370 } 2371 static inline void writeR16_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer) 2372 { 2373 shortToBytes(static_cast<ushort>(color.r), bufferOffset, buffer); 2374 } 2375 2376 2377 static inline float4 readA16_FLOAT(uint bufferOffset, constant uchar *buffer) 2378 { 2379 float4 color; 2380 color.a = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset)); 2381 color.rgb = 0.0; 2382 return color; 2383 } 2384 static inline void writeA16_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2385 { 2386 shortToBytes(as_type<ushort>(static_cast<half>(color.a)), bufferOffset, buffer); 2387 } 2388 2389 2390 static inline float4 readL16_FLOAT(uint bufferOffset, constant uchar *buffer) 2391 { 2392 float4 color; 2393 color.rgb = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset)); 2394 color.a = 1.0; 2395 return color; 2396 } 2397 static inline void writeL16_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2398 { 2399 shortToBytes(as_type<ushort>(static_cast<half>(color.r)), bufferOffset, buffer); 2400 } 2401 2402 2403 static inline float4 readL16A16_FLOAT(uint bufferOffset, constant uchar *buffer) 2404 { 2405 float4 color; 2406 color.rgb = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset)); 2407 color.a = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 2)); 2408 return color; 2409 } 2410 static inline void writeL16A16_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2411 { 2412 shortToBytes(as_type<ushort>(static_cast<half>(color.r)), bufferOffset, buffer); 2413 shortToBytes(as_type<ushort>(static_cast<half>(color.a)), bufferOffset + 2, buffer); 2414 } 2415 2416 2417 static inline float4 readR16G16_FLOAT(uint bufferOffset, constant uchar *buffer) 2418 { 2419 float4 color; 2420 color.r = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset)); 2421 color.g = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 2)); 2422 color.b = 0.0; 2423 color.a = 1.0; 2424 return color; 2425 } 2426 static inline void writeR16G16_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2427 { 2428 shortToBytes(as_type<ushort>(static_cast<half>(color.r)), bufferOffset, buffer); 2429 shortToBytes(as_type<ushort>(static_cast<half>(color.g)), bufferOffset + 2, buffer); 2430 } 2431 2432 2433 template <typename ShortType> 2434 static inline float4 readR16G16_NORM(uint bufferOffset, constant uchar *buffer) 2435 { 2436 float4 color; 2437 color.r = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset)); 2438 color.g = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 2)); 2439 color.b = 0.0; 2440 color.a = 1.0; 2441 return color; 2442 } 2443 2444 2445 2446 template<typename ShortType> 2447 static inline void writeR16G16_NORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2448 { 2449 shortToBytes(floatToNormalized<ShortType>(color.r), bufferOffset, buffer); 2450 shortToBytes(floatToNormalized<ShortType>(color.g), bufferOffset + 2, buffer); 2451 } 2452 2453 2454 2455 2456 static inline int4 readR16G16_SINT(uint bufferOffset, constant uchar *buffer) 2457 { 2458 int4 color; 2459 color.r = bytesToShort<short>(buffer, bufferOffset); 2460 color.g = bytesToShort<short>(buffer, bufferOffset + 2); 2461 color.b = 0; 2462 color.a = 1; 2463 return color; 2464 } 2465 static inline void writeR16G16_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer) 2466 { 2467 shortToBytes(static_cast<short>(color.r), bufferOffset, buffer); 2468 shortToBytes(static_cast<short>(color.g), bufferOffset + 2, buffer); 2469 } 2470 2471 2472 static inline uint4 readR16G16_UINT(uint bufferOffset, constant uchar *buffer) 2473 { 2474 uint4 color; 2475 color.r = bytesToShort<ushort>(buffer, bufferOffset); 2476 color.g = bytesToShort<ushort>(buffer, bufferOffset + 2); 2477 color.b = 0; 2478 color.a = 1; 2479 return color; 2480 } 2481 static inline void writeR16G16_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer) 2482 { 2483 shortToBytes(static_cast<ushort>(color.r), bufferOffset, buffer); 2484 shortToBytes(static_cast<ushort>(color.g), bufferOffset + 2, buffer); 2485 } 2486 2487 2488 static inline float4 readR16G16B16_FLOAT(uint bufferOffset, constant uchar *buffer) 2489 { 2490 float4 color; 2491 color.r = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset)); 2492 color.g = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 2)); 2493 color.b = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 4)); 2494 color.a = 1.0; 2495 return color; 2496 } 2497 2498 2499 template <typename ShortType> 2500 static inline float4 readR16G16B16_NORM(uint bufferOffset, constant uchar *buffer) 2501 { 2502 float4 color; 2503 color.r = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset)); 2504 color.g = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 2)); 2505 color.b = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 4)); 2506 color.a = 1.0; 2507 return color; 2508 } 2509 2510 2511 2512 static inline int4 readR16G16B16_SINT(uint bufferOffset, constant uchar *buffer) 2513 { 2514 int4 color; 2515 color.r = bytesToShort<short>(buffer, bufferOffset); 2516 color.g = bytesToShort<short>(buffer, bufferOffset + 2); 2517 color.b = bytesToShort<short>(buffer, bufferOffset + 4); 2518 color.a = 1; 2519 return color; 2520 } 2521 2522 2523 static inline uint4 readR16G16B16_UINT(uint bufferOffset, constant uchar *buffer) 2524 { 2525 uint4 color; 2526 color.r = bytesToShort<ushort>(buffer, bufferOffset); 2527 color.g = bytesToShort<ushort>(buffer, bufferOffset + 2); 2528 color.b = bytesToShort<ushort>(buffer, bufferOffset + 4); 2529 color.a = 1; 2530 return color; 2531 } 2532 2533 2534 static inline float4 readR16G16B16A16_FLOAT(uint bufferOffset, constant uchar *buffer) 2535 { 2536 float4 color; 2537 color.r = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset)); 2538 color.g = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 2)); 2539 color.b = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 4)); 2540 color.a = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 6)); 2541 return color; 2542 } 2543 static inline void writeR16G16B16A16_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2544 { 2545 shortToBytes(as_type<ushort>(static_cast<half>(color.r)), bufferOffset, buffer); 2546 shortToBytes(as_type<ushort>(static_cast<half>(color.g)), bufferOffset + 2, buffer); 2547 shortToBytes(as_type<ushort>(static_cast<half>(color.b)), bufferOffset + 4, buffer); 2548 shortToBytes(as_type<ushort>(static_cast<half>(color.a)), bufferOffset + 6, buffer); 2549 } 2550 2551 2552 template <typename ShortType> 2553 static inline float4 readR16G16B16A16_NORM(uint bufferOffset, constant uchar *buffer) 2554 { 2555 float4 color; 2556 color.r = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset)); 2557 color.g = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 2)); 2558 color.b = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 4)); 2559 color.a = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 6)); 2560 return color; 2561 } 2562 2563 2564 2565 template<typename ShortType> 2566 static inline void writeR16G16B16A16_NORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2567 { 2568 shortToBytes(floatToNormalized<ShortType>(color.r), bufferOffset, buffer); 2569 shortToBytes(floatToNormalized<ShortType>(color.g), bufferOffset + 2, buffer); 2570 shortToBytes(floatToNormalized<ShortType>(color.b), bufferOffset + 4, buffer); 2571 shortToBytes(floatToNormalized<ShortType>(color.a), bufferOffset + 6, buffer); 2572 } 2573 2574 2575 2576 2577 static inline int4 readR16G16B16A16_SINT(uint bufferOffset, constant uchar *buffer) 2578 { 2579 int4 color; 2580 color.r = bytesToShort<short>(buffer, bufferOffset); 2581 color.g = bytesToShort<short>(buffer, bufferOffset + 2); 2582 color.b = bytesToShort<short>(buffer, bufferOffset + 4); 2583 color.a = bytesToShort<short>(buffer, bufferOffset + 6); 2584 return color; 2585 } 2586 static inline void writeR16G16B16A16_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer) 2587 { 2588 shortToBytes(static_cast<short>(color.r), bufferOffset, buffer); 2589 shortToBytes(static_cast<short>(color.g), bufferOffset + 2, buffer); 2590 shortToBytes(static_cast<short>(color.b), bufferOffset + 4, buffer); 2591 shortToBytes(static_cast<short>(color.a), bufferOffset + 6, buffer); 2592 } 2593 2594 2595 static inline uint4 readR16G16B16A16_UINT(uint bufferOffset, constant uchar *buffer) 2596 { 2597 uint4 color; 2598 color.r = bytesToShort<ushort>(buffer, bufferOffset); 2599 color.g = bytesToShort<ushort>(buffer, bufferOffset + 2); 2600 color.b = bytesToShort<ushort>(buffer, bufferOffset + 4); 2601 color.a = bytesToShort<ushort>(buffer, bufferOffset + 6); 2602 return color; 2603 } 2604 static inline void writeR16G16B16A16_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer) 2605 { 2606 shortToBytes(static_cast<ushort>(color.r), bufferOffset, buffer); 2607 shortToBytes(static_cast<ushort>(color.g), bufferOffset + 2, buffer); 2608 shortToBytes(static_cast<ushort>(color.b), bufferOffset + 4, buffer); 2609 shortToBytes(static_cast<ushort>(color.a), bufferOffset + 6, buffer); 2610 } 2611 2612 2613 static inline float4 readR32_FLOAT(uint bufferOffset, constant uchar *buffer) 2614 { 2615 float4 color; 2616 color.r = as_type<float>(bytesToInt<uint>(buffer, bufferOffset)); 2617 color.g = color.b = 0.0; 2618 color.a = 1.0; 2619 return color; 2620 } 2621 static inline void writeR32_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2622 { 2623 intToBytes(as_type<uint>(color.r), bufferOffset, buffer); 2624 } 2625 2626 2627 template <typename IntType> 2628 static inline float4 readR32_NORM(uint bufferOffset, constant uchar *buffer) 2629 { 2630 float4 color; 2631 color.r = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset)); 2632 color.g = color.b = 0.0; 2633 color.a = 1.0; 2634 return color; 2635 } 2636 2637 2638 2639 2640 static inline float4 readA32_FLOAT(uint bufferOffset, constant uchar *buffer) 2641 { 2642 float4 color; 2643 color.a = as_type<float>(bytesToInt<uint>(buffer, bufferOffset)); 2644 color.rgb = 0.0; 2645 return color; 2646 } 2647 static inline void writeA32_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2648 { 2649 intToBytes(as_type<uint>(color.a), bufferOffset, buffer); 2650 } 2651 2652 2653 static inline float4 readL32_FLOAT(uint bufferOffset, constant uchar *buffer) 2654 { 2655 float4 color; 2656 color.rgb = as_type<float>(bytesToInt<uint>(buffer, bufferOffset)); 2657 color.a = 1.0; 2658 return color; 2659 } 2660 static inline void writeL32_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2661 { 2662 intToBytes(as_type<uint>(color.r), bufferOffset, buffer); 2663 } 2664 2665 2666 static inline int4 readR32_SINT(uint bufferOffset, constant uchar *buffer) 2667 { 2668 int4 color; 2669 color.r = bytesToInt<int>(buffer, bufferOffset); 2670 color.g = color.b = 0; 2671 color.a = 1; 2672 return color; 2673 } 2674 static inline void writeR32_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer) 2675 { 2676 intToBytes(color.r, bufferOffset, buffer); 2677 } 2678 2679 2680 static inline float4 readR32_FIXED(uint bufferOffset, constant uchar *buffer) 2681 { 2682 float4 color; 2683 constexpr float kDivisor = 1.0f / (1 << 16); 2684 color.r = bytesToInt<int>(buffer, bufferOffset) * kDivisor; 2685 color.g = color.b = 0.0; 2686 color.a = 1.0; 2687 return color; 2688 } 2689 2690 2691 static inline uint4 readR32_UINT(uint bufferOffset, constant uchar *buffer) 2692 { 2693 uint4 color; 2694 color.r = bytesToInt<uint>(buffer, bufferOffset); 2695 color.g = color.b = 0; 2696 color.a = 1; 2697 return color; 2698 } 2699 static inline void writeR32_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer) 2700 { 2701 intToBytes(color.r, bufferOffset, buffer); 2702 } 2703 2704 2705 static inline float4 readL32A32_FLOAT(uint bufferOffset, constant uchar *buffer) 2706 { 2707 float4 color; 2708 color.rgb = as_type<float>(bytesToInt<uint>(buffer, bufferOffset)); 2709 color.a = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 4)); 2710 return color; 2711 } 2712 static inline void writeL32A32_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2713 { 2714 intToBytes(as_type<uint>(color.r), bufferOffset, buffer); 2715 intToBytes(as_type<uint>(color.a), bufferOffset + 4, buffer); 2716 } 2717 2718 2719 static inline float4 readR32G32_FLOAT(uint bufferOffset, constant uchar *buffer) 2720 { 2721 float4 color; 2722 color.r = as_type<float>(bytesToInt<uint>(buffer, bufferOffset)); 2723 color.g = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 4)); 2724 color.b = 0.0; 2725 color.a = 1.0; 2726 return color; 2727 } 2728 static inline void writeR32G32_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2729 { 2730 intToBytes(as_type<uint>(color.r), bufferOffset, buffer); 2731 intToBytes(as_type<uint>(color.g), bufferOffset + 4, buffer); 2732 } 2733 2734 2735 template <typename IntType> 2736 static inline float4 readR32G32_NORM(uint bufferOffset, constant uchar *buffer) 2737 { 2738 float4 color; 2739 color.r = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset)); 2740 color.g = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset + 4)); 2741 color.b = 0.0; 2742 color.a = 1.0; 2743 return color; 2744 } 2745 2746 2747 2748 2749 static inline int4 readR32G32_SINT(uint bufferOffset, constant uchar *buffer) 2750 { 2751 int4 color; 2752 color.r = bytesToInt<int>(buffer, bufferOffset); 2753 color.g = bytesToInt<int>(buffer, bufferOffset + 4); 2754 color.b = 0; 2755 color.a = 1; 2756 return color; 2757 } 2758 static inline void writeR32G32_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer) 2759 { 2760 intToBytes(color.r, bufferOffset, buffer); 2761 intToBytes(color.g, bufferOffset + 4, buffer); 2762 } 2763 2764 2765 static inline float4 readR32G32_FIXED(uint bufferOffset, constant uchar *buffer) 2766 { 2767 float4 color; 2768 constexpr float kDivisor = 1.0f / (1 << 16); 2769 color.r = bytesToInt<int>(buffer, bufferOffset) * kDivisor; 2770 color.g = bytesToInt<int>(buffer, bufferOffset + 4) * kDivisor; 2771 color.b = 0.0; 2772 color.a = 1.0; 2773 return color; 2774 } 2775 2776 2777 static inline uint4 readR32G32_UINT(uint bufferOffset, constant uchar *buffer) 2778 { 2779 uint4 color; 2780 color.r = bytesToInt<uint>(buffer, bufferOffset); 2781 color.g = bytesToInt<uint>(buffer, bufferOffset + 4); 2782 color.b = 0; 2783 color.a = 1; 2784 return color; 2785 } 2786 static inline void writeR32G32_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer) 2787 { 2788 intToBytes(color.r, bufferOffset, buffer); 2789 intToBytes(color.g, bufferOffset + 4, buffer); 2790 } 2791 2792 2793 static inline float4 readR32G32B32_FLOAT(uint bufferOffset, constant uchar *buffer) 2794 { 2795 float4 color; 2796 color.r = as_type<float>(bytesToInt<uint>(buffer, bufferOffset)); 2797 color.g = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 4)); 2798 color.b = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 8)); 2799 color.a = 1.0; 2800 return color; 2801 } 2802 2803 2804 template <typename IntType> 2805 static inline float4 readR32G32B32_NORM(uint bufferOffset, constant uchar *buffer) 2806 { 2807 float4 color; 2808 color.r = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset)); 2809 color.g = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset + 4)); 2810 color.b = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset + 8)); 2811 color.a = 1.0; 2812 return color; 2813 } 2814 2815 2816 2817 2818 static inline int4 readR32G32B32_SINT(uint bufferOffset, constant uchar *buffer) 2819 { 2820 int4 color; 2821 color.r = bytesToInt<int>(buffer, bufferOffset); 2822 color.g = bytesToInt<int>(buffer, bufferOffset + 4); 2823 color.b = bytesToInt<int>(buffer, bufferOffset + 8); 2824 color.a = 1; 2825 return color; 2826 } 2827 2828 2829 static inline float4 readR32G32B32_FIXED(uint bufferOffset, constant uchar *buffer) 2830 { 2831 float4 color; 2832 constexpr float kDivisor = 1.0f / (1 << 16); 2833 color.r = bytesToInt<int>(buffer, bufferOffset) * kDivisor; 2834 color.g = bytesToInt<int>(buffer, bufferOffset + 4) * kDivisor; 2835 color.b = bytesToInt<int>(buffer, bufferOffset + 8) * kDivisor; 2836 color.a = 1.0; 2837 return color; 2838 } 2839 2840 2841 static inline uint4 readR32G32B32_UINT(uint bufferOffset, constant uchar *buffer) 2842 { 2843 uint4 color; 2844 color.r = bytesToInt<uint>(buffer, bufferOffset); 2845 color.g = bytesToInt<uint>(buffer, bufferOffset + 4); 2846 color.b = bytesToInt<uint>(buffer, bufferOffset + 8); 2847 color.a = 1; 2848 return color; 2849 } 2850 2851 2852 static inline float4 readR32G32B32A32_FLOAT(uint bufferOffset, constant uchar *buffer) 2853 { 2854 float4 color; 2855 color.r = as_type<float>(bytesToInt<uint>(buffer, bufferOffset)); 2856 color.g = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 4)); 2857 color.b = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 8)); 2858 color.a = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 12)); 2859 return color; 2860 } 2861 static inline void writeR32G32B32A32_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2862 { 2863 intToBytes(as_type<uint>(color.r), bufferOffset, buffer); 2864 intToBytes(as_type<uint>(color.g), bufferOffset + 4, buffer); 2865 intToBytes(as_type<uint>(color.b), bufferOffset + 8, buffer); 2866 intToBytes(as_type<uint>(color.a), bufferOffset + 12, buffer); 2867 } 2868 2869 2870 template <typename IntType> 2871 static inline float4 readR32G32B32A32_NORM(uint bufferOffset, constant uchar *buffer) 2872 { 2873 float4 color; 2874 color.r = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset)); 2875 color.g = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset + 4)); 2876 color.b = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset + 8)); 2877 color.a = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset + 12)); 2878 return color; 2879 } 2880 2881 2882 2883 2884 static inline int4 readR32G32B32A32_SINT(uint bufferOffset, constant uchar *buffer) 2885 { 2886 int4 color; 2887 color.r = bytesToInt<int>(buffer, bufferOffset); 2888 color.g = bytesToInt<int>(buffer, bufferOffset + 4); 2889 color.b = bytesToInt<int>(buffer, bufferOffset + 8); 2890 color.a = bytesToInt<int>(buffer, bufferOffset + 12); 2891 return color; 2892 } 2893 static inline void writeR32G32B32A32_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer) 2894 { 2895 intToBytes(color.r, bufferOffset, buffer); 2896 intToBytes(color.g, bufferOffset + 4, buffer); 2897 intToBytes(color.b, bufferOffset + 8, buffer); 2898 intToBytes(color.a, bufferOffset + 12, buffer); 2899 } 2900 2901 static inline float4 readR32G32B32A32_FIXED(uint bufferOffset, constant uchar *buffer) 2902 { 2903 float4 color; 2904 constexpr float kDivisor = 1.0f / (1 << 16); 2905 color.r = bytesToInt<int>(buffer, bufferOffset) * kDivisor; 2906 color.g = bytesToInt<int>(buffer, bufferOffset + 4) * kDivisor; 2907 color.b = bytesToInt<int>(buffer, bufferOffset + 8) * kDivisor; 2908 color.a = bytesToInt<int>(buffer, bufferOffset + 12) * kDivisor; 2909 return color; 2910 } 2911 2912 2913 static inline uint4 readR32G32B32A32_UINT(uint bufferOffset, constant uchar *buffer) 2914 { 2915 uint4 color; 2916 color.r = bytesToInt<uint>(buffer, bufferOffset); 2917 color.g = bytesToInt<uint>(buffer, bufferOffset + 4); 2918 color.b = bytesToInt<uint>(buffer, bufferOffset + 8); 2919 color.a = bytesToInt<uint>(buffer, bufferOffset + 12); 2920 return color; 2921 } 2922 static inline void writeR32G32B32A32_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer) 2923 { 2924 intToBytes(color.r, bufferOffset, buffer); 2925 intToBytes(color.g, bufferOffset + 4, buffer); 2926 intToBytes(color.b, bufferOffset + 8, buffer); 2927 intToBytes(color.a, bufferOffset + 12, buffer); 2928 } 2929 # 1320 ".\\copy_buffer.metal" 2930 static inline int4 readR8_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR8_SINT(bufferOffset, buffer); } static inline uint4 readR8_USCALED(uint bufferOffset, constant uchar *buffer) { return readR8_UINT(bufferOffset, buffer); } static inline int4 readR8G8_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR8G8_SINT(bufferOffset, buffer); } static inline uint4 readR8G8_USCALED(uint bufferOffset, constant uchar *buffer) { return readR8G8_UINT(bufferOffset, buffer); } static inline int4 readR8G8B8_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR8G8B8_SINT(bufferOffset, buffer); } static inline uint4 readR8G8B8_USCALED(uint bufferOffset, constant uchar *buffer) { return readR8G8B8_UINT(bufferOffset, buffer); } static inline int4 readR8G8B8A8_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR8G8B8A8_SINT(bufferOffset, buffer); } static inline uint4 readR8G8B8A8_USCALED(uint bufferOffset, constant uchar *buffer) { return readR8G8B8A8_UINT(bufferOffset, buffer); } 2931 static inline int4 readR16_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR16_SINT(bufferOffset, buffer); } static inline uint4 readR16_USCALED(uint bufferOffset, constant uchar *buffer) { return readR16_UINT(bufferOffset, buffer); } static inline int4 readR16G16_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR16G16_SINT(bufferOffset, buffer); } static inline uint4 readR16G16_USCALED(uint bufferOffset, constant uchar *buffer) { return readR16G16_UINT(bufferOffset, buffer); } static inline int4 readR16G16B16_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR16G16B16_SINT(bufferOffset, buffer); } static inline uint4 readR16G16B16_USCALED(uint bufferOffset, constant uchar *buffer) { return readR16G16B16_UINT(bufferOffset, buffer); } static inline int4 readR16G16B16A16_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR16G16B16A16_SINT(bufferOffset, buffer); } static inline uint4 readR16G16B16A16_USCALED(uint bufferOffset, constant uchar *buffer) { return readR16G16B16A16_UINT(bufferOffset, buffer); } 2932 static inline int4 readR32_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR32_SINT(bufferOffset, buffer); } static inline uint4 readR32_USCALED(uint bufferOffset, constant uchar *buffer) { return readR32_UINT(bufferOffset, buffer); } static inline int4 readR32G32_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR32G32_SINT(bufferOffset, buffer); } static inline uint4 readR32G32_USCALED(uint bufferOffset, constant uchar *buffer) { return readR32G32_UINT(bufferOffset, buffer); } static inline int4 readR32G32B32_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR32G32B32_SINT(bufferOffset, buffer); } static inline uint4 readR32G32B32_USCALED(uint bufferOffset, constant uchar *buffer) { return readR32G32B32_UINT(bufferOffset, buffer); } static inline int4 readR32G32B32A32_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR32G32B32A32_SINT(bufferOffset, buffer); } static inline uint4 readR32G32B32A32_USCALED(uint bufferOffset, constant uchar *buffer) { return readR32G32B32A32_UINT(bufferOffset, buffer); } 2933 2934 static inline int4 readR10G10B10A2_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR10G10B10A2_SINT(bufferOffset, buffer); } static inline uint4 readR10G10B10A2_USCALED(uint bufferOffset, constant uchar *buffer) { return readR10G10B10A2_UINT(bufferOffset, buffer); } 2935 2936 2937 kernel void readFromBufferToFloatTexture(ushort3 gIndices [[thread_position_in_grid]], constant CopyPixelParams &options[[buffer(0)]], constant uchar *buffer [[buffer(1)]], texture2d<float, access::write> dstTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<float, access::write> dstTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<float, access::write> dstTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<float, access::write> dstTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]]) 2938 { 2939 if (gIndices.x >= options.copySize.x || gIndices.y >= options.copySize.y || gIndices.z >= options.copySize.z) { return; } 2940 # 1372 ".\\copy_buffer.metal" 2941 uint bufferOffset = options.bufferStartOffset + (gIndices.z * options.bufferDepthPitch + gIndices.y * options.bufferRowPitch + gIndices.x * options.pixelSize); 2942 2943 switch (kCopyFormatType) 2944 { 2945 case FormatID::R5G6B5_UNORM: { auto color = readR5G6B5_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8A8_UNORM: { auto color = readR8G8B8A8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8A8_UNORM_SRGB: { auto color = readR8G8B8A8_UNORM_SRGB(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8A8_SNORM: { auto color = readR8G8B8A8_SNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::B8G8R8A8_UNORM: { auto color = readB8G8R8A8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::B8G8R8A8_UNORM_SRGB: { auto color = readB8G8R8A8_UNORM_SRGB(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8_UNORM: { auto color = readR8G8B8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8_UNORM_SRGB: { auto color = readR8G8B8_UNORM_SRGB(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8_SNORM: { auto color = readR8G8B8_SNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::L8_UNORM: { auto color = readL8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::L8A8_UNORM: { auto color = readL8A8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R5G5B5A1_UNORM: { auto color = readR5G5B5A1_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R4G4B4A4_UNORM: { auto color = readR4G4B4A4_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8_UNORM: { auto color = readR8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8_SNORM: { auto color = readR8_SNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8_UNORM: { auto color = readR8G8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8_SNORM: { auto color = readR8G8_SNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16_FLOAT: { auto color = readR16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16_SNORM: { auto color = readR16_NORM<short>(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16_UNORM: { auto color = readR16_NORM<ushort>(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::A16_FLOAT: { auto color = readA16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::L16_FLOAT: { auto color = readL16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::L16A16_FLOAT: { auto color = readL16A16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16_FLOAT: { auto color = readR16G16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16_SNORM: { auto color = readR16G16_NORM<short>(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16_UNORM: { auto color = readR16G16_NORM<ushort>(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16_FLOAT: { auto color = readR16G16B16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16_SNORM: { auto color = readR16G16B16_NORM<short>(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16_UNORM: { auto color = readR16G16B16_NORM<ushort>(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16A16_FLOAT: { auto color = readR16G16B16A16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16A16_SNORM: { auto color = readR16G16B16A16_NORM<short>(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16A16_UNORM: { auto color = readR16G16B16A16_NORM<ushort>(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32_FLOAT: { auto color = readR32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::A32_FLOAT: { auto color = readA32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::L32_FLOAT: { auto color = readL32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::L32A32_FLOAT: { auto color = readL32A32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32_FLOAT: { auto color = readR32G32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32B32_FLOAT: { auto color = readR32G32B32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32B32A32_FLOAT: { auto color = readR32G32B32A32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; 2946 } 2947 2948 2949 } 2950 2951 kernel void readFromBufferToIntTexture(ushort3 gIndices [[thread_position_in_grid]], constant CopyPixelParams &options[[buffer(0)]], constant uchar *buffer [[buffer(1)]], texture2d<int, access::write> dstTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<int, access::write> dstTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<int, access::write> dstTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<int, access::write> dstTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]]) 2952 { 2953 if (gIndices.x >= options.copySize.x || gIndices.y >= options.copySize.y || gIndices.z >= options.copySize.z) { return; } 2954 # 1400 ".\\copy_buffer.metal" 2955 uint bufferOffset = options.bufferStartOffset + (gIndices.z * options.bufferDepthPitch + gIndices.y * options.bufferRowPitch + gIndices.x * options.pixelSize); 2956 2957 switch (kCopyFormatType) 2958 { 2959 case FormatID::R8_SINT: { auto color = readR8_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8_SINT: { auto color = readR8G8_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8_SINT: { auto color = readR8G8B8_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8A8_SINT: { auto color = readR8G8B8A8_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16_SINT: { auto color = readR16_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16_SINT: { auto color = readR16G16_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16_SINT: { auto color = readR16G16B16_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16A16_SINT: { auto color = readR16G16B16A16_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32_SINT: { auto color = readR32_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32_SINT: { auto color = readR32G32_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32B32_SINT: { auto color = readR32G32B32_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32B32A32_SINT: { auto color = readR32G32B32A32_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; 2960 } 2961 2962 2963 } 2964 2965 kernel void readFromBufferToUIntTexture(ushort3 gIndices [[thread_position_in_grid]], constant CopyPixelParams &options[[buffer(0)]], constant uchar *buffer [[buffer(1)]], texture2d<uint, access::write> dstTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<uint, access::write> dstTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<uint, access::write> dstTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<uint, access::write> dstTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]]) 2966 { 2967 if (gIndices.x >= options.copySize.x || gIndices.y >= options.copySize.y || gIndices.z >= options.copySize.z) { return; } 2968 # 1428 ".\\copy_buffer.metal" 2969 uint bufferOffset = options.bufferStartOffset + (gIndices.z * options.bufferDepthPitch + gIndices.y * options.bufferRowPitch + gIndices.x * options.pixelSize); 2970 2971 switch (kCopyFormatType) 2972 { 2973 case FormatID::R8_UINT: { auto color = readR8_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8_UINT: { auto color = readR8G8_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8_UINT: { auto color = readR8G8B8_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8A8_UINT: { auto color = readR8G8B8A8_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16_UINT: { auto color = readR16_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16_UINT: { auto color = readR16G16_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16_UINT: { auto color = readR16G16B16_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16A16_UINT: { auto color = readR16G16B16A16_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32_UINT: { auto color = readR32_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32_UINT: { auto color = readR32G32_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32B32_UINT: { auto color = readR32G32B32_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32B32A32_UINT: { auto color = readR32G32B32A32_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; 2974 } 2975 2976 2977 } 2978 2979 2980 kernel void writeFromFloatTextureToBuffer(ushort2 gIndices [[thread_position_in_grid]], constant WritePixelParams &options[[buffer(0)]], texture2d<float, access::read> srcTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<float, access::read> srcTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<float, access::read> srcTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<float, access::read> srcTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]], texture2d_ms<float, access::read> srcTexture2dMS [[texture(0), function_constant(kCopyTextureType2DMS)]], device uchar *buffer [[buffer(1)]]) 2981 { 2982 if (gIndices.x >= options.copySize.x || gIndices.y >= options.copySize.y) { return; } 2983 # 1481 ".\\copy_buffer.metal" 2984 uint bufferOffset = options.bufferStartOffset + (gIndices.y * options.bufferRowPitch + gIndices.x * options.pixelSize); 2985 2986 switch (kCopyFormatType) 2987 { 2988 case FormatID::R5G6B5_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR5G6B5_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8A8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8A8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8A8_UNORM_SRGB: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8A8_UNORM_SRGB(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8A8_SNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8A8_SNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::B8G8R8A8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeB8G8R8A8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::B8G8R8A8_UNORM_SRGB: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeB8G8R8A8_UNORM_SRGB(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8_UNORM_SRGB: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8_UNORM_SRGB(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8_SNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8_SNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::L8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeL8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::A8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeA8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::L8A8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeL8A8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R5G5B5A1_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR5G5B5A1_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R4G4B4A4_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR4G4B4A4_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8_SNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8_SNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8_SNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8_SNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16_SNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16_NORM<short>(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16_NORM<ushort>(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::A16_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeA16_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::L16_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeL16_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::L16A16_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeL16A16_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16_SNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16_NORM<short>(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16_NORM<ushort>(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16B16A16_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16B16A16_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16B16A16_SNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16B16A16_NORM<short>(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16B16A16_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16B16A16_NORM<ushort>(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::A32_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeA32_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::L32_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeL32_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::L32A32_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeL32A32_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32G32_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32G32_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32G32B32A32_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32G32B32A32_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; 2989 } 2990 2991 2992 } 2993 2994 kernel void writeFromIntTextureToBuffer(ushort2 gIndices [[thread_position_in_grid]], constant WritePixelParams &options[[buffer(0)]], texture2d<int, access::read> srcTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<int, access::read> srcTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<int, access::read> srcTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<int, access::read> srcTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]], texture2d_ms<int, access::read> srcTexture2dMS [[texture(0), function_constant(kCopyTextureType2DMS)]], device uchar *buffer [[buffer(1)]]) 2995 { 2996 if (gIndices.x >= options.copySize.x || gIndices.y >= options.copySize.y) { return; } 2997 # 1506 ".\\copy_buffer.metal" 2998 uint bufferOffset = options.bufferStartOffset + (gIndices.y * options.bufferRowPitch + gIndices.x * options.pixelSize); 2999 3000 switch (kCopyFormatType) 3001 { 3002 case FormatID::R8_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8A8_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8A8_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16B16A16_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16B16A16_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32G32_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32G32_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32G32B32A32_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32G32B32A32_SINT(gIndices, options, bufferOffset, color, buffer); } break; 3003 } 3004 3005 3006 } 3007 3008 kernel void writeFromUIntTextureToBuffer(ushort2 gIndices [[thread_position_in_grid]], constant WritePixelParams &options[[buffer(0)]], texture2d<uint, access::read> srcTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<uint, access::read> srcTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<uint, access::read> srcTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<uint, access::read> srcTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]], texture2d_ms<uint, access::read> srcTexture2dMS [[texture(0), function_constant(kCopyTextureType2DMS)]], device uchar *buffer [[buffer(1)]]) 3009 { 3010 if (gIndices.x >= options.copySize.x || gIndices.y >= options.copySize.y) { return; } 3011 # 1531 ".\\copy_buffer.metal" 3012 uint bufferOffset = options.bufferStartOffset + (gIndices.y * options.bufferRowPitch + gIndices.x * options.pixelSize); 3013 3014 switch (kCopyFormatType) 3015 { 3016 case FormatID::R8_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8A8_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8A8_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16B16A16_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16B16A16_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32G32_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32G32_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32G32B32A32_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32G32B32A32_UINT(gIndices, options, bufferOffset, color, buffer); } break; 3017 } 3018 3019 3020 } 3021 3022 3023 struct CopyVertexParams 3024 { 3025 uint srcBufferStartOffset; 3026 uint srcStride; 3027 uint srcComponentBytes; 3028 uint srcComponents; 3029 3030 3031 3032 uchar4 srcDefaultAlphaData; 3033 3034 uint dstBufferStartOffset; 3035 uint dstStride; 3036 uint dstComponents; 3037 3038 uint vertexCount; 3039 }; 3040 # 1581 ".\\copy_buffer.metal" 3041 template <typename IntType> 3042 static inline void writeFloatVertex(constant CopyVertexParams &options, 3043 uint idx, 3044 vec<IntType, 4> data, 3045 device uchar *dst) 3046 { 3047 uint dstOffset = idx * options.dstStride + options.dstBufferStartOffset; 3048 3049 for (uint component = 0; component < options.dstComponents; ++component, dstOffset += 4) 3050 { 3051 floatToBytes(static_cast<float>(data[component]), dstOffset, dst); 3052 } 3053 } 3054 3055 template <> 3056 inline void writeFloatVertex(constant CopyVertexParams &options, 3057 uint idx, 3058 vec<float, 4> data, 3059 device uchar *dst) 3060 { 3061 uint dstOffset = idx * options.dstStride + options.dstBufferStartOffset; 3062 3063 for (uint component = 0; component < options.dstComponents; ++component, dstOffset += 4) 3064 { 3065 floatToBytes(data[component], dstOffset, dst); 3066 } 3067 } 3068 3069 3070 static inline void convertToFloatVertexFormat(uint index, 3071 constant CopyVertexParams &options, 3072 constant uchar *srcBuffer, 3073 device uchar *dstBuffer) 3074 { 3075 # 1627 ".\\copy_buffer.metal" 3076 uint bufferOffset = options.srcBufferStartOffset + options.srcStride * index; 3077 # 1636 ".\\copy_buffer.metal" 3078 switch (kCopyFormatType) 3079 { 3080 case FormatID::R8_UNORM: { auto data = readR8_UNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8_SNORM: { auto data = readR8_SNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8_UINT: { auto data = readR8_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8_SINT: { auto data = readR8_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8_USCALED: { auto data = readR8_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8_SSCALED: { auto data = readR8_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_UNORM: { auto data = readR8G8_UNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_SNORM: { auto data = readR8G8_SNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_UINT: { auto data = readR8G8_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_SINT: { auto data = readR8G8_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_USCALED: { auto data = readR8G8_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_SSCALED: { auto data = readR8G8_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_UNORM: { auto data = readR8G8B8_UNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_SNORM: { auto data = readR8G8B8_SNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_UINT: { auto data = readR8G8B8_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_SINT: { auto data = readR8G8B8_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_USCALED: { auto data = readR8G8B8_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_SSCALED: { auto data = readR8G8B8_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_UNORM: { auto data = readR8G8B8A8_UNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_SNORM: { auto data = readR8G8B8A8_SNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_UINT: { auto data = readR8G8B8A8_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_SINT: { auto data = readR8G8B8A8_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_USCALED: { auto data = readR8G8B8A8_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_SSCALED: { auto data = readR8G8B8A8_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_UNORM: { auto data = readR16_NORM<ushort>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_SNORM: { auto data = readR16_NORM<short>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_UINT: { auto data = readR16_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_SINT: { auto data = readR16_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_USCALED: { auto data = readR16_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_SSCALED: { auto data = readR16_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_UNORM: { auto data = readR16G16_NORM<ushort>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_SNORM: { auto data = readR16G16_NORM<short>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_UINT: { auto data = readR16G16_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_SINT: { auto data = readR16G16_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_USCALED: { auto data = readR16G16_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_SSCALED: { auto data = readR16G16_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_UNORM: { auto data = readR16G16B16_NORM<ushort>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_SNORM: { auto data = readR16G16B16_NORM<short>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_UINT: { auto data = readR16G16B16_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_SINT: { auto data = readR16G16B16_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_USCALED: { auto data = readR16G16B16_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_SSCALED: { auto data = readR16G16B16_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_UNORM: { auto data = readR16G16B16A16_NORM<ushort>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_SNORM: { auto data = readR16G16B16A16_NORM<short>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_UINT: { auto data = readR16G16B16A16_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_SINT: { auto data = readR16G16B16A16_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_USCALED: { auto data = readR16G16B16A16_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_SSCALED: { auto data = readR16G16B16A16_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_UNORM: { auto data = readR32_NORM<uint>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_SNORM: { auto data = readR32_NORM<int>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_UINT: { auto data = readR32_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_SINT: { auto data = readR32_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_USCALED: { auto data = readR32_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_SSCALED: { auto data = readR32_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_UNORM: { auto data = readR32G32_NORM<uint>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_SNORM: { auto data = readR32G32_NORM<int>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_UINT: { auto data = readR32G32_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_SINT: { auto data = readR32G32_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_USCALED: { auto data = readR32G32_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_SSCALED: { auto data = readR32G32_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_UNORM: { auto data = readR32G32B32_NORM<uint>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_SNORM: { auto data = readR32G32B32_NORM<int>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_UINT: { auto data = readR32G32B32_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_SINT: { auto data = readR32G32B32_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_USCALED: { auto data = readR32G32B32_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_SSCALED: { auto data = readR32G32B32_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_UNORM: { auto data = readR32G32B32A32_NORM<uint>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_SNORM: { auto data = readR32G32B32A32_NORM<int>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_UINT: { auto data = readR32G32B32A32_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_SINT: { auto data = readR32G32B32A32_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_USCALED: { auto data = readR32G32B32A32_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_SSCALED: { auto data = readR32G32B32A32_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_FLOAT: { auto data = readR16_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_FLOAT: { auto data = readR16G16_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_FLOAT: { auto data = readR16G16B16_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_FLOAT: { auto data = readR16G16B16A16_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_FLOAT: { auto data = readR32_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_FLOAT: { auto data = readR32G32_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_FLOAT: { auto data = readR32G32B32_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_FLOAT: { auto data = readR32G32B32A32_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_FIXED: { auto data = readR32_FIXED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_FIXED: { auto data = readR32G32_FIXED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_FIXED: { auto data = readR32G32B32_FIXED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_FIXED: { auto data = readR32G32B32A32_FIXED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R10G10B10A2_SINT: { auto data = readR10G10B10A2_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R10G10B10A2_UINT: { auto data = readR10G10B10A2_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R10G10B10A2_SSCALED: { auto data = readR10G10B10A2_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R10G10B10A2_USCALED: { auto data = readR10G10B10A2_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; 3081 } 3082 3083 3084 } 3085 3086 3087 kernel void convertToFloatVertexFormatCS(uint index [[thread_position_in_grid]], 3088 constant CopyVertexParams &options [[buffer(0)]], 3089 constant uchar *srcBuffer [[buffer(1)]], 3090 device uchar *dstBuffer [[buffer(2)]]) 3091 { 3092 if (index >= options.vertexCount) { return; }; 3093 convertToFloatVertexFormat(index, options, srcBuffer, dstBuffer); 3094 } 3095 3096 3097 vertex void convertToFloatVertexFormatVS(uint index [[vertex_id]], 3098 constant CopyVertexParams &options [[buffer(0)]], 3099 constant uchar *srcBuffer [[buffer(1)]], 3100 device uchar *dstBuffer [[buffer(2)]]) 3101 { 3102 convertToFloatVertexFormat(index, options, srcBuffer, dstBuffer); 3103 } 3104 3105 3106 static inline void expandVertexFormatComponents(uint index, 3107 constant CopyVertexParams &options, 3108 constant uchar *srcBuffer, 3109 device uchar *dstBuffer) 3110 { 3111 uint srcOffset = options.srcBufferStartOffset + options.srcStride * index; 3112 uint dstOffset = options.dstBufferStartOffset + options.dstStride * index; 3113 3114 uint dstComponentsBeforeAlpha = min(options.dstComponents, 3u); 3115 uint component; 3116 for (component = 0; component < options.srcComponents; ++component, 3117 srcOffset += options.srcComponentBytes, dstOffset += options.srcComponentBytes) 3118 { 3119 for (uint byte = 0; byte < options.srcComponentBytes; ++byte) 3120 { 3121 dstBuffer[dstOffset + byte] = srcBuffer[srcOffset + byte]; 3122 } 3123 } 3124 3125 for (; component < dstComponentsBeforeAlpha; 3126 ++component, dstOffset += options.srcComponentBytes) 3127 { 3128 for (uint byte = 0; byte < options.srcComponentBytes; ++byte) 3129 { 3130 dstBuffer[dstOffset + byte] = 0; 3131 } 3132 } 3133 3134 if (component < options.dstComponents) 3135 { 3136 3137 for (uint byte = 0; byte < options.srcComponentBytes; ++byte) 3138 { 3139 dstBuffer[dstOffset + byte] = options.srcDefaultAlphaData[byte]; 3140 } 3141 } 3142 } 3143 3144 3145 kernel void expandVertexFormatComponentsCS(uint index [[thread_position_in_grid]], 3146 constant CopyVertexParams &options [[buffer(0)]], 3147 constant uchar *srcBuffer [[buffer(1)]], 3148 device uchar *dstBuffer [[buffer(2)]]) 3149 { 3150 if (index >= options.vertexCount) { return; }; 3151 3152 expandVertexFormatComponents(index, options, srcBuffer, dstBuffer); 3153 } 3154 3155 3156 vertex void expandVertexFormatComponentsVS(uint index [[vertex_id]], 3157 constant CopyVertexParams &options [[buffer(0)]], 3158 constant uchar *srcBuffer [[buffer(1)]], 3159 device uchar *dstBuffer [[buffer(2)]]) 3160 { 3161 expandVertexFormatComponents(index, options, srcBuffer, dstBuffer); 3162 } 3163 3164 3165 kernel void linearizeBlocks(ushort2 position [[thread_position_in_grid]], 3166 constant uint2 *dimensions [[buffer(0)]], 3167 constant uint2 *srcBuffer [[buffer(1)]], 3168 device uint2 *dstBuffer [[buffer(2)]]) 3169 { 3170 if (any(uint2(position) >= *dimensions)) 3171 { 3172 return; 3173 } 3174 uint2 t = uint2(position); 3175 t = (t | (t << 8)) & 0x00FF00FF; 3176 t = (t | (t << 4)) & 0x0F0F0F0F; 3177 t = (t | (t << 2)) & 0x33333333; 3178 t = (t | (t << 1)) & 0x55555555; 3179 dstBuffer[position.y * (*dimensions).x + position.x] = srcBuffer[(t.x << 1) | t.y]; 3180 } 3181 3182 3183 kernel void saturateDepth(uint2 position [[thread_position_in_grid]], 3184 constant uint3 *dimensions [[buffer(0)]], 3185 device float *srcBuffer [[buffer(1)]], 3186 device float *dstBuffer [[buffer(2)]]) 3187 { 3188 if (any(position >= (*dimensions).xy)) 3189 { 3190 return; 3191 } 3192 const uint srcOffset = position.y * (*dimensions).z + position.x; 3193 const uint dstOffset = position.y * (*dimensions).x + position.x; 3194 dstBuffer[dstOffset] = saturate(srcBuffer[srcOffset]); 3195 } 3196 # 6 "temp_master_source.metal" 2 3197 # 1 ".\\visibility.metal" 1 3198 3199 3200 3201 3202 3203 3204 3205 3206 constant bool kCombineWithExistingResult [[function_constant(1000)]]; 3207 3208 3209 3210 struct CombineVisibilityResultOptions 3211 { 3212 3213 uint startOffset; 3214 3215 uint numOffsets; 3216 }; 3217 3218 kernel void combineVisibilityResult(uint idx [[thread_position_in_grid]], 3219 constant CombineVisibilityResultOptions &options [[buffer(0)]], 3220 constant ushort4 *renderpassVisibilityResult [[buffer(1)]], 3221 device ushort4 *finalResults [[buffer(2)]]) 3222 { 3223 if (idx > 0) 3224 { 3225 3226 3227 3228 return; 3229 } 3230 ushort4 finalResult16x4; 3231 3232 if (kCombineWithExistingResult) 3233 { 3234 finalResult16x4 = finalResults[0]; 3235 } 3236 else 3237 { 3238 finalResult16x4 = ushort4(0, 0, 0, 0); 3239 } 3240 3241 for (uint i = 0; i < options.numOffsets; ++i) 3242 { 3243 uint offset = options.startOffset + i; 3244 ushort4 renderpassResult = renderpassVisibilityResult[offset]; 3245 3246 3247 finalResult16x4 = finalResult16x4 | renderpassResult; 3248 } 3249 finalResults[0] = finalResult16x4; 3250 } 3251 # 7 "temp_master_source.metal" 2 3252 # 1 ".\\rewrite_indices.metal" 1 3253 # 11 ".\\rewrite_indices.metal" 3254 # 1 ".\\rewrite_indices_shared.h" 1 3255 # 12 ".\\rewrite_indices.metal" 2 3256 using namespace metal; 3257 3258 constant uint fixIndexBufferKey [[ function_constant(2000) ]]; 3259 constant bool indexBufferIsUint16 = (((fixIndexBufferKey >> 0U) & 0x03U) == 2U); 3260 constant bool indexBufferIsUint32 = (((fixIndexBufferKey >> 0U) & 0x03U) == 3U); 3261 constant bool outIndexBufferIsUint16 = (((fixIndexBufferKey >> 2U) & 0x03U) == 2U); 3262 constant bool outIndexBufferIsUint32 = (((fixIndexBufferKey >> 2U) & 0x03U) == 3U); 3263 constant bool doPrimRestart = (fixIndexBufferKey & 0x00100U); 3264 constant uint fixIndexBufferMode = (fixIndexBufferKey >> 4U) & 0x0FU; 3265 3266 3267 static inline uint readIdx( 3268 const device ushort *indexBufferUint16, 3269 const device uint *indexBufferUint32, 3270 const uint restartIndex, 3271 const uint indexCount, 3272 uint idx, 3273 thread bool &foundRestart, 3274 thread uint &indexThatRestartedFirst 3275 ) 3276 { 3277 uint inIndex = idx; 3278 if(inIndex < indexCount) 3279 { 3280 if(indexBufferIsUint16) 3281 { 3282 inIndex = indexBufferUint16[inIndex]; 3283 } 3284 else if(indexBufferIsUint32) 3285 { 3286 inIndex = indexBufferUint32[inIndex]; 3287 } 3288 } 3289 else 3290 { 3291 foundRestart = true; 3292 indexThatRestartedFirst = idx; 3293 } 3294 if(doPrimRestart && !foundRestart && inIndex == restartIndex) 3295 { 3296 foundRestart = true; 3297 indexThatRestartedFirst = idx; 3298 } 3299 return inIndex; 3300 } 3301 3302 static inline void outputPrimitive( 3303 const device ushort *indexBufferUint16, 3304 const device uint *indexBufferUint32, 3305 device ushort *outIndexBufferUint16, 3306 device uint *outIndexBufferUint32, 3307 const uint restartIndex, 3308 const uint indexCount, 3309 thread uint &baseIndex, 3310 uint onIndex, 3311 thread uint &onOutIndex 3312 ) 3313 { 3314 if(baseIndex > onIndex) return; 3315 bool foundRestart = false; 3316 uint indexThatRestartedFirst = 0; 3317 # 86 ".\\rewrite_indices.metal" 3318 switch(fixIndexBufferMode) 3319 { 3320 case 0x00U: 3321 { 3322 auto tmpIndex = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex, foundRestart, indexThatRestartedFirst); 3323 if(foundRestart) 3324 { 3325 baseIndex = indexThatRestartedFirst + 1; 3326 return; 3327 } 3328 3329 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex; } onOutIndex++; }); 3330 } 3331 break; 3332 case 0x01U: 3333 { 3334 auto tmpIndex0 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 0, foundRestart, indexThatRestartedFirst); 3335 auto tmpIndex1 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 1, foundRestart, indexThatRestartedFirst); 3336 if(foundRestart) 3337 { 3338 baseIndex = indexThatRestartedFirst + 1; 3339 return; 3340 } 3341 if((onIndex - baseIndex) & 1) return; 3342 3343 if(fixIndexBufferKey & 0x00200U) 3344 { 3345 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; }); 3346 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; }); 3347 } 3348 else 3349 { 3350 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; }); 3351 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; }); 3352 } 3353 } 3354 break; 3355 case 0x03U: 3356 { 3357 auto tmpIndex0 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 0, foundRestart, indexThatRestartedFirst); 3358 auto tmpIndex1 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 1, foundRestart, indexThatRestartedFirst); 3359 if(foundRestart) 3360 { 3361 baseIndex = indexThatRestartedFirst + 1; 3362 return; 3363 } 3364 3365 if(fixIndexBufferKey & 0x00200U) 3366 { 3367 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; }); 3368 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; }); 3369 } 3370 else 3371 { 3372 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; }); 3373 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; }); 3374 } 3375 } 3376 break; 3377 case 0x04U: 3378 { 3379 auto tmpIndex0 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 0, foundRestart, indexThatRestartedFirst); 3380 auto tmpIndex1 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 1, foundRestart, indexThatRestartedFirst); 3381 auto tmpIndex2 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 2, foundRestart, indexThatRestartedFirst); 3382 if(foundRestart) 3383 { 3384 baseIndex = indexThatRestartedFirst + 1; 3385 return; 3386 } 3387 if(((onIndex - baseIndex) % 3) != 0) return; 3388 3389 if(fixIndexBufferKey & 0x00200U) 3390 { 3391 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2; } onOutIndex++; }); 3392 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; }); 3393 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; }); 3394 } 3395 else 3396 { 3397 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; }); 3398 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; }); 3399 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2; } onOutIndex++; }); 3400 } 3401 } 3402 break; 3403 case 0x05U: 3404 { 3405 uint isOdd = ((onIndex - baseIndex) & 1); 3406 auto tmpIndex0 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 0 + isOdd, foundRestart, indexThatRestartedFirst); 3407 auto tmpIndex1 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 1 - isOdd, foundRestart, indexThatRestartedFirst); 3408 auto tmpIndex2 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 2, foundRestart, indexThatRestartedFirst); 3409 if(foundRestart) 3410 { 3411 baseIndex = indexThatRestartedFirst + 1; 3412 return; 3413 } 3414 3415 if(fixIndexBufferKey & 0x00200U) 3416 { 3417 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2; } onOutIndex++; }); 3418 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; }); 3419 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; }); 3420 } 3421 else 3422 { 3423 3424 if(isOdd) 3425 { 3426 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; }); 3427 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2; } onOutIndex++; }); 3428 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; }); 3429 } 3430 else 3431 { 3432 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; }); 3433 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; }); 3434 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2; } onOutIndex++; }); 3435 } 3436 } 3437 3438 assert(onOutIndex <= (onIndex + 1) * 3); 3439 assert(onOutIndex <= (indexCount - 2) * 3); 3440 } 3441 break; 3442 3443 } 3444 3445 3446 } 3447 3448 kernel void fixIndexBuffer( 3449 const device ushort *indexBufferUint16 [[ buffer(0), function_constant(indexBufferIsUint16) ]], 3450 const device uint *indexBufferUint32 [[ buffer(0), function_constant(indexBufferIsUint32) ]], 3451 device ushort *outIndexBufferUint16 [[ buffer(1), function_constant(outIndexBufferIsUint16) ]], 3452 device uint *outIndexBufferUint32 [[ buffer(1), function_constant(outIndexBufferIsUint32) ]], 3453 constant uint &indexCount [[ buffer(2) ]], 3454 constant uint &primCount [[ buffer(3) ]], 3455 uint prim [[thread_position_in_grid]]) 3456 { 3457 constexpr uint restartIndex = 0xFFFFFFFF; 3458 uint baseIndex = 0; 3459 uint onIndex = onIndex; 3460 uint onOutIndex = onOutIndex; 3461 if(prim < primCount) 3462 { 3463 switch(fixIndexBufferMode) 3464 { 3465 case 0x00U: 3466 onIndex = prim; 3467 onOutIndex = prim; 3468 break; 3469 case 0x01U: 3470 onIndex = prim * 2; 3471 onOutIndex = prim * 2; 3472 break; 3473 case 0x03U: 3474 onIndex = prim; 3475 onOutIndex = prim * 2; 3476 break; 3477 case 0x04U: 3478 onIndex = prim * 3; 3479 onOutIndex = prim * 3; 3480 break; 3481 case 0x05U: 3482 onIndex = prim; 3483 onOutIndex = prim * 3; 3484 break; 3485 } 3486 outputPrimitive(indexBufferUint16, indexBufferUint32, outIndexBufferUint16, outIndexBufferUint32, restartIndex, indexCount, baseIndex, onIndex, onOutIndex); 3487 } 3488 } 3489 3490 3491 3492 static inline void generatePrimitive( 3493 device ushort *outIndexBufferUint16, 3494 device uint *outIndexBufferUint32, 3495 const uint firstVertex, 3496 const uint indexCount, 3497 thread uint &baseIndex, 3498 uint onIndex, 3499 uint primCount, 3500 thread uint &onOutIndex 3501 ) 3502 { 3503 if(baseIndex > onIndex) return; 3504 # 284 ".\\rewrite_indices.metal" 3505 switch(fixIndexBufferMode) 3506 { 3507 case 0x00U: 3508 { 3509 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = onIndex + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = onIndex + firstVertex; } onOutIndex++; }); 3510 } 3511 break; 3512 case 0x01U: 3513 { 3514 auto tmpIndex0 = onIndex + 0; 3515 auto tmpIndex1 = onIndex + 1; 3516 if(fixIndexBufferKey & 0x00200U) 3517 { 3518 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; }); 3519 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; }); 3520 } 3521 else 3522 { 3523 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; }); 3524 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; }); 3525 } 3526 } 3527 break; 3528 case 0x02U: 3529 { 3530 auto tmpIndex0 = onIndex + 0; 3531 auto tmpIndex1 = (onIndex + 1) % primCount; 3532 if(fixIndexBufferKey & 0x00200U) 3533 { 3534 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; }); 3535 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; }); 3536 } 3537 else 3538 { 3539 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; }); 3540 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; }); 3541 } 3542 } 3543 break; 3544 case 0x03U: 3545 { 3546 auto tmpIndex0 = onIndex + 0; 3547 auto tmpIndex1 = onIndex + 1; 3548 if(fixIndexBufferKey & 0x00200U) 3549 { 3550 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; }); 3551 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; }); 3552 } 3553 else 3554 { 3555 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; }); 3556 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; }); 3557 } 3558 } 3559 break; 3560 case 0x04U: 3561 { 3562 auto tmpIndex0 = onIndex + 0; 3563 auto tmpIndex1 = onIndex + 1; 3564 auto tmpIndex2 = onIndex + 2; 3565 if(fixIndexBufferKey & 0x00200U) 3566 { 3567 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2 + firstVertex; } onOutIndex++; }); 3568 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; }); 3569 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; }); 3570 } 3571 else 3572 { 3573 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; }); 3574 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; }); 3575 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2 + firstVertex; } onOutIndex++; }); 3576 } 3577 } 3578 break; 3579 case 0x05U: 3580 { 3581 uint isOdd = ((onIndex - baseIndex) & 1); 3582 auto tmpIndex0 = onIndex + 0 + isOdd; 3583 auto tmpIndex1 = onIndex + 1 - isOdd; 3584 auto tmpIndex2 = onIndex + 2; 3585 if(fixIndexBufferKey & 0x00200U) 3586 { 3587 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2 + firstVertex; } onOutIndex++; }); 3588 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; }); 3589 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; }); 3590 } 3591 else 3592 { 3593 if(isOdd) 3594 { 3595 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; }); 3596 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2 + firstVertex; } onOutIndex++; }); 3597 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; }); 3598 } 3599 else 3600 { 3601 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; }); 3602 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; }); 3603 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2 + firstVertex; } onOutIndex++; }); 3604 } 3605 } 3606 3607 assert(onOutIndex <= (onIndex + 1) * 3); 3608 assert(onOutIndex <= (indexCount - 2) * 3); 3609 break; 3610 } 3611 case 0x06U: 3612 { 3613 auto tmpIndex0 = 0; 3614 auto tmpIndex1 = onIndex + 1; 3615 auto tmpIndex2 = onIndex + 2; 3616 3617 if(fixIndexBufferKey & 0x00200U) 3618 { 3619 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2 + firstVertex; } onOutIndex++; }); 3620 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; }); 3621 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; }); 3622 } 3623 else 3624 { 3625 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; }); 3626 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2 + firstVertex; } onOutIndex++; }); 3627 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; }); 3628 } 3629 } 3630 break; 3631 3632 } 3633 3634 } 3635 3636 3637 3638 kernel void genIndexBuffer( 3639 device ushort *outIndexBufferUint16 [[ buffer(1), function_constant(outIndexBufferIsUint16) ]], 3640 device uint *outIndexBufferUint32 [[ buffer(1), function_constant(outIndexBufferIsUint32) ]], 3641 constant uint &indexCount [[ buffer(2) ]], 3642 constant uint &primCount [[ buffer(3) ]], 3643 constant uint &firstVertex [[ buffer(4) ]], 3644 uint prim [[thread_position_in_grid]]) 3645 { 3646 uint baseIndex = 0; 3647 uint onIndex = onIndex; 3648 uint onOutIndex = onOutIndex; 3649 if(prim < primCount) 3650 { 3651 switch(fixIndexBufferMode) 3652 { 3653 case 0x00U: 3654 onIndex = prim; 3655 onOutIndex = prim; 3656 break; 3657 case 0x01U: 3658 onIndex = prim * 2; 3659 onOutIndex = prim * 2; 3660 break; 3661 case 0x03U: 3662 onIndex = prim; 3663 onOutIndex = prim * 2; 3664 break; 3665 case 0x02U: 3666 onIndex = prim; 3667 onOutIndex = prim * 2; 3668 break; 3669 case 0x04U: 3670 onIndex = prim * 3; 3671 onOutIndex = prim * 3; 3672 break; 3673 case 0x05U: 3674 onIndex = prim; 3675 onOutIndex = prim * 3; 3676 break; 3677 case 0x06U: 3678 onIndex = prim; 3679 onOutIndex = prim * 3; 3680 break; 3681 } 3682 generatePrimitive(outIndexBufferUint16, outIndexBufferUint32, firstVertex, indexCount, baseIndex, onIndex, primCount, onOutIndex); 3683 } 3684 } 3685 # 8 "temp_master_source.metal" 2 3686 3687 3688 )"; 3689