1// GENERATED FILE - DO NOT EDIT. 2// Generated by gen_mtl_internal_shaders.py 3// 4// Copyright 2020 The ANGLE Project Authors. All rights reserved. 5// Use of this source code is governed by a BSD-style license that can be 6// found in the LICENSE file. 7// 8 9// Combined Metal default shaders. 10 11# 1 "temp_master_source.metal" 12# 1 "<built-in>" 1 13# 1 "<built-in>" 3 14# 435 "<built-in>" 3 15# 1 "<command line>" 1 16# 1 "<built-in>" 2 17# 1 "temp_master_source.metal" 2 18# 1 ".\\blit.metal" 1 19 20 21 22 23 24 25 26# 1 ".\\common.h" 1 27# 13 ".\\common.h" 28# include <simd/simd.h> 29# include <metal_stdlib> 30 31 32# 1 ".\\constants.h" 1 33# 11 ".\\constants.h" 34namespace rx 35{ 36namespace mtl_shader 37{ 38 39enum 40{ 41 kTextureType2D = 0, 42 kTextureType2DMultisample = 1, 43 kTextureType2DArray = 2, 44 kTextureTypeCube = 3, 45 kTextureType3D = 4, 46 kTextureTypeCount = 5, 47}; 48 49 50 51 52} 53} 54# 18 ".\\common.h" 2 55 56 57 58 59 60 61 62using namespace metal; 63 64 65 66constant uint32_t kNumColorOutputs [[function_constant(0)]]; 67constant bool kColorOutputAvailable0 = kNumColorOutputs > 0; 68constant bool kColorOutputAvailable1 = kNumColorOutputs > 1; 69constant bool kColorOutputAvailable2 = kNumColorOutputs > 2; 70constant bool kColorOutputAvailable3 = kNumColorOutputs > 3; 71constant bool kColorOutputAvailable4 = kNumColorOutputs > 4; 72constant bool kColorOutputAvailable5 = kNumColorOutputs > 5; 73constant bool kColorOutputAvailable6 = kNumColorOutputs > 6; 74constant bool kColorOutputAvailable7 = kNumColorOutputs > 7; 75 76namespace rx 77{ 78namespace mtl_shader 79{ 80 81 82constant float2 gCorners[3] = {float2(-1.0f, -1.0f), float2(3.0f, -1.0f), float2(-1.0f, 3.0f)}; 83 84template <typename T> 85struct MultipleColorOutputs 86{ 87 vec<T, 4> color0 [[color(0), function_constant(kColorOutputAvailable0)]]; 88 vec<T, 4> color1 [[color(1), function_constant(kColorOutputAvailable1)]]; 89 vec<T, 4> color2 [[color(2), function_constant(kColorOutputAvailable2)]]; 90 vec<T, 4> color3 [[color(3), function_constant(kColorOutputAvailable3)]]; 91 vec<T, 4> color4 [[color(4), function_constant(kColorOutputAvailable4)]]; 92 vec<T, 4> color5 [[color(5), function_constant(kColorOutputAvailable5)]]; 93 vec<T, 4> color6 [[color(6), function_constant(kColorOutputAvailable6)]]; 94 vec<T, 4> color7 [[color(7), function_constant(kColorOutputAvailable7)]]; 95}; 96# 69 ".\\common.h" 97template <typename T> 98static inline MultipleColorOutputs<T> toMultipleColorOutputs(vec<T, 4> color) 99{ 100 MultipleColorOutputs<T> re; 101 102 do { if (kColorOutputAvailable0) { re.color0 = color; } } while (0); 103 do { if (kColorOutputAvailable1) { re.color1 = color; } } while (0); 104 do { if (kColorOutputAvailable2) { re.color2 = color; } } while (0); 105 do { if (kColorOutputAvailable3) { re.color3 = color; } } while (0); 106 do { if (kColorOutputAvailable4) { re.color4 = color; } } while (0); 107 do { if (kColorOutputAvailable5) { re.color5 = color; } } while (0); 108 do { if (kColorOutputAvailable6) { re.color6 = color; } } while (0); 109 do { if (kColorOutputAvailable7) { re.color7 = color; } } while (0); 110 111 return re; 112} 113 114static inline float3 cubeTexcoords(float2 texcoords, int face) 115{ 116 texcoords = 2.0 * texcoords - 1.0; 117 switch (face) 118 { 119 case 0: 120 return float3(1.0, -texcoords.y, -texcoords.x); 121 case 1: 122 return float3(-1.0, -texcoords.y, texcoords.x); 123 case 2: 124 return float3(texcoords.x, 1.0, texcoords.y); 125 case 3: 126 return float3(texcoords.x, -1.0, -texcoords.y); 127 case 4: 128 return float3(texcoords.x, -texcoords.y, 1.0); 129 case 5: 130 return float3(-texcoords.x, -texcoords.y, -1.0); 131 } 132 return float3(texcoords, 0); 133} 134 135template <typename T> 136static inline vec<T, 4> resolveTextureMS(texture2d_ms<T> srcTexture, uint2 coords) 137{ 138 uint samples = srcTexture.get_num_samples(); 139 140 vec<T, 4> output(0); 141 142 for (uint sample = 0; sample < samples; ++sample) 143 { 144 output += srcTexture.read(coords, sample); 145 } 146 147 output = output / samples; 148 149 return output; 150} 151 152static inline float4 sRGBtoLinear(float4 color) 153{ 154 float3 linear1 = color.rgb / 12.92; 155 float3 linear2 = powr((color.rgb + float3(0.055)) / 1.055, 2.4); 156 float3 factor = float3(color.rgb <= float3(0.04045)); 157 float4 linear = float4(factor * linear1 + float3(1.0 - factor) * linear2, color.a); 158 159 return linear; 160} 161 162static inline float linearToSRGB(float color) 163{ 164 if (color <= 0.0f) 165 return 0.0f; 166 if (color < 0.0031308f) 167 return 12.92f * color; 168 if (color < 1.0f) 169 return 1.055f * powr(color, 0.41666f) - 0.055f; 170 return 1.0f; 171} 172 173static inline float4 linearToSRGB(float4 color) 174{ 175 return float4(linearToSRGB(color.r), linearToSRGB(color.g), linearToSRGB(color.b), color.a); 176} 177 178template <typename Short> 179static inline Short bytesToShort(constant uchar *input, uint offset) 180{ 181 Short inputLo = input[offset]; 182 Short inputHi = input[offset + 1]; 183 184 return inputLo | (inputHi << 8); 185} 186 187template <typename Int> 188static inline Int bytesToInt(constant uchar *input, uint offset) 189{ 190 Int input0 = input[offset]; 191 Int input1 = input[offset + 1]; 192 Int input2 = input[offset + 2]; 193 Int input3 = input[offset + 3]; 194 195 return input0 | (input1 << 8) | (input2 << 16) | (input3 << 24); 196} 197 198template <typename Short> 199static inline void shortToBytes(Short val, uint offset, device uchar *output) 200{ 201 ushort valUnsigned = as_type<ushort>(val); 202 output[offset] = valUnsigned & 0xff; 203 output[offset + 1] = (valUnsigned >> 8) & 0xff; 204} 205 206template <typename Int> 207static inline void intToBytes(Int val, uint offset, device uchar *output) 208{ 209 uint valUnsigned = as_type<uint>(val); 210 output[offset] = valUnsigned & 0xff; 211 output[offset + 1] = (valUnsigned >> 8) & 0xff; 212 output[offset + 2] = (valUnsigned >> 16) & 0xff; 213 output[offset + 3] = (valUnsigned >> 24) & 0xff; 214} 215 216static inline void floatToBytes(float val, uint offset, device uchar *output) 217{ 218 intToBytes(as_type<uint>(val), offset, output); 219} 220 221static inline void int24bitToBytes(uint val, uint offset, device uchar *output) 222{ 223 output[offset] = val & 0xff; 224 output[offset + 1] = (val >> 8) & 0xff; 225 output[offset + 2] = (val >> 16) & 0xff; 226} 227 228template <unsigned int inputBitCount, unsigned int inputBitStart, typename T> 229static inline T getShiftedData(T input) 230{ 231 static_assert(inputBitCount + inputBitStart <= (sizeof(T) * 8), 232 "T must have at least as many bits as inputBitCount + inputBitStart."); 233 const T mask = (1 << inputBitCount) - 1; 234 return (input >> inputBitStart) & mask; 235} 236 237template <unsigned int inputBitCount, unsigned int inputBitStart, typename T> 238static inline T shiftData(T input) 239{ 240 static_assert(inputBitCount + inputBitStart <= (sizeof(T) * 8), 241 "T must have at least as many bits as inputBitCount + inputBitStart."); 242 const T mask = (1 << inputBitCount) - 1; 243 return (input & mask) << inputBitStart; 244} 245 246template <unsigned int inputBitCount, typename T> 247static inline float normalizedToFloat(T input) 248{ 249 static_assert(inputBitCount <= (sizeof(T) * 8), 250 "T must have more bits than or same bits as inputBitCount."); 251 static_assert(inputBitCount <= 23, "Only single precision is supported"); 252 253 constexpr float inverseMax = 1.0f / ((1 << inputBitCount) - 1); 254 return input * inverseMax; 255} 256 257template <typename T> 258static inline float normalizedToFloat(T input) 259{ 260 return normalizedToFloat<sizeof(T) * 8, T>(input); 261} 262 263template <> 264inline float normalizedToFloat(short input) 265{ 266 constexpr float inverseMax = 1.0f / 0x7fff; 267 return static_cast<float>(input) * inverseMax; 268} 269 270template <> 271inline float normalizedToFloat(int input) 272{ 273 constexpr float inverseMax = 1.0f / 0x7fffffff; 274 return static_cast<float>(input) * inverseMax; 275} 276 277template <> 278inline float normalizedToFloat(uint input) 279{ 280 constexpr float inverseMax = 1.0f / 0xffffffff; 281 return static_cast<float>(input) * inverseMax; 282} 283 284template <unsigned int outputBitCount, typename T> 285static inline T floatToNormalized(float input) 286{ 287 static_assert(outputBitCount <= (sizeof(T) * 8), 288 "T must have more bits than or same bits as inputBitCount."); 289 static_assert(outputBitCount > (metal::is_unsigned<T>::value ? 0 : 1), 290 "outputBitCount must be at least 1 not counting the sign bit."); 291 constexpr unsigned int bits = 292 metal::is_unsigned<T>::value ? outputBitCount : outputBitCount - 1; 293 static_assert(bits <= 23, "Only single precision is supported"); 294 295 return static_cast<T>(metal::round(((1 << bits) - 1) * input)); 296} 297 298template <typename T> 299static inline T floatToNormalized(float input) 300{ 301 return floatToNormalized<sizeof(T) * 8, T>(input); 302} 303 304} 305} 306# 9 ".\\blit.metal" 2 307 308using namespace rx::mtl_shader; 309 310 311constant bool kPremultiplyAlpha [[function_constant(1)]]; 312constant bool kUnmultiplyAlpha [[function_constant(2)]]; 313constant bool kTransformLinearToSrgb [[function_constant(3)]]; 314constant int kSourceTextureType [[function_constant(4)]]; 315constant int kSourceTexture2Type [[function_constant(5)]]; 316 317constant bool kSourceTextureType2D = kSourceTextureType == kTextureType2D; 318constant bool kSourceTextureType2DArray = kSourceTextureType == kTextureType2DArray; 319constant bool kSourceTextureType2DMS = kSourceTextureType == kTextureType2DMultisample; 320constant bool kSourceTextureTypeCube = kSourceTextureType == kTextureTypeCube; 321constant bool kSourceTextureType3D = kSourceTextureType == kTextureType3D; 322 323constant bool kSourceTexture2Type2D = kSourceTexture2Type == kTextureType2D; 324constant bool kSourceTexture2Type2DArray = kSourceTexture2Type == kTextureType2DArray; 325constant bool kSourceTexture2Type2DMS = kSourceTexture2Type == kTextureType2DMultisample; 326constant bool kSourceTexture2TypeCube = kSourceTexture2Type == kTextureTypeCube; 327 328struct BlitParams 329{ 330 331 float4 srcTexCoords; 332 int srcLevel; 333 int srcLayer; 334 bool dstLuminance; 335 uint8_t padding[7]; 336}; 337 338struct BlitVSOut 339{ 340 float4 position [[position]]; 341 float2 texCoords [[center_no_perspective, user(locn1)]]; 342}; 343 344vertex BlitVSOut blitVS(unsigned int vid [[vertex_id]], constant BlitParams &options [[buffer(0)]]) 345{ 346 BlitVSOut output; 347 output.position.xy = select(float2(-1.0f), float2(1.0f), bool2(vid & uint2(2, 1))); 348 output.position.zw = float2(0.0, 1.0); 349 output.texCoords = select(options.srcTexCoords.xy, options.srcTexCoords.zw, bool2(vid & uint2(2, 1))); 350 351 return output; 352} 353 354template <typename SrcTexture2d> 355static uint2 getImageCoords(SrcTexture2d srcTexture, float2 texCoords) 356{ 357 uint2 dimens(srcTexture.get_width(), srcTexture.get_height()); 358 uint2 coords = uint2(texCoords * float2(dimens)); 359 360 return coords; 361} 362 363template <typename T> 364static inline vec<T, 4> blitSampleTextureMS(texture2d_ms<T> srcTexture, float2 texCoords) 365{ 366 uint2 coords = getImageCoords(srcTexture, texCoords); 367 return resolveTextureMS(srcTexture, coords); 368} 369 370template <typename T> 371static inline vec<T, 4> blitSampleTexture3D(texture3d<T> srcTexture, 372 sampler textureSampler, 373 float2 texCoords, 374 constant BlitParams &options) 375{ 376 uint depth = srcTexture.get_depth(options.srcLevel); 377 float zCoord = (float(options.srcLayer) + 0.5) / float(depth); 378 379 return srcTexture.sample(textureSampler, float3(texCoords, zCoord), level(options.srcLevel)); 380} 381# 101 ".\\blit.metal" 382template <typename T> 383static inline vec<T, 4> blitReadTexture(BlitVSOut input [[stage_in]], texture2d<T> srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], texture2d_array<T> srcTexture2dArray [[texture(0), function_constant(kSourceTextureType2DArray)]], texture2d_ms<T> srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]], texturecube<T> srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]], texture3d<T> srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]], sampler textureSampler [[sampler(0)]], constant BlitParams &options [[buffer(0)]]) 384{ 385 vec<T, 4> output; 386 387 switch (kSourceTextureType) 388 { 389 case kTextureType2D: 390 output = srcTexture2d.sample(textureSampler, input.texCoords, level(options.srcLevel)); 391 break; 392 case kTextureType2DArray: 393 output = srcTexture2dArray.sample(textureSampler, input.texCoords, options.srcLayer, 394 level(options.srcLevel)); 395 break; 396 case kTextureType2DMultisample: 397 output = blitSampleTextureMS(srcTexture2dMS, input.texCoords); 398 break; 399 case kTextureTypeCube: 400 output = srcTextureCube.sample(textureSampler, 401 cubeTexcoords(input.texCoords, options.srcLayer), 402 level(options.srcLevel)); 403 break; 404 case kTextureType3D: 405 output = blitSampleTexture3D(srcTexture3d, textureSampler, input.texCoords, options); 406 break; 407 } 408 409 if (kTransformLinearToSrgb) { 410 output.x = linearToSRGB(output.x); 411 output.y = linearToSRGB(output.y); 412 output.z = linearToSRGB(output.z); 413 } 414 if (kUnmultiplyAlpha) 415 { 416 if (output.a != 0.0) 417 { 418 output.xyz /= output.a; 419 } 420 } 421 if (kPremultiplyAlpha) 422 { 423 output.xyz *= output.a; 424 } 425 426 if (options.dstLuminance) 427 { 428 output.g = output.b = output.r; 429 } 430 431 return output; 432} 433 434template <typename T> 435static inline MultipleColorOutputs<T> blitFS(BlitVSOut input [[stage_in]], texture2d<T> srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], texture2d_array<T> srcTexture2dArray [[texture(0), function_constant(kSourceTextureType2DArray)]], texture2d_ms<T> srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]], texturecube<T> srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]], texture3d<T> srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]], sampler textureSampler [[sampler(0)]], constant BlitParams &options [[buffer(0)]]) 436{ 437 vec<T, 4> output = blitReadTexture(input, srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, srcTexture3d, textureSampler, options); 438 439 return toMultipleColorOutputs(output); 440} 441 442fragment MultipleColorOutputs<float> blitFloatFS(BlitVSOut input [[stage_in]], texture2d<float> srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], texture2d_array<float> srcTexture2dArray [[texture(0), function_constant(kSourceTextureType2DArray)]], texture2d_ms<float> srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]], texturecube<float> srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]], texture3d<float> srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]], sampler textureSampler [[sampler(0)]], constant BlitParams &options [[buffer(0)]]) 443{ 444 return blitFS(input, srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, srcTexture3d, textureSampler, options); 445} 446fragment MultipleColorOutputs<int> blitIntFS(BlitVSOut input [[stage_in]], texture2d<int> srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], texture2d_array<int> srcTexture2dArray [[texture(0), function_constant(kSourceTextureType2DArray)]], texture2d_ms<int> srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]], texturecube<int> srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]], texture3d<int> srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]], sampler textureSampler [[sampler(0)]], constant BlitParams &options [[buffer(0)]]) 447{ 448 return blitFS(input, srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, srcTexture3d, textureSampler, options); 449} 450fragment MultipleColorOutputs<uint> blitUIntFS(BlitVSOut input [[stage_in]], texture2d<uint> srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], texture2d_array<uint> srcTexture2dArray [[texture(0), function_constant(kSourceTextureType2DArray)]], texture2d_ms<uint> srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]], texturecube<uint> srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]], texture3d<uint> srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]], sampler textureSampler [[sampler(0)]], constant BlitParams &options [[buffer(0)]]) 451{ 452 return blitFS(input, srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, srcTexture3d, textureSampler, options); 453} 454 455fragment MultipleColorOutputs<uint> copyTextureFloatToUIntFS(BlitVSOut input [[stage_in]], texture2d<float> srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], texture2d_array<float> srcTexture2dArray [[texture(0), function_constant(kSourceTextureType2DArray)]], texture2d_ms<float> srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]], texturecube<float> srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]], texture3d<float> srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]], sampler textureSampler [[sampler(0)]], constant BlitParams &options [[buffer(0)]]) 456{ 457 float4 inputColor = blitReadTexture<>(input, srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, srcTexture3d, textureSampler, options); 458 uint4 output = uint4(inputColor * float4(255.0)); 459 460 return toMultipleColorOutputs(output); 461} 462 463 464struct FragmentDepthOut 465{ 466 float depth [[depth(any)]]; 467}; 468 469static inline float sampleDepth( 470 texture2d<float> srcTexture2d [[function_constant(kSourceTextureType2D)]], 471 texture2d_array<float> srcTexture2dArray [[function_constant(kSourceTextureType2DArray)]], 472 texture2d_ms<float> srcTexture2dMS [[function_constant(kSourceTextureType2DMS)]], 473 texturecube<float> srcTextureCube [[function_constant(kSourceTextureTypeCube)]], 474 float2 texCoords, 475 constant BlitParams &options) 476{ 477 float4 output; 478 479 constexpr sampler textureSampler(mag_filter::nearest, min_filter::nearest); 480 481 switch (kSourceTextureType) 482 { 483 case kTextureType2D: 484 output = srcTexture2d.sample(textureSampler, texCoords, level(options.srcLevel)); 485 break; 486 case kTextureType2DArray: 487 output = srcTexture2dArray.sample(textureSampler, texCoords, options.srcLayer, 488 level(options.srcLevel)); 489 break; 490 case kTextureType2DMultisample: 491 492 output = srcTexture2dMS.read(getImageCoords(srcTexture2dMS, texCoords), 0); 493 break; 494 case kTextureTypeCube: 495 output = 496 srcTextureCube.sample(textureSampler, cubeTexcoords(texCoords, options.srcLayer), 497 level(options.srcLevel)); 498 break; 499 } 500 501 return output.r; 502} 503 504fragment FragmentDepthOut blitDepthFS(BlitVSOut input [[stage_in]], 505 texture2d<float> srcTexture2d 506 [[texture(0), function_constant(kSourceTextureType2D)]], 507 texture2d_array<float> srcTexture2dArray 508 [[texture(0), function_constant(kSourceTextureType2DArray)]], 509 texture2d_ms<float> srcTexture2dMS 510 [[texture(0), function_constant(kSourceTextureType2DMS)]], 511 texturecube<float> srcTextureCube 512 [[texture(0), function_constant(kSourceTextureTypeCube)]], 513 constant BlitParams &options [[buffer(0)]]) 514{ 515 FragmentDepthOut re; 516 517 re.depth = sampleDepth(srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, 518 input.texCoords, options); 519 520 return re; 521} 522 523static inline uint32_t sampleStencil( 524 texture2d<uint32_t> srcTexture2d [[function_constant(kSourceTexture2Type2D)]], 525 texture2d_array<uint32_t> srcTexture2dArray [[function_constant(kSourceTexture2Type2DArray)]], 526 texture2d_ms<uint32_t> srcTexture2dMS [[function_constant(kSourceTexture2Type2DMS)]], 527 texturecube<uint32_t> srcTextureCube [[function_constant(kSourceTexture2TypeCube)]], 528 float2 texCoords, 529 int srcLevel, 530 int srcLayer) 531{ 532 uint4 output; 533 constexpr sampler textureSampler(mag_filter::nearest, min_filter::nearest); 534 535 switch (kSourceTexture2Type) 536 { 537 case kTextureType2D: 538 output = srcTexture2d.sample(textureSampler, texCoords, level(srcLevel)); 539 break; 540 case kTextureType2DArray: 541 output = srcTexture2dArray.sample(textureSampler, texCoords, srcLayer, level(srcLevel)); 542 break; 543 case kTextureType2DMultisample: 544 545 output = srcTexture2dMS.read(getImageCoords(srcTexture2dMS, texCoords), 0); 546 break; 547 case kTextureTypeCube: 548 output = srcTextureCube.sample(textureSampler, cubeTexcoords(texCoords, srcLayer), 549 level(srcLevel)); 550 break; 551 } 552 553 return output.r; 554} 555 556 557struct BlitStencilToBufferParams 558{ 559 float2 srcStartTexCoords; 560 float2 srcTexCoordSteps; 561 int srcLevel; 562 int srcLayer; 563 564 uint2 dstSize; 565 uint dstBufferRowPitch; 566 567 bool resolveMS; 568}; 569 570kernel void blitStencilToBufferCS(ushort2 gIndices [[thread_position_in_grid]], 571 texture2d<uint32_t> srcTexture2d 572 [[texture(1), function_constant(kSourceTexture2Type2D)]], 573 texture2d_array<uint32_t> srcTexture2dArray 574 [[texture(1), function_constant(kSourceTexture2Type2DArray)]], 575 texture2d_ms<uint32_t> srcTexture2dMS 576 [[texture(1), function_constant(kSourceTexture2Type2DMS)]], 577 texturecube<uint32_t> srcTextureCube 578 [[texture(1), function_constant(kSourceTexture2TypeCube)]], 579 constant BlitStencilToBufferParams &options [[buffer(0)]], 580 device uchar *buffer [[buffer(1)]]) 581{ 582 if (gIndices.x >= options.dstSize.x || gIndices.y >= options.dstSize.y) 583 { 584 return; 585 } 586 587 float2 srcTexCoords = options.srcStartTexCoords + float2(gIndices) * options.srcTexCoordSteps; 588 589 if (kSourceTexture2Type == kTextureType2DMultisample && !options.resolveMS) 590 { 591 uint samples = srcTexture2dMS.get_num_samples(); 592 uint2 imageCoords = getImageCoords(srcTexture2dMS, srcTexCoords); 593 uint bufferOffset = options.dstBufferRowPitch * gIndices.y + samples * gIndices.x; 594 595 for (uint sample = 0; sample < samples; ++sample) 596 { 597 uint stencilPerSample = srcTexture2dMS.read(imageCoords, sample).r; 598 buffer[bufferOffset + sample] = static_cast<uchar>(stencilPerSample); 599 } 600 } 601 else 602 { 603 uint32_t stencil = 604 sampleStencil(srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, 605 srcTexCoords, options.srcLevel, options.srcLayer); 606 607 buffer[options.dstBufferRowPitch * gIndices.y + gIndices.x] = static_cast<uchar>(stencil); 608 } 609} 610 611 612#if __METAL_VERSION__ >= 210 613 614struct FragmentStencilOut 615{ 616 uint32_t stencil [[stencil]]; 617}; 618 619struct FragmentDepthStencilOut 620{ 621 float depth [[depth(any)]]; 622 uint32_t stencil [[stencil]]; 623}; 624 625fragment FragmentStencilOut blitStencilFS( 626 BlitVSOut input [[stage_in]], 627 texture2d<uint32_t> srcTexture2d [[texture(1), function_constant(kSourceTexture2Type2D)]], 628 texture2d_array<uint32_t> srcTexture2dArray 629 [[texture(1), function_constant(kSourceTexture2Type2DArray)]], 630 texture2d_ms<uint32_t> srcTexture2dMS 631 [[texture(1), function_constant(kSourceTexture2Type2DMS)]], 632 texturecube<uint32_t> srcTextureCube [[texture(1), function_constant(kSourceTexture2TypeCube)]], 633 constant BlitParams &options [[buffer(0)]]) 634{ 635 FragmentStencilOut re; 636 637 re.stencil = sampleStencil(srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, 638 input.texCoords, options.srcLevel, options.srcLayer); 639 640 return re; 641} 642 643fragment FragmentDepthStencilOut blitDepthStencilFS( 644 BlitVSOut input [[stage_in]], 645 646 texture2d<float> srcDepthTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], 647 texture2d_array<float> srcDepthTexture2dArray 648 [[texture(0), function_constant(kSourceTextureType2DArray)]], 649 texture2d_ms<float> srcDepthTexture2dMS 650 [[texture(0), function_constant(kSourceTextureType2DMS)]], 651 texturecube<float> srcDepthTextureCube 652 [[texture(0), function_constant(kSourceTextureTypeCube)]], 653 654 655 texture2d<uint32_t> srcStencilTexture2d 656 [[texture(1), function_constant(kSourceTexture2Type2D)]], 657 texture2d_array<uint32_t> srcStencilTexture2dArray 658 [[texture(1), function_constant(kSourceTexture2Type2DArray)]], 659 texture2d_ms<uint32_t> srcStencilTexture2dMS 660 [[texture(1), function_constant(kSourceTexture2Type2DMS)]], 661 texturecube<uint32_t> srcStencilTextureCube 662 [[texture(1), function_constant(kSourceTexture2TypeCube)]], 663 664 constant BlitParams &options [[buffer(0)]]) 665{ 666 FragmentDepthStencilOut re; 667 668 re.depth = sampleDepth(srcDepthTexture2d, srcDepthTexture2dArray, srcDepthTexture2dMS, 669 srcDepthTextureCube, input.texCoords, options); 670 re.stencil = 671 sampleStencil(srcStencilTexture2d, srcStencilTexture2dArray, srcStencilTexture2dMS, 672 srcStencilTextureCube, input.texCoords, options.srcLevel, options.srcLayer); 673 return re; 674} 675#endif 676# 2 "temp_master_source.metal" 2 677# 1 ".\\clear.metal" 1 678# 10 ".\\clear.metal" 679using namespace rx::mtl_shader; 680 681struct ClearParams 682{ 683 float4 clearColor; 684 float clearDepth; 685}; 686 687vertex float4 clearVS(unsigned int vid [[ vertex_id ]], 688 constant ClearParams &clearParams [[buffer(0)]]) 689{ 690 return float4(gCorners[vid], clearParams.clearDepth, 1.0); 691} 692 693fragment MultipleColorOutputs<float> clearFloatFS(constant ClearParams &clearParams [[buffer(0)]]) 694{ 695 return toMultipleColorOutputs(clearParams.clearColor); 696} 697 698fragment MultipleColorOutputs<int> clearIntFS(constant ClearParams &clearParams [[buffer(0)]]) 699{ 700 return toMultipleColorOutputs(as_type<int4>(clearParams.clearColor)); 701} 702 703fragment MultipleColorOutputs<uint> clearUIntFS(constant ClearParams &clearParams [[buffer(0)]]) 704{ 705 return toMultipleColorOutputs(as_type<uint4>(clearParams.clearColor)); 706} 707# 3 "temp_master_source.metal" 2 708# 1 ".\\gen_indices.metal" 1 709 710 711 712 713 714 715 716 717using namespace rx::mtl_shader; 718 719 720constant bool kSourceBufferAligned[[function_constant(100)]]; 721constant bool kSourceIndexIsU8[[function_constant(200)]]; 722constant bool kSourceIndexIsU16[[function_constant(300)]]; 723constant bool kSourceIndexIsU32[[function_constant(400)]]; 724constant bool kSourceBufferUnaligned = !kSourceBufferAligned; 725constant bool kUseSourceBufferU8 = kSourceIndexIsU8 || kSourceBufferUnaligned; 726constant bool kUseSourceBufferU16 = kSourceIndexIsU16 && kSourceBufferAligned; 727constant bool kUseSourceBufferU32 = kSourceIndexIsU32 && kSourceBufferAligned; 728 729struct IndexConversionParams 730{ 731 uint32_t srcOffset; 732 uint32_t indexCount; 733 bool primitiveRestartEnabled; 734}; 735 736 737 738inline ushort getIndexAligned(constant ushort *inputAligned, uint offset, uint idx) 739{ 740 return inputAligned[offset / 2 + idx]; 741} 742inline uint getIndexAligned(constant uint *inputAligned, uint offset, uint idx) 743{ 744 return inputAligned[offset / 4 + idx]; 745} 746inline uchar getIndexAligned(constant uchar *input, uint offset, uint idx) 747{ 748 return input[offset + idx]; 749} 750inline ushort getIndexUnalignedU16(constant uchar *input, uint offset, uint idx) 751{ 752 ushort inputLo = input[offset + 2 * idx]; 753 ushort inputHi = input[offset + 2 * idx + 1]; 754 755 return inputLo | (inputHi << 8); 756} 757inline uint getIndexUnalignedU32(constant uchar *input, uint offset, uint idx) 758{ 759 uint input0 = input[offset + 4 * idx]; 760 uint input1 = input[offset + 4 * idx + 1]; 761 uint input2 = input[offset + 4 * idx + 2]; 762 uint input3 = input[offset + 4 * idx + 3]; 763 764 return input0 | (input1 << 8) | (input2 << 16) | (input3 << 24); 765} 766 767kernel void convertIndexU8ToU16(uint idx [[thread_position_in_grid]], 768 constant IndexConversionParams &options [[buffer(0)]], 769 constant uchar *input [[buffer(1)]], 770 device ushort *output [[buffer(2)]]) 771{ 772 if (idx >= options.indexCount) { return; }; 773 774 uchar value = getIndexAligned(input, options.srcOffset, idx); 775 776 if (options.primitiveRestartEnabled && value == 0xff) 777 { 778 output[idx] = 0xffff; 779 } 780 else 781 { 782 output[idx] = value; 783 } 784} 785 786kernel void convertIndexU16(uint idx [[thread_position_in_grid]], 787 constant IndexConversionParams &options [[buffer(0)]], 788 constant uchar *input 789 [[buffer(1), function_constant(kSourceBufferUnaligned)]], 790 constant ushort *inputAligned 791 [[buffer(1), function_constant(kSourceBufferAligned)]], 792 device ushort *output [[buffer(2)]]) 793{ 794 if (idx >= options.indexCount) { return; }; 795 796 ushort value; 797 if (kSourceBufferAligned) 798 { 799 value = getIndexAligned(inputAligned, options.srcOffset, idx); 800 } 801 else 802 { 803 value = getIndexUnalignedU16(input, options.srcOffset, idx); 804 } 805 output[idx] = value; 806} 807 808kernel void convertIndexU32(uint idx [[thread_position_in_grid]], 809 constant IndexConversionParams &options [[buffer(0)]], 810 constant uchar *input 811 [[buffer(1), function_constant(kSourceBufferUnaligned)]], 812 constant uint *inputAligned 813 [[buffer(1), function_constant(kSourceBufferAligned)]], 814 device uint *output [[buffer(2)]]) 815{ 816 if (idx >= options.indexCount) { return; }; 817 818 uint value; 819 if (kSourceBufferAligned) 820 { 821 value = getIndexAligned(inputAligned, options.srcOffset, idx); 822 } 823 else 824 { 825 value = getIndexUnalignedU32(input, options.srcOffset, idx); 826 } 827 output[idx] = value; 828} 829 830struct IndexFromArrayParams 831{ 832 uint firstVertex; 833 834 uint vertexCount; 835}; 836 837 838kernel void genTriFanIndicesFromArray(uint idx [[thread_position_in_grid]], 839 constant IndexFromArrayParams &options [[buffer(0)]], 840 device uint *output [[buffer(2)]]) 841{ 842 if (idx >= options.vertexCount) { return; }; 843 844 uint vertexIdx = options.firstVertex + 2 + idx; 845 846 847 848 output[3 * idx ] = vertexIdx - 1; 849 output[3 * idx + 1] = vertexIdx; 850 output[3 * idx + 2] = options.firstVertex; 851} 852 853inline uint getIndexU32(uint offset, 854 uint idx, 855 constant uchar *inputU8 [[function_constant(kUseSourceBufferU8)]], 856 constant ushort *inputU16 [[function_constant(kUseSourceBufferU16)]], 857 constant uint *inputU32 [[function_constant(kUseSourceBufferU32)]]) 858{ 859 if (kUseSourceBufferU8) 860 { 861 if (kSourceIndexIsU16) 862 { 863 return getIndexUnalignedU16(inputU8, offset, idx); 864 } 865 else if (kSourceIndexIsU32) 866 { 867 return getIndexUnalignedU32(inputU8, offset, idx); 868 } 869 return getIndexAligned(inputU8, offset, idx); 870 } 871 else if (kUseSourceBufferU16) 872 { 873 return getIndexAligned(inputU16, offset, idx); 874 } 875 else if (kUseSourceBufferU32) 876 { 877 return getIndexAligned(inputU32, offset, idx); 878 } 879 return 0; 880} 881 882 883 884 885kernel void genTriFanIndicesFromElements(uint idx [[thread_position_in_grid]], 886 constant IndexConversionParams &options [[buffer(0)]], 887 constant uchar *inputU8 888 [[buffer(1), function_constant(kUseSourceBufferU8)]], 889 constant ushort *inputU16 890 [[buffer(1), function_constant(kUseSourceBufferU16)]], 891 constant uint *inputU32 892 [[buffer(1), function_constant(kUseSourceBufferU32)]], 893 device uint *output [[buffer(2)]]) 894{ 895 if (idx >= options.indexCount) { return; }; 896 897 uint elemIdx = 2 + idx; 898 899 output[3 * idx] = getIndexU32(options.srcOffset, 0, inputU8, inputU16, inputU32); 900 output[3 * idx + 1] = getIndexU32(options.srcOffset, elemIdx - 1, inputU8, inputU16, inputU32); 901 output[3 * idx + 2] = getIndexU32(options.srcOffset, elemIdx, inputU8, inputU16, inputU32); 902} 903 904 905kernel void genLineLoopIndicesFromArray(uint idx [[thread_position_in_grid]], 906 constant IndexFromArrayParams &options [[buffer(0)]], 907 device uint *output [[buffer(2)]]) 908{ 909 uint totalIndices = options.vertexCount + 1; 910 if (idx >= totalIndices) { return; }; 911 912 output[idx] = options.firstVertex + idx % options.vertexCount; 913} 914 915 916 917kernel void genLineLoopIndicesFromElements(uint idx [[thread_position_in_grid]], 918 constant IndexConversionParams &options [[buffer(0)]], 919 constant uchar *inputU8 920 [[buffer(1), function_constant(kUseSourceBufferU8)]], 921 constant ushort *inputU16 922 [[buffer(1), function_constant(kUseSourceBufferU16)]], 923 constant uint *inputU32 924 [[buffer(1), function_constant(kUseSourceBufferU32)]], 925 device uint *output [[buffer(2)]]) 926{ 927 uint totalTargetIndices = options.indexCount + 1; 928 if (idx >= totalTargetIndices) { return; }; 929 930 output[idx] = 931 getIndexU32(options.srcOffset, idx % options.indexCount, inputU8, inputU16, inputU32); 932} 933# 4 "temp_master_source.metal" 2 934# 1 ".\\gen_mipmap.metal" 1 935 936 937 938 939 940 941 942 943using namespace rx::mtl_shader; 944# 31 ".\\gen_mipmap.metal" 945struct GenMipParams 946{ 947 uint srcLevel; 948 uint numMipLevelsToGen; 949 bool sRGB; 950}; 951 952 953 954kernel void generate3DMipmaps(uint lIndex [[thread_index_in_threadgroup]], 955 ushort3 gIndices [[thread_position_in_grid]], 956 texture3d<float> srcTexture [[texture(0)]], 957 texture3d<float, access::write> dstMip1 [[texture(1)]], 958 texture3d<float, access::write> dstMip2 [[texture(2)]], 959 texture3d<float, access::write> dstMip3 [[texture(3)]], 960 texture3d<float, access::write> dstMip4 [[texture(4)]], 961 constant GenMipParams &options [[buffer(0)]]) 962{ 963 ushort3 mipSize = ushort3(dstMip1.get_width(), dstMip1.get_height(), dstMip1.get_depth()); 964 bool validThread = gIndices.x < mipSize.x && gIndices.y < mipSize.y && gIndices.z < mipSize.z; 965 966 constexpr sampler textureSampler(mag_filter::linear, min_filter::linear, mip_filter::linear); 967 968 969 970 971 threadgroup float sR[(8 * 8 * 8)]; 972 threadgroup float sG[(8 * 8 * 8)]; 973 threadgroup float sB[(8 * 8 * 8)]; 974 threadgroup float sA[(8 * 8 * 8)]; 975 976 977 float4 texel1; 978 if (validThread) 979 { 980 float3 texCoords = (float3(gIndices) + float3(0.5, 0.5, 0.5)) / float3(mipSize); 981 texel1 = srcTexture.sample(textureSampler, texCoords, level(options.srcLevel)); 982 983 984 dstMip1.write(texel1, gIndices); 985 } 986 else 987 { 988 989 lIndex = 0xffffffff; 990 } 991 992 if (options.numMipLevelsToGen == 1) 993 { 994 return; 995 } 996 997 998 999 1000 if (options.sRGB) 1001 { 1002 texel1 = linearToSRGB(texel1); 1003 } 1004 sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;; 1005 1006 threadgroup_barrier(mem_flags::mem_threadgroup); 1007 1008 1009 if ((lIndex & 0x49) == 0) 1010 { 1011 bool3 atEdge = gIndices == (mipSize - ushort3(1)); 1012 1013 1014 1015 float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 1], sG[lIndex + 1], sB[lIndex + 1], sA[lIndex + 1])); 1016 1017 float4 texel3 = (atEdge.y) ? (texel1) : (float4(sR[lIndex + 8], sG[lIndex + 8], sB[lIndex + 8], sA[lIndex + 8])); 1018 1019 float4 texel4 = (atEdge.z) ? (texel1) : (float4(sR[lIndex + (8 * 8)], sG[lIndex + (8 * 8)], sB[lIndex + (8 * 8)], sA[lIndex + (8 * 8)])); 1020 1021 float4 texel5 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (8 + 1)], sG[lIndex + (8 + 1)], sB[lIndex + (8 + 1)], sA[lIndex + (8 + 1)])); 1022 1023 1024 float4 texel6 = (atEdge.x | atEdge.z) ? (texel2) : (float4(sR[lIndex + ((8 * 8) + 1)], sG[lIndex + ((8 * 8) + 1)], sB[lIndex + ((8 * 8) + 1)], sA[lIndex + ((8 * 8) + 1)])); 1025 1026 1027 float4 texel7 = (atEdge.y | atEdge.z) ? (texel3) : (float4(sR[lIndex + ((8 * 8) + 8)], sG[lIndex + ((8 * 8) + 8)], sB[lIndex + ((8 * 8) + 8)], sA[lIndex + ((8 * 8) + 8)])); 1028 1029 1030 float4 texel8 = 1031 (atEdge.x | atEdge.y | atEdge.z) ? (texel5) : (float4(sR[lIndex + ((8 * 8) + 8 + 1)], sG[lIndex + ((8 * 8) + 8 + 1)], sB[lIndex + ((8 * 8) + 8 + 1)], sA[lIndex + ((8 * 8) + 8 + 1)])); 1032 1033 1034 texel1 = (texel1 + texel2 + texel3 + texel4 + texel5 + texel6 + texel7 + texel8) / 8.0; 1035 1036 dstMip2.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices >> 1); 1037 1038 1039 sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;; 1040 } 1041 1042 if (options.numMipLevelsToGen == 2) 1043 { 1044 return; 1045 } 1046 1047 1048 threadgroup_barrier(mem_flags::mem_threadgroup); 1049 1050 1051 if ((lIndex & 0xdb) == 0) 1052 { 1053 mipSize = max(mipSize >> 1, ushort3(1)); 1054 bool3 atEdge = (gIndices >> 1) == (mipSize - ushort3(1)); 1055 1056 1057 1058 float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 2], sG[lIndex + 2], sB[lIndex + 2], sA[lIndex + 2])); 1059 1060 float4 texel3 = 1061 (atEdge.y) ? (texel1) : (float4(sR[lIndex + (2 * 8)], sG[lIndex + (2 * 8)], sB[lIndex + (2 * 8)], sA[lIndex + (2 * 8)])); 1062 1063 float4 texel4 = 1064 (atEdge.z) ? (texel1) : (float4(sR[lIndex + (2 * (8 * 8))], sG[lIndex + (2 * (8 * 8))], sB[lIndex + (2 * (8 * 8))], sA[lIndex + (2 * (8 * 8))])); 1065 1066 float4 texel5 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (2 * 8 + 2)], sG[lIndex + (2 * 8 + 2)], sB[lIndex + (2 * 8 + 2)], sA[lIndex + (2 * 8 + 2)])); 1067 1068 1069 float4 texel6 = (atEdge.x | atEdge.z) ? (texel2) : (float4(sR[lIndex + (2 * (8 * 8) + 2)], sG[lIndex + (2 * (8 * 8) + 2)], sB[lIndex + (2 * (8 * 8) + 2)], sA[lIndex + (2 * (8 * 8) + 2)])); 1070 1071 1072 float4 texel7 = (atEdge.y | atEdge.z) ? (texel3) : (float4(sR[lIndex + (2 * (8 * 8) + 2 * 8)], sG[lIndex + (2 * (8 * 8) + 2 * 8)], sB[lIndex + (2 * (8 * 8) + 2 * 8)], sA[lIndex + (2 * (8 * 8) + 2 * 8)])); 1073 1074 1075 1076 float4 texel8 = (atEdge.x | atEdge.y | atEdge.z) ? (texel5) : (float4(sR[lIndex + (2 * (8 * 8) + 2 * 8 + 2)], sG[lIndex + (2 * (8 * 8) + 2 * 8 + 2)], sB[lIndex + (2 * (8 * 8) + 2 * 8 + 2)], sA[lIndex + (2 * (8 * 8) + 2 * 8 + 2)])); 1077 1078 1079 1080 texel1 = (texel1 + texel2 + texel3 + texel4 + texel5 + texel6 + texel7 + texel8) / 8.0; 1081 1082 dstMip3.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices >> 2); 1083 1084 1085 sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;; 1086 } 1087 1088 if (options.numMipLevelsToGen == 3) 1089 { 1090 return; 1091 } 1092 1093 1094 threadgroup_barrier(mem_flags::mem_threadgroup); 1095 1096 1097 if ((lIndex & 0x1ff) == 0) 1098 { 1099 mipSize = max(mipSize >> 1, ushort3(1)); 1100 bool3 atEdge = (gIndices >> 2) == (mipSize - ushort3(1)); 1101 1102 1103 1104 float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 4], sG[lIndex + 4], sB[lIndex + 4], sA[lIndex + 4])); 1105 1106 float4 texel3 = 1107 (atEdge.y) ? (texel1) : (float4(sR[lIndex + (4 * 8)], sG[lIndex + (4 * 8)], sB[lIndex + (4 * 8)], sA[lIndex + (4 * 8)])); 1108 1109 float4 texel4 = 1110 (atEdge.z) ? (texel1) : (float4(sR[lIndex + (4 * (8 * 8))], sG[lIndex + (4 * (8 * 8))], sB[lIndex + (4 * (8 * 8))], sA[lIndex + (4 * (8 * 8))])); 1111 1112 float4 texel5 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (4 * 8 + 4)], sG[lIndex + (4 * 8 + 4)], sB[lIndex + (4 * 8 + 4)], sA[lIndex + (4 * 8 + 4)])); 1113 1114 1115 float4 texel6 = (atEdge.x | atEdge.z) ? (texel2) : (float4(sR[lIndex + (4 * (8 * 8) + 4)], sG[lIndex + (4 * (8 * 8) + 4)], sB[lIndex + (4 * (8 * 8) + 4)], sA[lIndex + (4 * (8 * 8) + 4)])); 1116 1117 1118 float4 texel7 = (atEdge.y | atEdge.z) ? (texel3) : (float4(sR[lIndex + (4 * (8 * 8) + 4 * 8)], sG[lIndex + (4 * (8 * 8) + 4 * 8)], sB[lIndex + (4 * (8 * 8) + 4 * 8)], sA[lIndex + (4 * (8 * 8) + 4 * 8)])); 1119 1120 1121 1122 float4 texel8 = (atEdge.x | atEdge.y | atEdge.z) ? (texel5) : (float4(sR[lIndex + (4 * (8 * 8) + 4 * 8 + 4)], sG[lIndex + (4 * (8 * 8) + 4 * 8 + 4)], sB[lIndex + (4 * (8 * 8) + 4 * 8 + 4)], sA[lIndex + (4 * (8 * 8) + 4 * 8 + 4)])); 1123 1124 1125 1126 texel1 = (texel1 + texel2 + texel3 + texel4 + texel5 + texel6 + texel7 + texel8) / 8.0; 1127 1128 dstMip4.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices >> 3); 1129 } 1130} 1131 1132kernel void generate2DMipmaps(uint lIndex [[thread_index_in_threadgroup]], 1133 ushort2 gIndices [[thread_position_in_grid]], 1134 texture2d<float> srcTexture [[texture(0)]], 1135 texture2d<float, access::write> dstMip1 [[texture(1)]], 1136 texture2d<float, access::write> dstMip2 [[texture(2)]], 1137 texture2d<float, access::write> dstMip3 [[texture(3)]], 1138 texture2d<float, access::write> dstMip4 [[texture(4)]], 1139 constant GenMipParams &options [[buffer(0)]]) 1140{ 1141 uint firstMipLevel = options.srcLevel + 1; 1142 ushort2 mipSize = 1143 ushort2(srcTexture.get_width(firstMipLevel), srcTexture.get_height(firstMipLevel)); 1144 bool validThread = gIndices.x < mipSize.x && gIndices.y < mipSize.y; 1145 1146 constexpr sampler textureSampler(mag_filter::linear, min_filter::linear, mip_filter::linear); 1147 1148 1149 1150 1151 threadgroup float sR[(8 * 8)]; 1152 threadgroup float sG[(8 * 8)]; 1153 threadgroup float sB[(8 * 8)]; 1154 threadgroup float sA[(8 * 8)]; 1155 1156 1157 float4 texel1; 1158 if (validThread) 1159 { 1160 float2 texCoords = (float2(gIndices) + float2(0.5, 0.5)) / float2(mipSize); 1161 texel1 = srcTexture.sample(textureSampler, texCoords, level(options.srcLevel)); 1162 1163 1164 dstMip1.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices); 1165 } 1166 else 1167 { 1168 1169 lIndex = 0xffffffff; 1170 } 1171 1172 if (options.numMipLevelsToGen == 1) 1173 { 1174 return; 1175 } 1176 1177 1178 1179 1180 sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;; 1181 1182 threadgroup_barrier(mem_flags::mem_threadgroup); 1183 1184 1185 if ((lIndex & 0x09) == 0) 1186 { 1187 bool2 atEdge = gIndices == (mipSize - ushort2(1)); 1188 1189 1190 1191 float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 1], sG[lIndex + 1], sB[lIndex + 1], sA[lIndex + 1])); 1192 1193 float4 texel3 = (atEdge.y) ? (texel1) : (float4(sR[lIndex + 8], sG[lIndex + 8], sB[lIndex + 8], sA[lIndex + 8])); 1194 1195 float4 texel4 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (8 + 1)], sG[lIndex + (8 + 1)], sB[lIndex + (8 + 1)], sA[lIndex + (8 + 1)])); 1196 1197 1198 texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0; 1199 1200 dstMip2.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices >> 1); 1201 1202 1203 sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;; 1204 } 1205 1206 if (options.numMipLevelsToGen == 2) 1207 { 1208 return; 1209 } 1210 1211 1212 threadgroup_barrier(mem_flags::mem_threadgroup); 1213 1214 1215 if ((lIndex & 0x1b) == 0) 1216 { 1217 mipSize = max(mipSize >> 1, ushort2(1)); 1218 bool2 atEdge = (gIndices >> 1) == (mipSize - ushort2(1)); 1219 1220 1221 1222 float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 2], sG[lIndex + 2], sB[lIndex + 2], sA[lIndex + 2])); 1223 1224 float4 texel3 = 1225 (atEdge.y) ? (texel1) : (float4(sR[lIndex + 2 * 8], sG[lIndex + 2 * 8], sB[lIndex + 2 * 8], sA[lIndex + 2 * 8])); 1226 1227 float4 texel4 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (2 * 8 + 2)], sG[lIndex + (2 * 8 + 2)], sB[lIndex + (2 * 8 + 2)], sA[lIndex + (2 * 8 + 2)])); 1228 1229 1230 texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0; 1231 1232 dstMip3.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices >> 2); 1233 1234 1235 sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;; 1236 } 1237 1238 if (options.numMipLevelsToGen == 3) 1239 { 1240 return; 1241 } 1242 1243 1244 threadgroup_barrier(mem_flags::mem_threadgroup); 1245 1246 1247 if ((lIndex & 0x3f) == 0) 1248 { 1249 mipSize = max(mipSize >> 1, ushort2(1)); 1250 bool2 atEdge = (gIndices >> 2) == (mipSize - ushort2(1)); 1251 1252 1253 1254 float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 4], sG[lIndex + 4], sB[lIndex + 4], sA[lIndex + 4])); 1255 1256 float4 texel3 = 1257 (atEdge.y) ? (texel1) : (float4(sR[lIndex + 4 * 8], sG[lIndex + 4 * 8], sB[lIndex + 4 * 8], sA[lIndex + 4 * 8])); 1258 1259 float4 texel4 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (4 * 8 + 4)], sG[lIndex + (4 * 8 + 4)], sB[lIndex + (4 * 8 + 4)], sA[lIndex + (4 * 8 + 4)])); 1260 1261 1262 texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0; 1263 1264 dstMip4.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices >> 3); 1265 } 1266} 1267 1268template <typename TextureTypeR, typename TextureTypeW> 1269static __attribute__((always_inline)) void generateCubeOr2DArray2ndAndMoreMipmaps( 1270 uint lIndex, 1271 ushort3 gIndices, 1272 TextureTypeR srcTexture, 1273 TextureTypeW dstMip2, 1274 TextureTypeW dstMip3, 1275 TextureTypeW dstMip4, 1276 ushort2 mip1Size, 1277 float4 mip1Texel, 1278 threadgroup float *sR, 1279 threadgroup float *sG, 1280 threadgroup float *sB, 1281 threadgroup float *sA, 1282 constant GenMipParams &options) 1283{ 1284 ushort2 mipSize = mip1Size; 1285 float4 texel1 = mip1Texel; 1286 1287 1288 1289 1290 sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;; 1291 1292 threadgroup_barrier(mem_flags::mem_threadgroup); 1293 1294 1295 if ((lIndex & 0x09) == 0) 1296 { 1297 bool2 atEdge = gIndices.xy == (mipSize - ushort2(1)); 1298 1299 1300 1301 float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 1], sG[lIndex + 1], sB[lIndex + 1], sA[lIndex + 1])); 1302 1303 float4 texel3 = (atEdge.y) ? (texel1) : (float4(sR[lIndex + 8], sG[lIndex + 8], sB[lIndex + 8], sA[lIndex + 8])); 1304 1305 float4 texel4 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (8 + 1)], sG[lIndex + (8 + 1)], sB[lIndex + (8 + 1)], sA[lIndex + (8 + 1)])); 1306 1307 1308 texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0; 1309 1310 dstMip2.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices.xy >> 1, gIndices.z); 1311 1312 1313 sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;; 1314 } 1315 1316 if (options.numMipLevelsToGen == 2) 1317 { 1318 return; 1319 } 1320 1321 1322 threadgroup_barrier(mem_flags::mem_threadgroup); 1323 1324 1325 if ((lIndex & 0x1b) == 0) 1326 { 1327 mipSize = max(mipSize >> 1, ushort2(1)); 1328 bool2 atEdge = (gIndices.xy >> 1) == (mipSize - ushort2(1)); 1329 1330 1331 1332 float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 2], sG[lIndex + 2], sB[lIndex + 2], sA[lIndex + 2])); 1333 1334 float4 texel3 = 1335 (atEdge.y) ? (texel1) : (float4(sR[lIndex + 2 * 8], sG[lIndex + 2 * 8], sB[lIndex + 2 * 8], sA[lIndex + 2 * 8])); 1336 1337 float4 texel4 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (2 * 8 + 2)], sG[lIndex + (2 * 8 + 2)], sB[lIndex + (2 * 8 + 2)], sA[lIndex + (2 * 8 + 2)])); 1338 1339 1340 texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0; 1341 1342 dstMip3.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices.xy >> 2, gIndices.z); 1343 1344 1345 sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;; 1346 } 1347 1348 if (options.numMipLevelsToGen == 3) 1349 { 1350 return; 1351 } 1352 1353 1354 threadgroup_barrier(mem_flags::mem_threadgroup); 1355 1356 1357 if ((lIndex & 0x3f) == 0) 1358 { 1359 mipSize = max(mipSize >> 1, ushort2(1)); 1360 bool2 atEdge = (gIndices.xy >> 2) == (mipSize - ushort2(1)); 1361 1362 1363 1364 float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 4], sG[lIndex + 4], sB[lIndex + 4], sA[lIndex + 4])); 1365 1366 float4 texel3 = 1367 (atEdge.y) ? (texel1) : (float4(sR[lIndex + 4 * 8], sG[lIndex + 4 * 8], sB[lIndex + 4 * 8], sA[lIndex + 4 * 8])); 1368 1369 float4 texel4 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (4 * 8 + 4)], sG[lIndex + (4 * 8 + 4)], sB[lIndex + (4 * 8 + 4)], sA[lIndex + (4 * 8 + 4)])); 1370 1371 1372 texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0; 1373 1374 dstMip4.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices.xy >> 3, gIndices.z); 1375 } 1376} 1377 1378kernel void generateCubeMipmaps(uint lIndex [[thread_index_in_threadgroup]], 1379 ushort3 gIndices [[thread_position_in_grid]], 1380 texturecube<float> srcTexture [[texture(0)]], 1381 texturecube<float, access::write> dstMip1 [[texture(1)]], 1382 texturecube<float, access::write> dstMip2 [[texture(2)]], 1383 texturecube<float, access::write> dstMip3 [[texture(3)]], 1384 texturecube<float, access::write> dstMip4 [[texture(4)]], 1385 constant GenMipParams &options [[buffer(0)]]) 1386{ 1387 uint firstMipLevel = options.srcLevel + 1; 1388 ushort2 mip1Size = 1389 ushort2(srcTexture.get_width(firstMipLevel), srcTexture.get_height(firstMipLevel)); 1390 bool validThread = gIndices.x < mip1Size.x && gIndices.y < mip1Size.y; 1391 1392 constexpr sampler textureSampler(mag_filter::linear, min_filter::linear, mip_filter::linear); 1393 1394 1395 float4 mip1Texel; 1396 if (validThread) 1397 { 1398 float2 texCoords = (float2(gIndices.xy) + float2(0.5, 0.5)) / float2(mip1Size); 1399 mip1Texel = srcTexture.sample(textureSampler, cubeTexcoords(texCoords, int(gIndices.z)), 1400 level(options.srcLevel)); 1401 1402 1403 dstMip1.write((options.sRGB ? sRGBtoLinear(mip1Texel) : mip1Texel), gIndices.xy, gIndices.z); 1404 } 1405 else 1406 { 1407 1408 lIndex = 0xffffffff; 1409 } 1410 1411 if (options.numMipLevelsToGen == 1) 1412 { 1413 return; 1414 } 1415 1416 1417 threadgroup float sR[(8 * 8)]; 1418 threadgroup float sG[(8 * 8)]; 1419 threadgroup float sB[(8 * 8)]; 1420 threadgroup float sA[(8 * 8)]; 1421 1422 generateCubeOr2DArray2ndAndMoreMipmaps(lIndex, gIndices, srcTexture, dstMip2, dstMip3, dstMip4, 1423 mip1Size, mip1Texel, sR, sG, sB, sA, options); 1424} 1425 1426kernel void generate2DArrayMipmaps(uint lIndex [[thread_index_in_threadgroup]], 1427 ushort3 gIndices [[thread_position_in_grid]], 1428 texture2d_array<float> srcTexture [[texture(0)]], 1429 texture2d_array<float, access::write> dstMip1 [[texture(1)]], 1430 texture2d_array<float, access::write> dstMip2 [[texture(2)]], 1431 texture2d_array<float, access::write> dstMip3 [[texture(3)]], 1432 texture2d_array<float, access::write> dstMip4 [[texture(4)]], 1433 constant GenMipParams &options [[buffer(0)]]) 1434{ 1435 uint firstMipLevel = options.srcLevel + 1; 1436 ushort2 mip1Size = 1437 ushort2(srcTexture.get_width(firstMipLevel), srcTexture.get_height(firstMipLevel)); 1438 bool validThread = gIndices.x < mip1Size.x && gIndices.y < mip1Size.y; 1439 1440 constexpr sampler textureSampler(mag_filter::linear, min_filter::linear, mip_filter::linear); 1441 1442 1443 float4 mip1Texel; 1444 if (validThread) 1445 { 1446 float2 texCoords = (float2(gIndices.xy) + float2(0.5, 0.5)) / float2(mip1Size); 1447 mip1Texel = 1448 srcTexture.sample(textureSampler, texCoords, gIndices.z, level(options.srcLevel)); 1449 1450 1451 dstMip1.write((options.sRGB ? sRGBtoLinear(mip1Texel) : mip1Texel), gIndices.xy, gIndices.z); 1452 } 1453 else 1454 { 1455 1456 lIndex = 0xffffffff; 1457 } 1458 1459 if (options.numMipLevelsToGen == 1) 1460 { 1461 return; 1462 } 1463 1464 1465 threadgroup float sR[(8 * 8)]; 1466 threadgroup float sG[(8 * 8)]; 1467 threadgroup float sB[(8 * 8)]; 1468 threadgroup float sA[(8 * 8)]; 1469 1470 generateCubeOr2DArray2ndAndMoreMipmaps(lIndex, gIndices, srcTexture, dstMip2, dstMip3, dstMip4, 1471 mip1Size, mip1Texel, sR, sG, sB, sA, options); 1472} 1473# 5 "temp_master_source.metal" 2 1474# 1 ".\\copy_buffer.metal" 1 1475# 12 ".\\copy_buffer.metal" 1476#include <metal_pack> 1477 1478 1479# 1 ".\\format_autogen.h" 1 1480 1481 1482 1483namespace rx 1484{ 1485namespace mtl_shader 1486{ 1487 1488namespace FormatID 1489{ 1490enum 1491{ 1492 NONE, 1493 D16_UNORM, 1494 D24_UNORM_S8_UINT, 1495 D24_UNORM_X8_UINT, 1496 D32_FLOAT, 1497 D32_FLOAT_S8X24_UINT, 1498 D32_UNORM, 1499 S8_UINT, 1500 A16_FLOAT, 1501 A1R5G5B5_UNORM, 1502 A2R10G10B10_SINT_VERTEX, 1503 A2R10G10B10_SNORM_VERTEX, 1504 A2R10G10B10_SSCALED_VERTEX, 1505 A2R10G10B10_UINT_VERTEX, 1506 A2R10G10B10_UNORM_VERTEX, 1507 A2R10G10B10_USCALED_VERTEX, 1508 A32_FLOAT, 1509 A8_UNORM, 1510 ASTC_10x10_SRGB_BLOCK, 1511 ASTC_10x10_UNORM_BLOCK, 1512 ASTC_10x5_SRGB_BLOCK, 1513 ASTC_10x5_UNORM_BLOCK, 1514 ASTC_10x6_SRGB_BLOCK, 1515 ASTC_10x6_UNORM_BLOCK, 1516 ASTC_10x8_SRGB_BLOCK, 1517 ASTC_10x8_UNORM_BLOCK, 1518 ASTC_12x10_SRGB_BLOCK, 1519 ASTC_12x10_UNORM_BLOCK, 1520 ASTC_12x12_SRGB_BLOCK, 1521 ASTC_12x12_UNORM_BLOCK, 1522 ASTC_3x3x3_UNORM_BLOCK, 1523 ASTC_3x3x3_UNORM_SRGB_BLOCK, 1524 ASTC_4x3x3_UNORM_BLOCK, 1525 ASTC_4x3x3_UNORM_SRGB_BLOCK, 1526 ASTC_4x4_SRGB_BLOCK, 1527 ASTC_4x4_UNORM_BLOCK, 1528 ASTC_4x4x3_UNORM_BLOCK, 1529 ASTC_4x4x3_UNORM_SRGB_BLOCK, 1530 ASTC_4x4x4_UNORM_BLOCK, 1531 ASTC_4x4x4_UNORM_SRGB_BLOCK, 1532 ASTC_5x4_SRGB_BLOCK, 1533 ASTC_5x4_UNORM_BLOCK, 1534 ASTC_5x4x4_UNORM_BLOCK, 1535 ASTC_5x4x4_UNORM_SRGB_BLOCK, 1536 ASTC_5x5_SRGB_BLOCK, 1537 ASTC_5x5_UNORM_BLOCK, 1538 ASTC_5x5x4_UNORM_BLOCK, 1539 ASTC_5x5x4_UNORM_SRGB_BLOCK, 1540 ASTC_5x5x5_UNORM_BLOCK, 1541 ASTC_5x5x5_UNORM_SRGB_BLOCK, 1542 ASTC_6x5_SRGB_BLOCK, 1543 ASTC_6x5_UNORM_BLOCK, 1544 ASTC_6x5x5_UNORM_BLOCK, 1545 ASTC_6x5x5_UNORM_SRGB_BLOCK, 1546 ASTC_6x6_SRGB_BLOCK, 1547 ASTC_6x6_UNORM_BLOCK, 1548 ASTC_6x6x5_UNORM_BLOCK, 1549 ASTC_6x6x5_UNORM_SRGB_BLOCK, 1550 ASTC_6x6x6_UNORM_BLOCK, 1551 ASTC_6x6x6_UNORM_SRGB_BLOCK, 1552 ASTC_8x5_SRGB_BLOCK, 1553 ASTC_8x5_UNORM_BLOCK, 1554 ASTC_8x6_SRGB_BLOCK, 1555 ASTC_8x6_UNORM_BLOCK, 1556 ASTC_8x8_SRGB_BLOCK, 1557 ASTC_8x8_UNORM_BLOCK, 1558 B10G10R10A2_UNORM, 1559 B4G4R4A4_UNORM, 1560 B5G5R5A1_UNORM, 1561 B5G6R5_UNORM, 1562 B8G8R8A8_TYPELESS, 1563 B8G8R8A8_TYPELESS_SRGB, 1564 B8G8R8A8_UNORM, 1565 B8G8R8A8_UNORM_SRGB, 1566 B8G8R8X8_UNORM, 1567 B8G8R8X8_UNORM_SRGB, 1568 BC1_RGBA_UNORM_BLOCK, 1569 BC1_RGBA_UNORM_SRGB_BLOCK, 1570 BC1_RGB_UNORM_BLOCK, 1571 BC1_RGB_UNORM_SRGB_BLOCK, 1572 BC2_RGBA_UNORM_BLOCK, 1573 BC2_RGBA_UNORM_SRGB_BLOCK, 1574 BC3_RGBA_UNORM_BLOCK, 1575 BC3_RGBA_UNORM_SRGB_BLOCK, 1576 BC4_RED_SNORM_BLOCK, 1577 BC4_RED_UNORM_BLOCK, 1578 BC5_RG_SNORM_BLOCK, 1579 BC5_RG_UNORM_BLOCK, 1580 BC6H_RGB_SFLOAT_BLOCK, 1581 BC6H_RGB_UFLOAT_BLOCK, 1582 BC7_RGBA_UNORM_BLOCK, 1583 BC7_RGBA_UNORM_SRGB_BLOCK, 1584 EAC_R11G11_SNORM_BLOCK, 1585 EAC_R11G11_UNORM_BLOCK, 1586 EAC_R11_SNORM_BLOCK, 1587 EAC_R11_UNORM_BLOCK, 1588 ETC1_LOSSY_DECODE_R8G8B8_UNORM_BLOCK, 1589 ETC1_R8G8B8_UNORM_BLOCK, 1590 ETC2_R8G8B8A1_SRGB_BLOCK, 1591 ETC2_R8G8B8A1_UNORM_BLOCK, 1592 ETC2_R8G8B8A8_SRGB_BLOCK, 1593 ETC2_R8G8B8A8_UNORM_BLOCK, 1594 ETC2_R8G8B8_SRGB_BLOCK, 1595 ETC2_R8G8B8_UNORM_BLOCK, 1596 G8_B8R8_2PLANE_420_UNORM, 1597 G8_B8_R8_3PLANE_420_UNORM, 1598 L16A16_FLOAT, 1599 L16_FLOAT, 1600 L32A32_FLOAT, 1601 L32_FLOAT, 1602 L4A4_UNORM, 1603 L8A8_UNORM, 1604 L8_UNORM, 1605 PALETTE4_R4G4B4A4_UNORM, 1606 PALETTE4_R5G5B5A1_UNORM, 1607 PALETTE4_R5G6B5_UNORM, 1608 PALETTE4_R8G8B8A8_UNORM, 1609 PALETTE4_R8G8B8_UNORM, 1610 PALETTE8_R4G4B4A4_UNORM, 1611 PALETTE8_R5G5B5A1_UNORM, 1612 PALETTE8_R5G6B5_UNORM, 1613 PALETTE8_R8G8B8A8_UNORM, 1614 PALETTE8_R8G8B8_UNORM, 1615 PVRTC1_RGBA_2BPP_UNORM_BLOCK, 1616 PVRTC1_RGBA_2BPP_UNORM_SRGB_BLOCK, 1617 PVRTC1_RGBA_4BPP_UNORM_BLOCK, 1618 PVRTC1_RGBA_4BPP_UNORM_SRGB_BLOCK, 1619 PVRTC1_RGB_2BPP_UNORM_BLOCK, 1620 PVRTC1_RGB_2BPP_UNORM_SRGB_BLOCK, 1621 PVRTC1_RGB_4BPP_UNORM_BLOCK, 1622 PVRTC1_RGB_4BPP_UNORM_SRGB_BLOCK, 1623 R10G10B10A2_SINT, 1624 R10G10B10A2_SNORM, 1625 R10G10B10A2_SSCALED, 1626 R10G10B10A2_UINT, 1627 R10G10B10A2_UNORM, 1628 R10G10B10A2_USCALED, 1629 R10G10B10X2_UNORM, 1630 R11G11B10_FLOAT, 1631 R16G16B16A16_FLOAT, 1632 R16G16B16A16_SINT, 1633 R16G16B16A16_SNORM, 1634 R16G16B16A16_SSCALED, 1635 R16G16B16A16_UINT, 1636 R16G16B16A16_UNORM, 1637 R16G16B16A16_USCALED, 1638 R16G16B16_FLOAT, 1639 R16G16B16_SINT, 1640 R16G16B16_SNORM, 1641 R16G16B16_SSCALED, 1642 R16G16B16_UINT, 1643 R16G16B16_UNORM, 1644 R16G16B16_USCALED, 1645 R16G16_FLOAT, 1646 R16G16_SINT, 1647 R16G16_SNORM, 1648 R16G16_SSCALED, 1649 R16G16_UINT, 1650 R16G16_UNORM, 1651 R16G16_USCALED, 1652 R16_FLOAT, 1653 R16_SINT, 1654 R16_SNORM, 1655 R16_SSCALED, 1656 R16_UINT, 1657 R16_UNORM, 1658 R16_USCALED, 1659 R32G32B32A32_FIXED, 1660 R32G32B32A32_FLOAT, 1661 R32G32B32A32_SINT, 1662 R32G32B32A32_SNORM, 1663 R32G32B32A32_SSCALED, 1664 R32G32B32A32_UINT, 1665 R32G32B32A32_UNORM, 1666 R32G32B32A32_USCALED, 1667 R32G32B32_FIXED, 1668 R32G32B32_FLOAT, 1669 R32G32B32_SINT, 1670 R32G32B32_SNORM, 1671 R32G32B32_SSCALED, 1672 R32G32B32_UINT, 1673 R32G32B32_UNORM, 1674 R32G32B32_USCALED, 1675 R32G32_FIXED, 1676 R32G32_FLOAT, 1677 R32G32_SINT, 1678 R32G32_SNORM, 1679 R32G32_SSCALED, 1680 R32G32_UINT, 1681 R32G32_UNORM, 1682 R32G32_USCALED, 1683 R32_FIXED, 1684 R32_FLOAT, 1685 R32_SINT, 1686 R32_SNORM, 1687 R32_SSCALED, 1688 R32_UINT, 1689 R32_UNORM, 1690 R32_USCALED, 1691 R4G4B4A4_UNORM, 1692 R5G5B5A1_UNORM, 1693 R5G6B5_UNORM, 1694 R8G8B8A8_SINT, 1695 R8G8B8A8_SNORM, 1696 R8G8B8A8_SSCALED, 1697 R8G8B8A8_TYPELESS, 1698 R8G8B8A8_TYPELESS_SRGB, 1699 R8G8B8A8_UINT, 1700 R8G8B8A8_UNORM, 1701 R8G8B8A8_UNORM_SRGB, 1702 R8G8B8A8_USCALED, 1703 R8G8B8X8_UNORM, 1704 R8G8B8X8_UNORM_SRGB, 1705 R8G8B8_SINT, 1706 R8G8B8_SNORM, 1707 R8G8B8_SSCALED, 1708 R8G8B8_UINT, 1709 R8G8B8_UNORM, 1710 R8G8B8_UNORM_SRGB, 1711 R8G8B8_USCALED, 1712 R8G8_SINT, 1713 R8G8_SNORM, 1714 R8G8_SSCALED, 1715 R8G8_UINT, 1716 R8G8_UNORM, 1717 R8G8_UNORM_SRGB, 1718 R8G8_USCALED, 1719 R8_SINT, 1720 R8_SNORM, 1721 R8_SSCALED, 1722 R8_UINT, 1723 R8_UNORM, 1724 R8_UNORM_SRGB, 1725 R8_USCALED, 1726 R9G9B9E5_SHAREDEXP, 1727 X2R10G10B10_SINT_VERTEX, 1728 X2R10G10B10_SNORM_VERTEX, 1729 X2R10G10B10_SSCALED_VERTEX, 1730 X2R10G10B10_UINT_VERTEX, 1731 X2R10G10B10_UNORM_VERTEX, 1732 X2R10G10B10_USCALED_VERTEX, 1733 EXTERNAL0, 1734 EXTERNAL1, 1735 EXTERNAL2, 1736 EXTERNAL3, 1737 EXTERNAL4, 1738 EXTERNAL5, 1739 EXTERNAL6, 1740 EXTERNAL7 1741}; 1742 1743} 1744 1745} 1746} 1747# 16 ".\\copy_buffer.metal" 2 1748 1749using namespace rx::mtl_shader; 1750 1751constant int kCopyFormatType [[function_constant(10)]]; 1752 1753 1754constant int kCopyTextureType [[function_constant(20)]]; 1755constant bool kCopyTextureType2D = kCopyTextureType == kTextureType2D; 1756constant bool kCopyTextureType2DArray = kCopyTextureType == kTextureType2DArray; 1757constant bool kCopyTextureType2DMS = kCopyTextureType == kTextureType2DMultisample; 1758constant bool kCopyTextureTypeCube = kCopyTextureType == kTextureTypeCube; 1759constant bool kCopyTextureType3D = kCopyTextureType == kTextureType3D; 1760 1761struct CopyPixelParams 1762{ 1763 uint3 copySize; 1764 uint3 textureOffset; 1765 1766 uint bufferStartOffset; 1767 uint pixelSize; 1768 uint bufferRowPitch; 1769 uint bufferDepthPitch; 1770}; 1771 1772struct WritePixelParams 1773{ 1774 uint2 copySize; 1775 uint2 textureOffset; 1776 1777 uint bufferStartOffset; 1778 1779 uint pixelSize; 1780 uint bufferRowPitch; 1781 1782 uint textureLevel; 1783 uint textureLayer; 1784 1785 bool reverseTextureRowOrder; 1786}; 1787# 120 ".\\copy_buffer.metal" 1788template <typename T> 1789static inline void textureWrite(ushort3 gIndices, 1790 constant CopyPixelParams &options, 1791 vec<T, 4> color, 1792 texture2d<T, access::write> dstTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<T, access::write> dstTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<T, access::write> dstTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<T, access::write> dstTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]]) 1793{ 1794 uint3 writeIndices = options.textureOffset + uint3(gIndices); 1795 switch (kCopyTextureType) 1796 { 1797 case kTextureType2D: 1798 dstTexture2d.write(color, writeIndices.xy); 1799 break; 1800 case kTextureType2DArray: 1801 dstTexture2dArray.write(color, writeIndices.xy, writeIndices.z); 1802 break; 1803 case kTextureType3D: 1804 dstTexture3d.write(color, writeIndices); 1805 break; 1806 case kTextureTypeCube: 1807 dstTextureCube.write(color, writeIndices.xy, writeIndices.z); 1808 break; 1809 } 1810} 1811 1812 1813template <typename T> 1814static inline vec<T, 4> textureRead(ushort2 gIndices, 1815 constant WritePixelParams &options, 1816 texture2d<T, access::read> srcTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<T, access::read> srcTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<T, access::read> srcTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<T, access::read> srcTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]], texture2d_ms<T, access::read> srcTexture2dMS [[texture(0), function_constant(kCopyTextureType2DMS)]]) 1817{ 1818 vec<T, 4> color; 1819 uint2 coords = uint2(gIndices); 1820 if (options.reverseTextureRowOrder) 1821 { 1822 coords.y = options.copySize.y - 1 - gIndices.y; 1823 } 1824 coords += options.textureOffset; 1825 switch (kCopyTextureType) 1826 { 1827 case kTextureType2D: 1828 color = srcTexture2d.read(coords.xy, options.textureLevel); 1829 break; 1830 case kTextureType2DArray: 1831 color = srcTexture2dArray.read(coords.xy, options.textureLayer, options.textureLevel); 1832 break; 1833 case kTextureType2DMultisample: 1834 color = resolveTextureMS(srcTexture2dMS, coords.xy); 1835 break; 1836 case kTextureType3D: 1837 color = srcTexture3d.read(uint3(coords, options.textureLayer), options.textureLevel); 1838 break; 1839 case kTextureTypeCube: 1840 color = srcTextureCube.read(coords.xy, options.textureLayer, options.textureLevel); 1841 break; 1842 } 1843 return color; 1844} 1845# 215 ".\\copy_buffer.metal" 1846static inline float4 readR5G6B5_UNORM(uint bufferOffset, constant uchar *buffer) 1847{ 1848 float4 color; 1849 ushort src = bytesToShort<ushort>(buffer, bufferOffset); 1850 1851 color.r = normalizedToFloat<5>(getShiftedData<5, 11>(src)); 1852 color.g = normalizedToFloat<6>(getShiftedData<6, 5>(src)); 1853 color.b = normalizedToFloat<5>(getShiftedData<5, 0>(src)); 1854 color.a = 1.0; 1855 return color; 1856} 1857static inline void writeR5G6B5_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 1858{ 1859 ushort dst = shiftData<5, 11>(floatToNormalized<5, ushort>(color.r)) | 1860 shiftData<6, 5>(floatToNormalized<6, ushort>(color.g)) | 1861 shiftData<5, 0>(floatToNormalized<5, ushort>(color.b)); 1862 1863 shortToBytes(dst, bufferOffset, buffer); 1864} 1865 1866 1867static inline float4 readR4G4B4A4_UNORM(uint bufferOffset, constant uchar *buffer) 1868{ 1869 float4 color; 1870 ushort src = bytesToShort<ushort>(buffer, bufferOffset); 1871 1872 color.r = normalizedToFloat<4>(getShiftedData<4, 12>(src)); 1873 color.g = normalizedToFloat<4>(getShiftedData<4, 8>(src)); 1874 color.b = normalizedToFloat<4>(getShiftedData<4, 4>(src)); 1875 color.a = normalizedToFloat<4>(getShiftedData<4, 0>(src)); 1876 return color; 1877} 1878static inline void writeR4G4B4A4_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 1879{ 1880 ushort dst = shiftData<4, 12>(floatToNormalized<4, ushort>(color.r)) | 1881 shiftData<4, 8>(floatToNormalized<4, ushort>(color.g)) | 1882 shiftData<4, 4>(floatToNormalized<4, ushort>(color.b)) | 1883 shiftData<4, 0>(floatToNormalized<4, ushort>(color.a)); 1884 ; 1885 1886 shortToBytes(dst, bufferOffset, buffer); 1887} 1888 1889 1890static inline float4 readR5G5B5A1_UNORM(uint bufferOffset, constant uchar *buffer) 1891{ 1892 float4 color; 1893 ushort src = bytesToShort<ushort>(buffer, bufferOffset); 1894 1895 color.r = normalizedToFloat<5>(getShiftedData<5, 11>(src)); 1896 color.g = normalizedToFloat<5>(getShiftedData<5, 6>(src)); 1897 color.b = normalizedToFloat<5>(getShiftedData<5, 1>(src)); 1898 color.a = normalizedToFloat<1>(getShiftedData<1, 0>(src)); 1899 return color; 1900} 1901static inline void writeR5G5B5A1_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 1902{ 1903 ushort dst = shiftData<5, 11>(floatToNormalized<5, ushort>(color.r)) | 1904 shiftData<5, 6>(floatToNormalized<5, ushort>(color.g)) | 1905 shiftData<5, 1>(floatToNormalized<5, ushort>(color.b)) | 1906 shiftData<1, 0>(floatToNormalized<1, ushort>(color.a)); 1907 ; 1908 1909 shortToBytes(dst, bufferOffset, buffer); 1910} 1911 1912 1913static inline int4 readR10G10B10A2_SINT(uint bufferOffset, constant uchar *buffer) 1914{ 1915 int4 color; 1916 int src = bytesToInt<int>(buffer, bufferOffset); 1917 1918 constexpr int3 rgbSignMask(0x200); 1919 constexpr int3 negativeMask(0xfffffc00); 1920 constexpr int alphaSignMask = 0x2; 1921 constexpr int alphaNegMask = 0xfffffffc; 1922 1923 color.r = getShiftedData<10, 0>(src); 1924 color.g = getShiftedData<10, 10>(src); 1925 color.b = getShiftedData<10, 20>(src); 1926 1927 int3 isRgbNegative = (color.rgb & rgbSignMask) >> 9; 1928 color.rgb = (isRgbNegative * negativeMask) | color.rgb; 1929 1930 color.a = getShiftedData<2, 30>(src); 1931 int isAlphaNegative = color.a & alphaSignMask >> 1; 1932 color.a = (isAlphaNegative * alphaNegMask) | color.a; 1933 return color; 1934} 1935 1936static inline uint4 readR10G10B10A2_UINT(uint bufferOffset, constant uchar *buffer) 1937{ 1938 uint4 color; 1939 uint src = bytesToInt<uint>(buffer, bufferOffset); 1940 1941 color.r = getShiftedData<10, 0>(src); 1942 color.g = getShiftedData<10, 10>(src); 1943 color.b = getShiftedData<10, 20>(src); 1944 color.a = getShiftedData<2, 30>(src); 1945 return color; 1946} 1947 1948 1949static inline float4 readR8G8B8A8(uint bufferOffset, constant uchar *buffer, bool isSRGB) 1950{ 1951 float4 color; 1952 uint src = bytesToInt<uint>(buffer, bufferOffset); 1953 1954 if (isSRGB) 1955 { 1956 color = unpack_unorm4x8_srgb_to_float(src); 1957 } 1958 else 1959 { 1960 color = unpack_unorm4x8_to_float(src); 1961 } 1962 return color; 1963} 1964static inline void writeR8G8B8A8(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer, bool isSRGB) 1965{ 1966 uint dst; 1967 1968 if (isSRGB) 1969 { 1970 dst = pack_float_to_srgb_unorm4x8(color); 1971 } 1972 else 1973 { 1974 dst = pack_float_to_unorm4x8(color); 1975 } 1976 1977 intToBytes(dst, bufferOffset, buffer); 1978} 1979 1980static inline float4 readR8G8B8(uint bufferOffset, constant uchar *buffer, bool isSRGB) 1981{ 1982 float4 color; 1983 color.r = normalizedToFloat<uchar>(buffer[bufferOffset]); 1984 color.g = normalizedToFloat<uchar>(buffer[bufferOffset + 1]); 1985 color.b = normalizedToFloat<uchar>(buffer[bufferOffset + 2]); 1986 color.a = 1.0; 1987 1988 if (isSRGB) 1989 { 1990 color = sRGBtoLinear(color); 1991 } 1992 return color; 1993} 1994static inline void writeR8G8B8(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer, bool isSRGB) 1995{ 1996 color.a = 1.0; 1997 uint dst; 1998 1999 if (isSRGB) 2000 { 2001 dst = pack_float_to_srgb_unorm4x8(color); 2002 } 2003 else 2004 { 2005 dst = pack_float_to_unorm4x8(color); 2006 } 2007 int24bitToBytes(dst, bufferOffset, buffer); 2008} 2009 2010 2011static inline float4 readR8G8B8A8_SNORM(uint bufferOffset, constant uchar *buffer) 2012{ 2013 float4 color; 2014 uint src = bytesToInt<uint>(buffer, bufferOffset); 2015 2016 color = unpack_snorm4x8_to_float(src); 2017 2018 return color; 2019} 2020static inline void writeR8G8B8A8_SNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2021{ 2022 uint dst = pack_float_to_snorm4x8(color); 2023 2024 intToBytes(dst, bufferOffset, buffer); 2025} 2026 2027 2028static inline float4 readR8G8B8_SNORM(uint bufferOffset, constant uchar *buffer) 2029{ 2030 float4 color; 2031 color.r = normalizedToFloat<7, char>(buffer[bufferOffset]); 2032 color.g = normalizedToFloat<7, char>(buffer[bufferOffset + 1]); 2033 color.b = normalizedToFloat<7, char>(buffer[bufferOffset + 2]); 2034 color.a = 1.0; 2035 2036 return color; 2037} 2038static inline void writeR8G8B8_SNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2039{ 2040 uint dst = pack_float_to_snorm4x8(color); 2041 2042 int24bitToBytes(dst, bufferOffset, buffer); 2043} 2044 2045 2046static inline float4 readR8G8B8A8_UNORM(uint bufferOffset, constant uchar *buffer) 2047{ 2048 return readR8G8B8A8(bufferOffset, buffer, false); 2049} 2050static inline void writeR8G8B8A8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2051{ 2052 return writeR8G8B8A8(gIndices, options, bufferOffset, color, buffer, false); 2053} 2054 2055static inline float4 readR8G8B8A8_UNORM_SRGB(uint bufferOffset, constant uchar *buffer) 2056{ 2057 return readR8G8B8A8(bufferOffset, buffer, true); 2058} 2059static inline void writeR8G8B8A8_UNORM_SRGB(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2060{ 2061 return writeR8G8B8A8(gIndices, options, bufferOffset, color, buffer, true); 2062} 2063 2064 2065static inline float4 readB8G8R8A8_UNORM(uint bufferOffset, constant uchar *buffer) 2066{ 2067 return readR8G8B8A8(bufferOffset, buffer, false).bgra; 2068} 2069static inline void writeB8G8R8A8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2070{ 2071 color.rgba = color.bgra; 2072 return writeR8G8B8A8(gIndices, options, bufferOffset, color, buffer, false); 2073} 2074 2075static inline float4 readB8G8R8A8_UNORM_SRGB(uint bufferOffset, constant uchar *buffer) 2076{ 2077 return readR8G8B8A8(bufferOffset, buffer, true).bgra; 2078} 2079static inline void writeB8G8R8A8_UNORM_SRGB(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2080{ 2081 color.rgba = color.bgra; 2082 return writeR8G8B8A8(gIndices, options, bufferOffset, color, buffer, true); 2083} 2084 2085 2086static inline float4 readR8G8B8_UNORM(uint bufferOffset, constant uchar *buffer) 2087{ 2088 return readR8G8B8(bufferOffset, buffer, false); 2089} 2090static inline void writeR8G8B8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2091{ 2092 return writeR8G8B8(gIndices, options, bufferOffset, color, buffer, false); 2093} 2094 2095static inline float4 readR8G8B8_UNORM_SRGB(uint bufferOffset, constant uchar *buffer) 2096{ 2097 return readR8G8B8(bufferOffset, buffer, true); 2098} 2099static inline void writeR8G8B8_UNORM_SRGB(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2100{ 2101 return writeR8G8B8(gIndices, options, bufferOffset, color, buffer, true); 2102} 2103 2104 2105static inline float4 readL8_UNORM(uint bufferOffset, constant uchar *buffer) 2106{ 2107 float4 color; 2108 color.rgb = float3(normalizedToFloat<uchar>(buffer[bufferOffset])); 2109 color.a = 1.0; 2110 return color; 2111} 2112static inline void writeL8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2113{ 2114 buffer[bufferOffset] = floatToNormalized<uchar>(color.r); 2115} 2116 2117 2118static inline void writeA8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2119{ 2120 buffer[bufferOffset] = floatToNormalized<uchar>(color.a); 2121} 2122 2123 2124static inline float4 readL8A8_UNORM(uint bufferOffset, constant uchar *buffer) 2125{ 2126 float4 color; 2127 color.rgb = float3(normalizedToFloat<uchar>(buffer[bufferOffset])); 2128 color.a = normalizedToFloat<uchar>(buffer[bufferOffset + 1]); 2129 return color; 2130} 2131static inline void writeL8A8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2132{ 2133 buffer[bufferOffset] = floatToNormalized<uchar>(color.r); 2134 buffer[bufferOffset + 1] = floatToNormalized<uchar>(color.a); 2135} 2136 2137 2138static inline float4 readR8_UNORM(uint bufferOffset, constant uchar *buffer) 2139{ 2140 float4 color; 2141 color.r = normalizedToFloat<uchar>(buffer[bufferOffset]); 2142 color.g = color.b = 0.0; 2143 color.a = 1.0; 2144 return color; 2145} 2146static inline void writeR8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2147{ 2148 buffer[bufferOffset] = floatToNormalized<uchar>(color.r); 2149} 2150 2151static inline float4 readR8_SNORM(uint bufferOffset, constant uchar *buffer) 2152{ 2153 float4 color; 2154 color.r = normalizedToFloat<7, char>(buffer[bufferOffset]); 2155 color.g = color.b = 0.0; 2156 color.a = 1.0; 2157 return color; 2158} 2159static inline void writeR8_SNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2160{ 2161 buffer[bufferOffset] = as_type<uchar>(floatToNormalized<char>(color.r)); 2162} 2163 2164 2165static inline int4 readR8_SINT(uint bufferOffset, constant uchar *buffer) 2166{ 2167 int4 color; 2168 color.r = as_type<char>(buffer[bufferOffset]); 2169 color.g = color.b = 0; 2170 color.a = 1; 2171 return color; 2172} 2173static inline void writeR8_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer) 2174{ 2175 buffer[bufferOffset] = static_cast<uchar>(color.r); 2176} 2177 2178 2179static inline uint4 readR8_UINT(uint bufferOffset, constant uchar *buffer) 2180{ 2181 uint4 color; 2182 color.r = as_type<uchar>(buffer[bufferOffset]); 2183 color.g = color.b = 0; 2184 color.a = 1; 2185 return color; 2186} 2187static inline void writeR8_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer) 2188{ 2189 buffer[bufferOffset] = static_cast<uchar>(color.r); 2190} 2191 2192 2193static inline float4 readR8G8_UNORM(uint bufferOffset, constant uchar *buffer) 2194{ 2195 float4 color; 2196 color.r = normalizedToFloat<uchar>(buffer[bufferOffset]); 2197 color.g = normalizedToFloat<uchar>(buffer[bufferOffset + 1]); 2198 color.b = 0.0; 2199 color.a = 1.0; 2200 return color; 2201} 2202static inline void writeR8G8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2203{ 2204 buffer[bufferOffset] = floatToNormalized<uchar>(color.r); 2205 buffer[bufferOffset + 1] = floatToNormalized<uchar>(color.g); 2206} 2207 2208static inline float4 readR8G8_SNORM(uint bufferOffset, constant uchar *buffer) 2209{ 2210 float4 color; 2211 color.r = normalizedToFloat<7, char>(buffer[bufferOffset]); 2212 color.g = normalizedToFloat<7, char>(buffer[bufferOffset + 1]); 2213 color.b = 0.0; 2214 color.a = 1.0; 2215 return color; 2216} 2217static inline void writeR8G8_SNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2218{ 2219 buffer[bufferOffset] = as_type<uchar>(floatToNormalized<char>(color.r)); 2220 buffer[bufferOffset + 1] = as_type<uchar>(floatToNormalized<char>(color.g)); 2221} 2222 2223 2224static inline int4 readR8G8_SINT(uint bufferOffset, constant uchar *buffer) 2225{ 2226 int4 color; 2227 color.r = as_type<char>(buffer[bufferOffset]); 2228 color.g = as_type<char>(buffer[bufferOffset + 1]); 2229 color.b = 0; 2230 color.a = 1; 2231 return color; 2232} 2233static inline void writeR8G8_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer) 2234{ 2235 buffer[bufferOffset] = static_cast<uchar>(color.r); 2236 buffer[bufferOffset + 1] = static_cast<uchar>(color.g); 2237} 2238 2239 2240static inline uint4 readR8G8_UINT(uint bufferOffset, constant uchar *buffer) 2241{ 2242 uint4 color; 2243 color.r = as_type<uchar>(buffer[bufferOffset]); 2244 color.g = as_type<uchar>(buffer[bufferOffset + 1]); 2245 color.b = 0; 2246 color.a = 1; 2247 return color; 2248} 2249static inline void writeR8G8_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer) 2250{ 2251 buffer[bufferOffset] = static_cast<uchar>(color.r); 2252 buffer[bufferOffset + 1] = static_cast<uchar>(color.g); 2253} 2254 2255 2256static inline int4 readR8G8B8_SINT(uint bufferOffset, constant uchar *buffer) 2257{ 2258 int4 color; 2259 color.r = as_type<char>(buffer[bufferOffset]); 2260 color.g = as_type<char>(buffer[bufferOffset + 1]); 2261 color.b = as_type<char>(buffer[bufferOffset + 2]); 2262 color.a = 1; 2263 return color; 2264} 2265 2266 2267static inline uint4 readR8G8B8_UINT(uint bufferOffset, constant uchar *buffer) 2268{ 2269 uint4 color; 2270 color.r = as_type<uchar>(buffer[bufferOffset]); 2271 color.g = as_type<uchar>(buffer[bufferOffset + 1]); 2272 color.b = as_type<uchar>(buffer[bufferOffset + 2]); 2273 color.a = 1; 2274 return color; 2275} 2276 2277 2278static inline int4 readR8G8B8A8_SINT(uint bufferOffset, constant uchar *buffer) 2279{ 2280 int4 color; 2281 color.r = as_type<char>(buffer[bufferOffset]); 2282 color.g = as_type<char>(buffer[bufferOffset + 1]); 2283 color.b = as_type<char>(buffer[bufferOffset + 2]); 2284 color.a = as_type<char>(buffer[bufferOffset + 3]); 2285 return color; 2286} 2287static inline void writeR8G8B8A8_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer) 2288{ 2289 buffer[bufferOffset] = static_cast<uchar>(color.r); 2290 buffer[bufferOffset + 1] = static_cast<uchar>(color.g); 2291 buffer[bufferOffset + 2] = static_cast<uchar>(color.b); 2292 buffer[bufferOffset + 3] = static_cast<uchar>(color.a); 2293} 2294 2295 2296static inline uint4 readR8G8B8A8_UINT(uint bufferOffset, constant uchar *buffer) 2297{ 2298 uint4 color; 2299 color.r = as_type<uchar>(buffer[bufferOffset]); 2300 color.g = as_type<uchar>(buffer[bufferOffset + 1]); 2301 color.b = as_type<uchar>(buffer[bufferOffset + 2]); 2302 color.a = as_type<uchar>(buffer[bufferOffset + 3]); 2303 return color; 2304} 2305static inline void writeR8G8B8A8_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer) 2306{ 2307 buffer[bufferOffset] = static_cast<uchar>(color.r); 2308 buffer[bufferOffset + 1] = static_cast<uchar>(color.g); 2309 buffer[bufferOffset + 2] = static_cast<uchar>(color.b); 2310 buffer[bufferOffset + 3] = static_cast<uchar>(color.a); 2311} 2312 2313 2314static inline float4 readR16_FLOAT(uint bufferOffset, constant uchar *buffer) 2315{ 2316 float4 color; 2317 color.r = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset)); 2318 color.g = color.b = 0.0; 2319 color.a = 1.0; 2320 return color; 2321} 2322static inline void writeR16_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2323{ 2324 shortToBytes(as_type<ushort>(static_cast<half>(color.r)), bufferOffset, buffer); 2325} 2326 2327template <typename ShortType> 2328static inline float4 readR16_NORM(uint bufferOffset, constant uchar *buffer) 2329{ 2330 float4 color; 2331 color.r = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset)); 2332 color.g = color.b = 0.0; 2333 color.a = 1.0; 2334 return color; 2335} 2336 2337 2338 2339template<typename ShortType> 2340static inline void writeR16_NORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2341{ 2342 shortToBytes(floatToNormalized<ShortType>(color.r), bufferOffset, buffer); 2343} 2344 2345 2346 2347 2348static inline int4 readR16_SINT(uint bufferOffset, constant uchar *buffer) 2349{ 2350 int4 color; 2351 color.r = bytesToShort<short>(buffer, bufferOffset); 2352 color.g = color.b = 0; 2353 color.a = 1; 2354 return color; 2355} 2356static inline void writeR16_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer) 2357{ 2358 shortToBytes(static_cast<short>(color.r), bufferOffset, buffer); 2359} 2360 2361 2362static inline uint4 readR16_UINT(uint bufferOffset, constant uchar *buffer) 2363{ 2364 uint4 color; 2365 color.r = bytesToShort<ushort>(buffer, bufferOffset); 2366 color.g = color.b = 0; 2367 color.a = 1; 2368 return color; 2369} 2370static inline void writeR16_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer) 2371{ 2372 shortToBytes(static_cast<ushort>(color.r), bufferOffset, buffer); 2373} 2374 2375 2376static inline float4 readA16_FLOAT(uint bufferOffset, constant uchar *buffer) 2377{ 2378 float4 color; 2379 color.a = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset)); 2380 color.rgb = 0.0; 2381 return color; 2382} 2383static inline void writeA16_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2384{ 2385 shortToBytes(as_type<ushort>(static_cast<half>(color.a)), bufferOffset, buffer); 2386} 2387 2388 2389static inline float4 readL16_FLOAT(uint bufferOffset, constant uchar *buffer) 2390{ 2391 float4 color; 2392 color.rgb = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset)); 2393 color.a = 1.0; 2394 return color; 2395} 2396static inline void writeL16_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2397{ 2398 shortToBytes(as_type<ushort>(static_cast<half>(color.r)), bufferOffset, buffer); 2399} 2400 2401 2402static inline float4 readL16A16_FLOAT(uint bufferOffset, constant uchar *buffer) 2403{ 2404 float4 color; 2405 color.rgb = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset)); 2406 color.a = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 2)); 2407 return color; 2408} 2409static inline void writeL16A16_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2410{ 2411 shortToBytes(as_type<ushort>(static_cast<half>(color.r)), bufferOffset, buffer); 2412 shortToBytes(as_type<ushort>(static_cast<half>(color.a)), bufferOffset + 2, buffer); 2413} 2414 2415 2416static inline float4 readR16G16_FLOAT(uint bufferOffset, constant uchar *buffer) 2417{ 2418 float4 color; 2419 color.r = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset)); 2420 color.g = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 2)); 2421 color.b = 0.0; 2422 color.a = 1.0; 2423 return color; 2424} 2425static inline void writeR16G16_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2426{ 2427 shortToBytes(as_type<ushort>(static_cast<half>(color.r)), bufferOffset, buffer); 2428 shortToBytes(as_type<ushort>(static_cast<half>(color.g)), bufferOffset + 2, buffer); 2429} 2430 2431 2432template <typename ShortType> 2433static inline float4 readR16G16_NORM(uint bufferOffset, constant uchar *buffer) 2434{ 2435 float4 color; 2436 color.r = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset)); 2437 color.g = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 2)); 2438 color.b = 0.0; 2439 color.a = 1.0; 2440 return color; 2441} 2442 2443 2444 2445template<typename ShortType> 2446static inline void writeR16G16_NORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2447{ 2448 shortToBytes(floatToNormalized<ShortType>(color.r), bufferOffset, buffer); 2449 shortToBytes(floatToNormalized<ShortType>(color.g), bufferOffset + 2, buffer); 2450} 2451 2452 2453 2454 2455static inline int4 readR16G16_SINT(uint bufferOffset, constant uchar *buffer) 2456{ 2457 int4 color; 2458 color.r = bytesToShort<short>(buffer, bufferOffset); 2459 color.g = bytesToShort<short>(buffer, bufferOffset + 2); 2460 color.b = 0; 2461 color.a = 1; 2462 return color; 2463} 2464static inline void writeR16G16_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer) 2465{ 2466 shortToBytes(static_cast<short>(color.r), bufferOffset, buffer); 2467 shortToBytes(static_cast<short>(color.g), bufferOffset + 2, buffer); 2468} 2469 2470 2471static inline uint4 readR16G16_UINT(uint bufferOffset, constant uchar *buffer) 2472{ 2473 uint4 color; 2474 color.r = bytesToShort<ushort>(buffer, bufferOffset); 2475 color.g = bytesToShort<ushort>(buffer, bufferOffset + 2); 2476 color.b = 0; 2477 color.a = 1; 2478 return color; 2479} 2480static inline void writeR16G16_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer) 2481{ 2482 shortToBytes(static_cast<ushort>(color.r), bufferOffset, buffer); 2483 shortToBytes(static_cast<ushort>(color.g), bufferOffset + 2, buffer); 2484} 2485 2486 2487static inline float4 readR16G16B16_FLOAT(uint bufferOffset, constant uchar *buffer) 2488{ 2489 float4 color; 2490 color.r = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset)); 2491 color.g = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 2)); 2492 color.b = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 4)); 2493 color.a = 1.0; 2494 return color; 2495} 2496 2497 2498template <typename ShortType> 2499static inline float4 readR16G16B16_NORM(uint bufferOffset, constant uchar *buffer) 2500{ 2501 float4 color; 2502 color.r = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset)); 2503 color.g = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 2)); 2504 color.b = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 4)); 2505 color.a = 1.0; 2506 return color; 2507} 2508 2509 2510 2511static inline int4 readR16G16B16_SINT(uint bufferOffset, constant uchar *buffer) 2512{ 2513 int4 color; 2514 color.r = bytesToShort<short>(buffer, bufferOffset); 2515 color.g = bytesToShort<short>(buffer, bufferOffset + 2); 2516 color.b = bytesToShort<short>(buffer, bufferOffset + 4); 2517 color.a = 1; 2518 return color; 2519} 2520 2521 2522static inline uint4 readR16G16B16_UINT(uint bufferOffset, constant uchar *buffer) 2523{ 2524 uint4 color; 2525 color.r = bytesToShort<ushort>(buffer, bufferOffset); 2526 color.g = bytesToShort<ushort>(buffer, bufferOffset + 2); 2527 color.b = bytesToShort<ushort>(buffer, bufferOffset + 4); 2528 color.a = 1; 2529 return color; 2530} 2531 2532 2533static inline float4 readR16G16B16A16_FLOAT(uint bufferOffset, constant uchar *buffer) 2534{ 2535 float4 color; 2536 color.r = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset)); 2537 color.g = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 2)); 2538 color.b = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 4)); 2539 color.a = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 6)); 2540 return color; 2541} 2542static inline void writeR16G16B16A16_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2543{ 2544 shortToBytes(as_type<ushort>(static_cast<half>(color.r)), bufferOffset, buffer); 2545 shortToBytes(as_type<ushort>(static_cast<half>(color.g)), bufferOffset + 2, buffer); 2546 shortToBytes(as_type<ushort>(static_cast<half>(color.b)), bufferOffset + 4, buffer); 2547 shortToBytes(as_type<ushort>(static_cast<half>(color.a)), bufferOffset + 6, buffer); 2548} 2549 2550 2551template <typename ShortType> 2552static inline float4 readR16G16B16A16_NORM(uint bufferOffset, constant uchar *buffer) 2553{ 2554 float4 color; 2555 color.r = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset)); 2556 color.g = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 2)); 2557 color.b = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 4)); 2558 color.a = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 6)); 2559 return color; 2560} 2561 2562 2563 2564template<typename ShortType> 2565static inline void writeR16G16B16A16_NORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2566{ 2567 shortToBytes(floatToNormalized<ShortType>(color.r), bufferOffset, buffer); 2568 shortToBytes(floatToNormalized<ShortType>(color.g), bufferOffset + 2, buffer); 2569 shortToBytes(floatToNormalized<ShortType>(color.b), bufferOffset + 4, buffer); 2570 shortToBytes(floatToNormalized<ShortType>(color.a), bufferOffset + 6, buffer); 2571} 2572 2573 2574 2575 2576static inline int4 readR16G16B16A16_SINT(uint bufferOffset, constant uchar *buffer) 2577{ 2578 int4 color; 2579 color.r = bytesToShort<short>(buffer, bufferOffset); 2580 color.g = bytesToShort<short>(buffer, bufferOffset + 2); 2581 color.b = bytesToShort<short>(buffer, bufferOffset + 4); 2582 color.a = bytesToShort<short>(buffer, bufferOffset + 6); 2583 return color; 2584} 2585static inline void writeR16G16B16A16_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer) 2586{ 2587 shortToBytes(static_cast<short>(color.r), bufferOffset, buffer); 2588 shortToBytes(static_cast<short>(color.g), bufferOffset + 2, buffer); 2589 shortToBytes(static_cast<short>(color.b), bufferOffset + 4, buffer); 2590 shortToBytes(static_cast<short>(color.a), bufferOffset + 6, buffer); 2591} 2592 2593 2594static inline uint4 readR16G16B16A16_UINT(uint bufferOffset, constant uchar *buffer) 2595{ 2596 uint4 color; 2597 color.r = bytesToShort<ushort>(buffer, bufferOffset); 2598 color.g = bytesToShort<ushort>(buffer, bufferOffset + 2); 2599 color.b = bytesToShort<ushort>(buffer, bufferOffset + 4); 2600 color.a = bytesToShort<ushort>(buffer, bufferOffset + 6); 2601 return color; 2602} 2603static inline void writeR16G16B16A16_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer) 2604{ 2605 shortToBytes(static_cast<ushort>(color.r), bufferOffset, buffer); 2606 shortToBytes(static_cast<ushort>(color.g), bufferOffset + 2, buffer); 2607 shortToBytes(static_cast<ushort>(color.b), bufferOffset + 4, buffer); 2608 shortToBytes(static_cast<ushort>(color.a), bufferOffset + 6, buffer); 2609} 2610 2611 2612static inline float4 readR32_FLOAT(uint bufferOffset, constant uchar *buffer) 2613{ 2614 float4 color; 2615 color.r = as_type<float>(bytesToInt<uint>(buffer, bufferOffset)); 2616 color.g = color.b = 0.0; 2617 color.a = 1.0; 2618 return color; 2619} 2620static inline void writeR32_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2621{ 2622 intToBytes(as_type<uint>(color.r), bufferOffset, buffer); 2623} 2624 2625 2626template <typename IntType> 2627static inline float4 readR32_NORM(uint bufferOffset, constant uchar *buffer) 2628{ 2629 float4 color; 2630 color.r = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset)); 2631 color.g = color.b = 0.0; 2632 color.a = 1.0; 2633 return color; 2634} 2635 2636 2637 2638 2639static inline float4 readA32_FLOAT(uint bufferOffset, constant uchar *buffer) 2640{ 2641 float4 color; 2642 color.a = as_type<float>(bytesToInt<uint>(buffer, bufferOffset)); 2643 color.rgb = 0.0; 2644 return color; 2645} 2646static inline void writeA32_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2647{ 2648 intToBytes(as_type<uint>(color.a), bufferOffset, buffer); 2649} 2650 2651 2652static inline float4 readL32_FLOAT(uint bufferOffset, constant uchar *buffer) 2653{ 2654 float4 color; 2655 color.rgb = as_type<float>(bytesToInt<uint>(buffer, bufferOffset)); 2656 color.a = 1.0; 2657 return color; 2658} 2659static inline void writeL32_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2660{ 2661 intToBytes(as_type<uint>(color.r), bufferOffset, buffer); 2662} 2663 2664 2665static inline int4 readR32_SINT(uint bufferOffset, constant uchar *buffer) 2666{ 2667 int4 color; 2668 color.r = bytesToInt<int>(buffer, bufferOffset); 2669 color.g = color.b = 0; 2670 color.a = 1; 2671 return color; 2672} 2673static inline void writeR32_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer) 2674{ 2675 intToBytes(color.r, bufferOffset, buffer); 2676} 2677 2678 2679static inline float4 readR32_FIXED(uint bufferOffset, constant uchar *buffer) 2680{ 2681 float4 color; 2682 constexpr float kDivisor = 1.0f / (1 << 16); 2683 color.r = bytesToInt<int>(buffer, bufferOffset) * kDivisor; 2684 color.g = color.b = 0.0; 2685 color.a = 1.0; 2686 return color; 2687} 2688 2689 2690static inline uint4 readR32_UINT(uint bufferOffset, constant uchar *buffer) 2691{ 2692 uint4 color; 2693 color.r = bytesToInt<uint>(buffer, bufferOffset); 2694 color.g = color.b = 0; 2695 color.a = 1; 2696 return color; 2697} 2698static inline void writeR32_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer) 2699{ 2700 intToBytes(color.r, bufferOffset, buffer); 2701} 2702 2703 2704static inline float4 readL32A32_FLOAT(uint bufferOffset, constant uchar *buffer) 2705{ 2706 float4 color; 2707 color.rgb = as_type<float>(bytesToInt<uint>(buffer, bufferOffset)); 2708 color.a = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 4)); 2709 return color; 2710} 2711static inline void writeL32A32_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2712{ 2713 intToBytes(as_type<uint>(color.r), bufferOffset, buffer); 2714 intToBytes(as_type<uint>(color.a), bufferOffset + 4, buffer); 2715} 2716 2717 2718static inline float4 readR32G32_FLOAT(uint bufferOffset, constant uchar *buffer) 2719{ 2720 float4 color; 2721 color.r = as_type<float>(bytesToInt<uint>(buffer, bufferOffset)); 2722 color.g = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 4)); 2723 color.b = 0.0; 2724 color.a = 1.0; 2725 return color; 2726} 2727static inline void writeR32G32_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2728{ 2729 intToBytes(as_type<uint>(color.r), bufferOffset, buffer); 2730 intToBytes(as_type<uint>(color.g), bufferOffset + 4, buffer); 2731} 2732 2733 2734template <typename IntType> 2735static inline float4 readR32G32_NORM(uint bufferOffset, constant uchar *buffer) 2736{ 2737 float4 color; 2738 color.r = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset)); 2739 color.g = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset + 4)); 2740 color.b = 0.0; 2741 color.a = 1.0; 2742 return color; 2743} 2744 2745 2746 2747 2748static inline int4 readR32G32_SINT(uint bufferOffset, constant uchar *buffer) 2749{ 2750 int4 color; 2751 color.r = bytesToInt<int>(buffer, bufferOffset); 2752 color.g = bytesToInt<int>(buffer, bufferOffset + 4); 2753 color.b = 0; 2754 color.a = 1; 2755 return color; 2756} 2757static inline void writeR32G32_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer) 2758{ 2759 intToBytes(color.r, bufferOffset, buffer); 2760 intToBytes(color.g, bufferOffset + 4, buffer); 2761} 2762 2763 2764static inline float4 readR32G32_FIXED(uint bufferOffset, constant uchar *buffer) 2765{ 2766 float4 color; 2767 constexpr float kDivisor = 1.0f / (1 << 16); 2768 color.r = bytesToInt<int>(buffer, bufferOffset) * kDivisor; 2769 color.g = bytesToInt<int>(buffer, bufferOffset + 4) * kDivisor; 2770 color.b = 0.0; 2771 color.a = 1.0; 2772 return color; 2773} 2774 2775 2776static inline uint4 readR32G32_UINT(uint bufferOffset, constant uchar *buffer) 2777{ 2778 uint4 color; 2779 color.r = bytesToInt<uint>(buffer, bufferOffset); 2780 color.g = bytesToInt<uint>(buffer, bufferOffset + 4); 2781 color.b = 0; 2782 color.a = 1; 2783 return color; 2784} 2785static inline void writeR32G32_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer) 2786{ 2787 intToBytes(color.r, bufferOffset, buffer); 2788 intToBytes(color.g, bufferOffset + 4, buffer); 2789} 2790 2791 2792static inline float4 readR32G32B32_FLOAT(uint bufferOffset, constant uchar *buffer) 2793{ 2794 float4 color; 2795 color.r = as_type<float>(bytesToInt<uint>(buffer, bufferOffset)); 2796 color.g = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 4)); 2797 color.b = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 8)); 2798 color.a = 1.0; 2799 return color; 2800} 2801 2802 2803template <typename IntType> 2804static inline float4 readR32G32B32_NORM(uint bufferOffset, constant uchar *buffer) 2805{ 2806 float4 color; 2807 color.r = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset)); 2808 color.g = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset + 4)); 2809 color.b = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset + 8)); 2810 color.a = 1.0; 2811 return color; 2812} 2813 2814 2815 2816 2817static inline int4 readR32G32B32_SINT(uint bufferOffset, constant uchar *buffer) 2818{ 2819 int4 color; 2820 color.r = bytesToInt<int>(buffer, bufferOffset); 2821 color.g = bytesToInt<int>(buffer, bufferOffset + 4); 2822 color.b = bytesToInt<int>(buffer, bufferOffset + 8); 2823 color.a = 1; 2824 return color; 2825} 2826 2827 2828static inline float4 readR32G32B32_FIXED(uint bufferOffset, constant uchar *buffer) 2829{ 2830 float4 color; 2831 constexpr float kDivisor = 1.0f / (1 << 16); 2832 color.r = bytesToInt<int>(buffer, bufferOffset) * kDivisor; 2833 color.g = bytesToInt<int>(buffer, bufferOffset + 4) * kDivisor; 2834 color.b = bytesToInt<int>(buffer, bufferOffset + 8) * kDivisor; 2835 color.a = 1.0; 2836 return color; 2837} 2838 2839 2840static inline uint4 readR32G32B32_UINT(uint bufferOffset, constant uchar *buffer) 2841{ 2842 uint4 color; 2843 color.r = bytesToInt<uint>(buffer, bufferOffset); 2844 color.g = bytesToInt<uint>(buffer, bufferOffset + 4); 2845 color.b = bytesToInt<uint>(buffer, bufferOffset + 8); 2846 color.a = 1; 2847 return color; 2848} 2849 2850 2851static inline float4 readR32G32B32A32_FLOAT(uint bufferOffset, constant uchar *buffer) 2852{ 2853 float4 color; 2854 color.r = as_type<float>(bytesToInt<uint>(buffer, bufferOffset)); 2855 color.g = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 4)); 2856 color.b = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 8)); 2857 color.a = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 12)); 2858 return color; 2859} 2860static inline void writeR32G32B32A32_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) 2861{ 2862 intToBytes(as_type<uint>(color.r), bufferOffset, buffer); 2863 intToBytes(as_type<uint>(color.g), bufferOffset + 4, buffer); 2864 intToBytes(as_type<uint>(color.b), bufferOffset + 8, buffer); 2865 intToBytes(as_type<uint>(color.a), bufferOffset + 12, buffer); 2866} 2867 2868 2869template <typename IntType> 2870static inline float4 readR32G32B32A32_NORM(uint bufferOffset, constant uchar *buffer) 2871{ 2872 float4 color; 2873 color.r = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset)); 2874 color.g = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset + 4)); 2875 color.b = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset + 8)); 2876 color.a = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset + 12)); 2877 return color; 2878} 2879 2880 2881 2882 2883static inline int4 readR32G32B32A32_SINT(uint bufferOffset, constant uchar *buffer) 2884{ 2885 int4 color; 2886 color.r = bytesToInt<int>(buffer, bufferOffset); 2887 color.g = bytesToInt<int>(buffer, bufferOffset + 4); 2888 color.b = bytesToInt<int>(buffer, bufferOffset + 8); 2889 color.a = bytesToInt<int>(buffer, bufferOffset + 12); 2890 return color; 2891} 2892static inline void writeR32G32B32A32_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer) 2893{ 2894 intToBytes(color.r, bufferOffset, buffer); 2895 intToBytes(color.g, bufferOffset + 4, buffer); 2896 intToBytes(color.b, bufferOffset + 8, buffer); 2897 intToBytes(color.a, bufferOffset + 12, buffer); 2898} 2899 2900static inline float4 readR32G32B32A32_FIXED(uint bufferOffset, constant uchar *buffer) 2901{ 2902 float4 color; 2903 constexpr float kDivisor = 1.0f / (1 << 16); 2904 color.r = bytesToInt<int>(buffer, bufferOffset) * kDivisor; 2905 color.g = bytesToInt<int>(buffer, bufferOffset + 4) * kDivisor; 2906 color.b = bytesToInt<int>(buffer, bufferOffset + 8) * kDivisor; 2907 color.a = bytesToInt<int>(buffer, bufferOffset + 12) * kDivisor; 2908 return color; 2909} 2910 2911 2912static inline uint4 readR32G32B32A32_UINT(uint bufferOffset, constant uchar *buffer) 2913{ 2914 uint4 color; 2915 color.r = bytesToInt<uint>(buffer, bufferOffset); 2916 color.g = bytesToInt<uint>(buffer, bufferOffset + 4); 2917 color.b = bytesToInt<uint>(buffer, bufferOffset + 8); 2918 color.a = bytesToInt<uint>(buffer, bufferOffset + 12); 2919 return color; 2920} 2921static inline void writeR32G32B32A32_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer) 2922{ 2923 intToBytes(color.r, bufferOffset, buffer); 2924 intToBytes(color.g, bufferOffset + 4, buffer); 2925 intToBytes(color.b, bufferOffset + 8, buffer); 2926 intToBytes(color.a, bufferOffset + 12, buffer); 2927} 2928# 1320 ".\\copy_buffer.metal" 2929static inline int4 readR8_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR8_SINT(bufferOffset, buffer); } static inline uint4 readR8_USCALED(uint bufferOffset, constant uchar *buffer) { return readR8_UINT(bufferOffset, buffer); } static inline int4 readR8G8_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR8G8_SINT(bufferOffset, buffer); } static inline uint4 readR8G8_USCALED(uint bufferOffset, constant uchar *buffer) { return readR8G8_UINT(bufferOffset, buffer); } static inline int4 readR8G8B8_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR8G8B8_SINT(bufferOffset, buffer); } static inline uint4 readR8G8B8_USCALED(uint bufferOffset, constant uchar *buffer) { return readR8G8B8_UINT(bufferOffset, buffer); } static inline int4 readR8G8B8A8_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR8G8B8A8_SINT(bufferOffset, buffer); } static inline uint4 readR8G8B8A8_USCALED(uint bufferOffset, constant uchar *buffer) { return readR8G8B8A8_UINT(bufferOffset, buffer); } 2930static inline int4 readR16_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR16_SINT(bufferOffset, buffer); } static inline uint4 readR16_USCALED(uint bufferOffset, constant uchar *buffer) { return readR16_UINT(bufferOffset, buffer); } static inline int4 readR16G16_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR16G16_SINT(bufferOffset, buffer); } static inline uint4 readR16G16_USCALED(uint bufferOffset, constant uchar *buffer) { return readR16G16_UINT(bufferOffset, buffer); } static inline int4 readR16G16B16_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR16G16B16_SINT(bufferOffset, buffer); } static inline uint4 readR16G16B16_USCALED(uint bufferOffset, constant uchar *buffer) { return readR16G16B16_UINT(bufferOffset, buffer); } static inline int4 readR16G16B16A16_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR16G16B16A16_SINT(bufferOffset, buffer); } static inline uint4 readR16G16B16A16_USCALED(uint bufferOffset, constant uchar *buffer) { return readR16G16B16A16_UINT(bufferOffset, buffer); } 2931static inline int4 readR32_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR32_SINT(bufferOffset, buffer); } static inline uint4 readR32_USCALED(uint bufferOffset, constant uchar *buffer) { return readR32_UINT(bufferOffset, buffer); } static inline int4 readR32G32_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR32G32_SINT(bufferOffset, buffer); } static inline uint4 readR32G32_USCALED(uint bufferOffset, constant uchar *buffer) { return readR32G32_UINT(bufferOffset, buffer); } static inline int4 readR32G32B32_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR32G32B32_SINT(bufferOffset, buffer); } static inline uint4 readR32G32B32_USCALED(uint bufferOffset, constant uchar *buffer) { return readR32G32B32_UINT(bufferOffset, buffer); } static inline int4 readR32G32B32A32_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR32G32B32A32_SINT(bufferOffset, buffer); } static inline uint4 readR32G32B32A32_USCALED(uint bufferOffset, constant uchar *buffer) { return readR32G32B32A32_UINT(bufferOffset, buffer); } 2932 2933static inline int4 readR10G10B10A2_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR10G10B10A2_SINT(bufferOffset, buffer); } static inline uint4 readR10G10B10A2_USCALED(uint bufferOffset, constant uchar *buffer) { return readR10G10B10A2_UINT(bufferOffset, buffer); } 2934 2935 2936kernel void readFromBufferToFloatTexture(ushort3 gIndices [[thread_position_in_grid]], constant CopyPixelParams &options[[buffer(0)]], constant uchar *buffer [[buffer(1)]], texture2d<float, access::write> dstTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<float, access::write> dstTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<float, access::write> dstTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<float, access::write> dstTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]]) 2937{ 2938 if (gIndices.x >= options.copySize.x || gIndices.y >= options.copySize.y || gIndices.z >= options.copySize.z) { return; } 2939# 1372 ".\\copy_buffer.metal" 2940 uint bufferOffset = options.bufferStartOffset + (gIndices.z * options.bufferDepthPitch + gIndices.y * options.bufferRowPitch + gIndices.x * options.pixelSize); 2941 2942 switch (kCopyFormatType) 2943 { 2944 case FormatID::R5G6B5_UNORM: { auto color = readR5G6B5_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8A8_UNORM: { auto color = readR8G8B8A8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8A8_UNORM_SRGB: { auto color = readR8G8B8A8_UNORM_SRGB(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8A8_SNORM: { auto color = readR8G8B8A8_SNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::B8G8R8A8_UNORM: { auto color = readB8G8R8A8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::B8G8R8A8_UNORM_SRGB: { auto color = readB8G8R8A8_UNORM_SRGB(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8_UNORM: { auto color = readR8G8B8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8_UNORM_SRGB: { auto color = readR8G8B8_UNORM_SRGB(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8_SNORM: { auto color = readR8G8B8_SNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::L8_UNORM: { auto color = readL8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::L8A8_UNORM: { auto color = readL8A8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R5G5B5A1_UNORM: { auto color = readR5G5B5A1_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R4G4B4A4_UNORM: { auto color = readR4G4B4A4_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8_UNORM: { auto color = readR8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8_SNORM: { auto color = readR8_SNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8_UNORM: { auto color = readR8G8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8_SNORM: { auto color = readR8G8_SNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16_FLOAT: { auto color = readR16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16_SNORM: { auto color = readR16_NORM<short>(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16_UNORM: { auto color = readR16_NORM<ushort>(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::A16_FLOAT: { auto color = readA16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::L16_FLOAT: { auto color = readL16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::L16A16_FLOAT: { auto color = readL16A16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16_FLOAT: { auto color = readR16G16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16_SNORM: { auto color = readR16G16_NORM<short>(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16_UNORM: { auto color = readR16G16_NORM<ushort>(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16_FLOAT: { auto color = readR16G16B16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16_SNORM: { auto color = readR16G16B16_NORM<short>(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16_UNORM: { auto color = readR16G16B16_NORM<ushort>(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16A16_FLOAT: { auto color = readR16G16B16A16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16A16_SNORM: { auto color = readR16G16B16A16_NORM<short>(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16A16_UNORM: { auto color = readR16G16B16A16_NORM<ushort>(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32_FLOAT: { auto color = readR32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::A32_FLOAT: { auto color = readA32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::L32_FLOAT: { auto color = readL32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::L32A32_FLOAT: { auto color = readL32A32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32_FLOAT: { auto color = readR32G32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32B32_FLOAT: { auto color = readR32G32B32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32B32A32_FLOAT: { auto color = readR32G32B32A32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; 2945 } 2946 2947 2948} 2949 2950kernel void readFromBufferToIntTexture(ushort3 gIndices [[thread_position_in_grid]], constant CopyPixelParams &options[[buffer(0)]], constant uchar *buffer [[buffer(1)]], texture2d<int, access::write> dstTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<int, access::write> dstTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<int, access::write> dstTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<int, access::write> dstTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]]) 2951{ 2952 if (gIndices.x >= options.copySize.x || gIndices.y >= options.copySize.y || gIndices.z >= options.copySize.z) { return; } 2953# 1400 ".\\copy_buffer.metal" 2954 uint bufferOffset = options.bufferStartOffset + (gIndices.z * options.bufferDepthPitch + gIndices.y * options.bufferRowPitch + gIndices.x * options.pixelSize); 2955 2956 switch (kCopyFormatType) 2957 { 2958 case FormatID::R8_SINT: { auto color = readR8_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8_SINT: { auto color = readR8G8_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8_SINT: { auto color = readR8G8B8_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8A8_SINT: { auto color = readR8G8B8A8_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16_SINT: { auto color = readR16_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16_SINT: { auto color = readR16G16_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16_SINT: { auto color = readR16G16B16_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16A16_SINT: { auto color = readR16G16B16A16_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32_SINT: { auto color = readR32_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32_SINT: { auto color = readR32G32_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32B32_SINT: { auto color = readR32G32B32_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32B32A32_SINT: { auto color = readR32G32B32A32_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; 2959 } 2960 2961 2962} 2963 2964kernel void readFromBufferToUIntTexture(ushort3 gIndices [[thread_position_in_grid]], constant CopyPixelParams &options[[buffer(0)]], constant uchar *buffer [[buffer(1)]], texture2d<uint, access::write> dstTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<uint, access::write> dstTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<uint, access::write> dstTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<uint, access::write> dstTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]]) 2965{ 2966 if (gIndices.x >= options.copySize.x || gIndices.y >= options.copySize.y || gIndices.z >= options.copySize.z) { return; } 2967# 1428 ".\\copy_buffer.metal" 2968 uint bufferOffset = options.bufferStartOffset + (gIndices.z * options.bufferDepthPitch + gIndices.y * options.bufferRowPitch + gIndices.x * options.pixelSize); 2969 2970 switch (kCopyFormatType) 2971 { 2972 case FormatID::R8_UINT: { auto color = readR8_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8_UINT: { auto color = readR8G8_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8_UINT: { auto color = readR8G8B8_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8A8_UINT: { auto color = readR8G8B8A8_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16_UINT: { auto color = readR16_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16_UINT: { auto color = readR16G16_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16_UINT: { auto color = readR16G16B16_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16A16_UINT: { auto color = readR16G16B16A16_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32_UINT: { auto color = readR32_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32_UINT: { auto color = readR32G32_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32B32_UINT: { auto color = readR32G32B32_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32B32A32_UINT: { auto color = readR32G32B32A32_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; 2973 } 2974 2975 2976} 2977 2978 2979kernel void writeFromFloatTextureToBuffer(ushort2 gIndices [[thread_position_in_grid]], constant WritePixelParams &options[[buffer(0)]], texture2d<float, access::read> srcTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<float, access::read> srcTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<float, access::read> srcTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<float, access::read> srcTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]], texture2d_ms<float, access::read> srcTexture2dMS [[texture(0), function_constant(kCopyTextureType2DMS)]], device uchar *buffer [[buffer(1)]]) 2980{ 2981 if (gIndices.x >= options.copySize.x || gIndices.y >= options.copySize.y) { return; } 2982# 1481 ".\\copy_buffer.metal" 2983 uint bufferOffset = options.bufferStartOffset + (gIndices.y * options.bufferRowPitch + gIndices.x * options.pixelSize); 2984 2985 switch (kCopyFormatType) 2986 { 2987 case FormatID::R5G6B5_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR5G6B5_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8A8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8A8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8A8_UNORM_SRGB: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8A8_UNORM_SRGB(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8A8_SNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8A8_SNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::B8G8R8A8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeB8G8R8A8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::B8G8R8A8_UNORM_SRGB: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeB8G8R8A8_UNORM_SRGB(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8_UNORM_SRGB: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8_UNORM_SRGB(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8_SNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8_SNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::L8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeL8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::A8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeA8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::L8A8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeL8A8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R5G5B5A1_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR5G5B5A1_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R4G4B4A4_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR4G4B4A4_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8_SNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8_SNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8_SNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8_SNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16_SNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16_NORM<short>(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16_NORM<ushort>(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::A16_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeA16_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::L16_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeL16_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::L16A16_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeL16A16_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16_SNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16_NORM<short>(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16_NORM<ushort>(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16B16A16_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16B16A16_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16B16A16_SNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16B16A16_NORM<short>(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16B16A16_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16B16A16_NORM<ushort>(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::A32_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeA32_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::L32_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeL32_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::L32A32_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeL32A32_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32G32_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32G32_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32G32B32A32_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32G32B32A32_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; 2988 } 2989 2990 2991} 2992 2993kernel void writeFromIntTextureToBuffer(ushort2 gIndices [[thread_position_in_grid]], constant WritePixelParams &options[[buffer(0)]], texture2d<int, access::read> srcTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<int, access::read> srcTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<int, access::read> srcTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<int, access::read> srcTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]], texture2d_ms<int, access::read> srcTexture2dMS [[texture(0), function_constant(kCopyTextureType2DMS)]], device uchar *buffer [[buffer(1)]]) 2994{ 2995 if (gIndices.x >= options.copySize.x || gIndices.y >= options.copySize.y) { return; } 2996# 1506 ".\\copy_buffer.metal" 2997 uint bufferOffset = options.bufferStartOffset + (gIndices.y * options.bufferRowPitch + gIndices.x * options.pixelSize); 2998 2999 switch (kCopyFormatType) 3000 { 3001 case FormatID::R8_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8A8_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8A8_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16B16A16_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16B16A16_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32G32_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32G32_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32G32B32A32_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32G32B32A32_SINT(gIndices, options, bufferOffset, color, buffer); } break; 3002 } 3003 3004 3005} 3006 3007kernel void writeFromUIntTextureToBuffer(ushort2 gIndices [[thread_position_in_grid]], constant WritePixelParams &options[[buffer(0)]], texture2d<uint, access::read> srcTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<uint, access::read> srcTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<uint, access::read> srcTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<uint, access::read> srcTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]], texture2d_ms<uint, access::read> srcTexture2dMS [[texture(0), function_constant(kCopyTextureType2DMS)]], device uchar *buffer [[buffer(1)]]) 3008{ 3009 if (gIndices.x >= options.copySize.x || gIndices.y >= options.copySize.y) { return; } 3010# 1531 ".\\copy_buffer.metal" 3011 uint bufferOffset = options.bufferStartOffset + (gIndices.y * options.bufferRowPitch + gIndices.x * options.pixelSize); 3012 3013 switch (kCopyFormatType) 3014 { 3015 case FormatID::R8_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8A8_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8A8_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16B16A16_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16B16A16_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32G32_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32G32_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32G32B32A32_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32G32B32A32_UINT(gIndices, options, bufferOffset, color, buffer); } break; 3016 } 3017 3018 3019} 3020 3021 3022struct CopyVertexParams 3023{ 3024 uint srcBufferStartOffset; 3025 uint srcStride; 3026 uint srcComponentBytes; 3027 uint srcComponents; 3028 3029 3030 3031 uchar4 srcDefaultAlphaData; 3032 3033 uint dstBufferStartOffset; 3034 uint dstStride; 3035 uint dstComponents; 3036 3037 uint vertexCount; 3038}; 3039# 1581 ".\\copy_buffer.metal" 3040template <typename IntType> 3041static inline void writeFloatVertex(constant CopyVertexParams &options, 3042 uint idx, 3043 vec<IntType, 4> data, 3044 device uchar *dst) 3045{ 3046 uint dstOffset = idx * options.dstStride + options.dstBufferStartOffset; 3047 3048 for (uint component = 0; component < options.dstComponents; ++component, dstOffset += 4) 3049 { 3050 floatToBytes(static_cast<float>(data[component]), dstOffset, dst); 3051 } 3052} 3053 3054template <> 3055inline void writeFloatVertex(constant CopyVertexParams &options, 3056 uint idx, 3057 vec<float, 4> data, 3058 device uchar *dst) 3059{ 3060 uint dstOffset = idx * options.dstStride + options.dstBufferStartOffset; 3061 3062 for (uint component = 0; component < options.dstComponents; ++component, dstOffset += 4) 3063 { 3064 floatToBytes(data[component], dstOffset, dst); 3065 } 3066} 3067 3068 3069static inline void convertToFloatVertexFormat(uint index, 3070 constant CopyVertexParams &options, 3071 constant uchar *srcBuffer, 3072 device uchar *dstBuffer) 3073{ 3074# 1627 ".\\copy_buffer.metal" 3075 uint bufferOffset = options.srcBufferStartOffset + options.srcStride * index; 3076# 1636 ".\\copy_buffer.metal" 3077 switch (kCopyFormatType) 3078 { 3079 case FormatID::R8_UNORM: { auto data = readR8_UNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8_SNORM: { auto data = readR8_SNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8_UINT: { auto data = readR8_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8_SINT: { auto data = readR8_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8_USCALED: { auto data = readR8_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8_SSCALED: { auto data = readR8_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_UNORM: { auto data = readR8G8_UNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_SNORM: { auto data = readR8G8_SNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_UINT: { auto data = readR8G8_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_SINT: { auto data = readR8G8_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_USCALED: { auto data = readR8G8_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_SSCALED: { auto data = readR8G8_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_UNORM: { auto data = readR8G8B8_UNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_SNORM: { auto data = readR8G8B8_SNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_UINT: { auto data = readR8G8B8_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_SINT: { auto data = readR8G8B8_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_USCALED: { auto data = readR8G8B8_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_SSCALED: { auto data = readR8G8B8_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_UNORM: { auto data = readR8G8B8A8_UNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_SNORM: { auto data = readR8G8B8A8_SNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_UINT: { auto data = readR8G8B8A8_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_SINT: { auto data = readR8G8B8A8_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_USCALED: { auto data = readR8G8B8A8_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_SSCALED: { auto data = readR8G8B8A8_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_UNORM: { auto data = readR16_NORM<ushort>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_SNORM: { auto data = readR16_NORM<short>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_UINT: { auto data = readR16_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_SINT: { auto data = readR16_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_USCALED: { auto data = readR16_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_SSCALED: { auto data = readR16_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_UNORM: { auto data = readR16G16_NORM<ushort>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_SNORM: { auto data = readR16G16_NORM<short>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_UINT: { auto data = readR16G16_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_SINT: { auto data = readR16G16_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_USCALED: { auto data = readR16G16_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_SSCALED: { auto data = readR16G16_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_UNORM: { auto data = readR16G16B16_NORM<ushort>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_SNORM: { auto data = readR16G16B16_NORM<short>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_UINT: { auto data = readR16G16B16_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_SINT: { auto data = readR16G16B16_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_USCALED: { auto data = readR16G16B16_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_SSCALED: { auto data = readR16G16B16_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_UNORM: { auto data = readR16G16B16A16_NORM<ushort>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_SNORM: { auto data = readR16G16B16A16_NORM<short>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_UINT: { auto data = readR16G16B16A16_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_SINT: { auto data = readR16G16B16A16_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_USCALED: { auto data = readR16G16B16A16_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_SSCALED: { auto data = readR16G16B16A16_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_UNORM: { auto data = readR32_NORM<uint>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_SNORM: { auto data = readR32_NORM<int>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_UINT: { auto data = readR32_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_SINT: { auto data = readR32_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_USCALED: { auto data = readR32_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_SSCALED: { auto data = readR32_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_UNORM: { auto data = readR32G32_NORM<uint>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_SNORM: { auto data = readR32G32_NORM<int>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_UINT: { auto data = readR32G32_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_SINT: { auto data = readR32G32_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_USCALED: { auto data = readR32G32_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_SSCALED: { auto data = readR32G32_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_UNORM: { auto data = readR32G32B32_NORM<uint>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_SNORM: { auto data = readR32G32B32_NORM<int>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_UINT: { auto data = readR32G32B32_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_SINT: { auto data = readR32G32B32_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_USCALED: { auto data = readR32G32B32_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_SSCALED: { auto data = readR32G32B32_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_UNORM: { auto data = readR32G32B32A32_NORM<uint>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_SNORM: { auto data = readR32G32B32A32_NORM<int>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_UINT: { auto data = readR32G32B32A32_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_SINT: { auto data = readR32G32B32A32_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_USCALED: { auto data = readR32G32B32A32_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_SSCALED: { auto data = readR32G32B32A32_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_FLOAT: { auto data = readR16_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_FLOAT: { auto data = readR16G16_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_FLOAT: { auto data = readR16G16B16_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_FLOAT: { auto data = readR16G16B16A16_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_FLOAT: { auto data = readR32_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_FLOAT: { auto data = readR32G32_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_FLOAT: { auto data = readR32G32B32_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_FLOAT: { auto data = readR32G32B32A32_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_FIXED: { auto data = readR32_FIXED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_FIXED: { auto data = readR32G32_FIXED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_FIXED: { auto data = readR32G32B32_FIXED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_FIXED: { auto data = readR32G32B32A32_FIXED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R10G10B10A2_SINT: { auto data = readR10G10B10A2_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R10G10B10A2_UINT: { auto data = readR10G10B10A2_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R10G10B10A2_SSCALED: { auto data = readR10G10B10A2_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R10G10B10A2_USCALED: { auto data = readR10G10B10A2_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; 3080 } 3081 3082 3083} 3084 3085 3086kernel void convertToFloatVertexFormatCS(uint index [[thread_position_in_grid]], 3087 constant CopyVertexParams &options [[buffer(0)]], 3088 constant uchar *srcBuffer [[buffer(1)]], 3089 device uchar *dstBuffer [[buffer(2)]]) 3090{ 3091 if (index >= options.vertexCount) { return; }; 3092 convertToFloatVertexFormat(index, options, srcBuffer, dstBuffer); 3093} 3094 3095 3096vertex void convertToFloatVertexFormatVS(uint index [[vertex_id]], 3097 constant CopyVertexParams &options [[buffer(0)]], 3098 constant uchar *srcBuffer [[buffer(1)]], 3099 device uchar *dstBuffer [[buffer(2)]]) 3100{ 3101 convertToFloatVertexFormat(index, options, srcBuffer, dstBuffer); 3102} 3103 3104 3105static inline void expandVertexFormatComponents(uint index, 3106 constant CopyVertexParams &options, 3107 constant uchar *srcBuffer, 3108 device uchar *dstBuffer) 3109{ 3110 uint srcOffset = options.srcBufferStartOffset + options.srcStride * index; 3111 uint dstOffset = options.dstBufferStartOffset + options.dstStride * index; 3112 3113 uint dstComponentsBeforeAlpha = min(options.dstComponents, 3u); 3114 uint component; 3115 for (component = 0; component < options.srcComponents; ++component, 3116 srcOffset += options.srcComponentBytes, dstOffset += options.srcComponentBytes) 3117 { 3118 for (uint byte = 0; byte < options.srcComponentBytes; ++byte) 3119 { 3120 dstBuffer[dstOffset + byte] = srcBuffer[srcOffset + byte]; 3121 } 3122 } 3123 3124 for (; component < dstComponentsBeforeAlpha; 3125 ++component, dstOffset += options.srcComponentBytes) 3126 { 3127 for (uint byte = 0; byte < options.srcComponentBytes; ++byte) 3128 { 3129 dstBuffer[dstOffset + byte] = 0; 3130 } 3131 } 3132 3133 if (component < options.dstComponents) 3134 { 3135 3136 for (uint byte = 0; byte < options.srcComponentBytes; ++byte) 3137 { 3138 dstBuffer[dstOffset + byte] = options.srcDefaultAlphaData[byte]; 3139 } 3140 } 3141} 3142 3143 3144kernel void expandVertexFormatComponentsCS(uint index [[thread_position_in_grid]], 3145 constant CopyVertexParams &options [[buffer(0)]], 3146 constant uchar *srcBuffer [[buffer(1)]], 3147 device uchar *dstBuffer [[buffer(2)]]) 3148{ 3149 if (index >= options.vertexCount) { return; }; 3150 3151 expandVertexFormatComponents(index, options, srcBuffer, dstBuffer); 3152} 3153 3154 3155vertex void expandVertexFormatComponentsVS(uint index [[vertex_id]], 3156 constant CopyVertexParams &options [[buffer(0)]], 3157 constant uchar *srcBuffer [[buffer(1)]], 3158 device uchar *dstBuffer [[buffer(2)]]) 3159{ 3160 expandVertexFormatComponents(index, options, srcBuffer, dstBuffer); 3161} 3162 3163 3164kernel void linearizeBlocks(ushort2 position [[thread_position_in_grid]], 3165 constant uint2 *dimensions [[buffer(0)]], 3166 constant uint2 *srcBuffer [[buffer(1)]], 3167 device uint2 *dstBuffer [[buffer(2)]]) 3168{ 3169 if (any(uint2(position) >= *dimensions)) 3170 { 3171 return; 3172 } 3173 uint2 t = uint2(position); 3174 t = (t | (t << 8)) & 0x00FF00FF; 3175 t = (t | (t << 4)) & 0x0F0F0F0F; 3176 t = (t | (t << 2)) & 0x33333333; 3177 t = (t | (t << 1)) & 0x55555555; 3178 dstBuffer[position.y * (*dimensions).x + position.x] = srcBuffer[(t.x << 1) | t.y]; 3179} 3180 3181 3182kernel void saturateDepth(uint2 position [[thread_position_in_grid]], 3183 constant uint3 *dimensions [[buffer(0)]], 3184 device float *srcBuffer [[buffer(1)]], 3185 device float *dstBuffer [[buffer(2)]]) 3186{ 3187 if (any(position >= (*dimensions).xy)) 3188 { 3189 return; 3190 } 3191 const uint srcOffset = position.y * (*dimensions).z + position.x; 3192 const uint dstOffset = position.y * (*dimensions).x + position.x; 3193 dstBuffer[dstOffset] = saturate(srcBuffer[srcOffset]); 3194} 3195# 6 "temp_master_source.metal" 2 3196# 1 ".\\visibility.metal" 1 3197 3198 3199 3200 3201 3202 3203 3204 3205constant bool kCombineWithExistingResult [[function_constant(1000)]]; 3206 3207 3208 3209struct CombineVisibilityResultOptions 3210{ 3211 3212 uint startOffset; 3213 3214 uint numOffsets; 3215}; 3216 3217kernel void combineVisibilityResult(uint idx [[thread_position_in_grid]], 3218 constant CombineVisibilityResultOptions &options [[buffer(0)]], 3219 constant ushort4 *renderpassVisibilityResult [[buffer(1)]], 3220 device ushort4 *finalResults [[buffer(2)]]) 3221{ 3222 if (idx > 0) 3223 { 3224 3225 3226 3227 return; 3228 } 3229 ushort4 finalResult16x4; 3230 3231 if (kCombineWithExistingResult) 3232 { 3233 finalResult16x4 = finalResults[0]; 3234 } 3235 else 3236 { 3237 finalResult16x4 = ushort4(0, 0, 0, 0); 3238 } 3239 3240 for (uint i = 0; i < options.numOffsets; ++i) 3241 { 3242 uint offset = options.startOffset + i; 3243 ushort4 renderpassResult = renderpassVisibilityResult[offset]; 3244 3245 3246 finalResult16x4 = finalResult16x4 | renderpassResult; 3247 } 3248 finalResults[0] = finalResult16x4; 3249} 3250# 7 "temp_master_source.metal" 2 3251# 1 ".\\rewrite_indices.metal" 1 3252# 11 ".\\rewrite_indices.metal" 3253# 1 ".\\rewrite_indices_shared.h" 1 3254# 12 ".\\rewrite_indices.metal" 2 3255using namespace metal; 3256 3257constant uint fixIndexBufferKey [[ function_constant(2000) ]]; 3258constant bool indexBufferIsUint16 = (((fixIndexBufferKey >> 0U) & 0x03U) == 2U); 3259constant bool indexBufferIsUint32 = (((fixIndexBufferKey >> 0U) & 0x03U) == 3U); 3260constant bool outIndexBufferIsUint16 = (((fixIndexBufferKey >> 2U) & 0x03U) == 2U); 3261constant bool outIndexBufferIsUint32 = (((fixIndexBufferKey >> 2U) & 0x03U) == 3U); 3262constant bool doPrimRestart = (fixIndexBufferKey & 0x00100U); 3263constant uint fixIndexBufferMode = (fixIndexBufferKey >> 4U) & 0x0FU; 3264 3265 3266static inline uint readIdx( 3267 const device ushort *indexBufferUint16, 3268 const device uint *indexBufferUint32, 3269 const uint restartIndex, 3270 const uint indexCount, 3271 uint idx, 3272 thread bool &foundRestart, 3273 thread uint &indexThatRestartedFirst 3274 ) 3275{ 3276 uint inIndex = idx; 3277 if(inIndex < indexCount) 3278 { 3279 if(indexBufferIsUint16) 3280 { 3281 inIndex = indexBufferUint16[inIndex]; 3282 } 3283 else if(indexBufferIsUint32) 3284 { 3285 inIndex = indexBufferUint32[inIndex]; 3286 } 3287 } 3288 else 3289 { 3290 foundRestart = true; 3291 indexThatRestartedFirst = idx; 3292 } 3293 if(doPrimRestart && !foundRestart && inIndex == restartIndex) 3294 { 3295 foundRestart = true; 3296 indexThatRestartedFirst = idx; 3297 } 3298 return inIndex; 3299} 3300 3301static inline void outputPrimitive( 3302 const device ushort *indexBufferUint16, 3303 const device uint *indexBufferUint32, 3304 device ushort *outIndexBufferUint16, 3305 device uint *outIndexBufferUint32, 3306 const uint restartIndex, 3307 const uint indexCount, 3308 thread uint &baseIndex, 3309 uint onIndex, 3310 thread uint &onOutIndex 3311 ) 3312{ 3313 if(baseIndex > onIndex) return; 3314 bool foundRestart = false; 3315 uint indexThatRestartedFirst = 0; 3316# 86 ".\\rewrite_indices.metal" 3317 switch(fixIndexBufferMode) 3318 { 3319 case 0x00U: 3320 { 3321 auto tmpIndex = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex, foundRestart, indexThatRestartedFirst); 3322 if(foundRestart) 3323 { 3324 baseIndex = indexThatRestartedFirst + 1; 3325 return; 3326 } 3327 3328 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex; } onOutIndex++; }); 3329 } 3330 break; 3331 case 0x01U: 3332 { 3333 auto tmpIndex0 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 0, foundRestart, indexThatRestartedFirst); 3334 auto tmpIndex1 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 1, foundRestart, indexThatRestartedFirst); 3335 if(foundRestart) 3336 { 3337 baseIndex = indexThatRestartedFirst + 1; 3338 return; 3339 } 3340 if((onIndex - baseIndex) & 1) return; 3341 3342 if(fixIndexBufferKey & 0x00200U) 3343 { 3344 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; }); 3345 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; }); 3346 } 3347 else 3348 { 3349 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; }); 3350 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; }); 3351 } 3352 } 3353 break; 3354 case 0x03U: 3355 { 3356 auto tmpIndex0 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 0, foundRestart, indexThatRestartedFirst); 3357 auto tmpIndex1 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 1, foundRestart, indexThatRestartedFirst); 3358 if(foundRestart) 3359 { 3360 baseIndex = indexThatRestartedFirst + 1; 3361 return; 3362 } 3363 3364 if(fixIndexBufferKey & 0x00200U) 3365 { 3366 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; }); 3367 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; }); 3368 } 3369 else 3370 { 3371 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; }); 3372 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; }); 3373 } 3374 } 3375 break; 3376 case 0x04U: 3377 { 3378 auto tmpIndex0 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 0, foundRestart, indexThatRestartedFirst); 3379 auto tmpIndex1 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 1, foundRestart, indexThatRestartedFirst); 3380 auto tmpIndex2 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 2, foundRestart, indexThatRestartedFirst); 3381 if(foundRestart) 3382 { 3383 baseIndex = indexThatRestartedFirst + 1; 3384 return; 3385 } 3386 if(((onIndex - baseIndex) % 3) != 0) return; 3387 3388 if(fixIndexBufferKey & 0x00200U) 3389 { 3390 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2; } onOutIndex++; }); 3391 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; }); 3392 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; }); 3393 } 3394 else 3395 { 3396 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; }); 3397 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; }); 3398 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2; } onOutIndex++; }); 3399 } 3400 } 3401 break; 3402 case 0x05U: 3403 { 3404 uint isOdd = ((onIndex - baseIndex) & 1); 3405 auto tmpIndex0 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 0 + isOdd, foundRestart, indexThatRestartedFirst); 3406 auto tmpIndex1 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 1 - isOdd, foundRestart, indexThatRestartedFirst); 3407 auto tmpIndex2 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 2, foundRestart, indexThatRestartedFirst); 3408 if(foundRestart) 3409 { 3410 baseIndex = indexThatRestartedFirst + 1; 3411 return; 3412 } 3413 3414 if(fixIndexBufferKey & 0x00200U) 3415 { 3416 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2; } onOutIndex++; }); 3417 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; }); 3418 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; }); 3419 } 3420 else 3421 { 3422 3423 if(isOdd) 3424 { 3425 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; }); 3426 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2; } onOutIndex++; }); 3427 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; }); 3428 } 3429 else 3430 { 3431 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; }); 3432 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; }); 3433 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2; } onOutIndex++; }); 3434 } 3435 } 3436 3437 assert(onOutIndex <= (onIndex + 1) * 3); 3438 assert(onOutIndex <= (indexCount - 2) * 3); 3439 } 3440 break; 3441 3442 } 3443 3444 3445} 3446 3447kernel void fixIndexBuffer( 3448 const device ushort *indexBufferUint16 [[ buffer(0), function_constant(indexBufferIsUint16) ]], 3449 const device uint *indexBufferUint32 [[ buffer(0), function_constant(indexBufferIsUint32) ]], 3450 device ushort *outIndexBufferUint16 [[ buffer(1), function_constant(outIndexBufferIsUint16) ]], 3451 device uint *outIndexBufferUint32 [[ buffer(1), function_constant(outIndexBufferIsUint32) ]], 3452 constant uint &indexCount [[ buffer(2) ]], 3453 constant uint &primCount [[ buffer(3) ]], 3454 uint prim [[thread_position_in_grid]]) 3455{ 3456 constexpr uint restartIndex = 0xFFFFFFFF; 3457 uint baseIndex = 0; 3458 uint onIndex = onIndex; 3459 uint onOutIndex = onOutIndex; 3460 if(prim < primCount) 3461 { 3462 switch(fixIndexBufferMode) 3463 { 3464 case 0x00U: 3465 onIndex = prim; 3466 onOutIndex = prim; 3467 break; 3468 case 0x01U: 3469 onIndex = prim * 2; 3470 onOutIndex = prim * 2; 3471 break; 3472 case 0x03U: 3473 onIndex = prim; 3474 onOutIndex = prim * 2; 3475 break; 3476 case 0x04U: 3477 onIndex = prim * 3; 3478 onOutIndex = prim * 3; 3479 break; 3480 case 0x05U: 3481 onIndex = prim; 3482 onOutIndex = prim * 3; 3483 break; 3484 } 3485 outputPrimitive(indexBufferUint16, indexBufferUint32, outIndexBufferUint16, outIndexBufferUint32, restartIndex, indexCount, baseIndex, onIndex, onOutIndex); 3486 } 3487} 3488 3489 3490 3491static inline void generatePrimitive( 3492 device ushort *outIndexBufferUint16, 3493 device uint *outIndexBufferUint32, 3494 const uint firstVertex, 3495 const uint indexCount, 3496 thread uint &baseIndex, 3497 uint onIndex, 3498 uint primCount, 3499 thread uint &onOutIndex 3500 ) 3501{ 3502 if(baseIndex > onIndex) return; 3503# 284 ".\\rewrite_indices.metal" 3504 switch(fixIndexBufferMode) 3505 { 3506 case 0x00U: 3507 { 3508 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = onIndex + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = onIndex + firstVertex; } onOutIndex++; }); 3509 } 3510 break; 3511 case 0x01U: 3512 { 3513 auto tmpIndex0 = onIndex + 0; 3514 auto tmpIndex1 = onIndex + 1; 3515 if(fixIndexBufferKey & 0x00200U) 3516 { 3517 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; }); 3518 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; }); 3519 } 3520 else 3521 { 3522 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; }); 3523 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; }); 3524 } 3525 } 3526 break; 3527 case 0x02U: 3528 { 3529 auto tmpIndex0 = onIndex + 0; 3530 auto tmpIndex1 = (onIndex + 1) % primCount; 3531 if(fixIndexBufferKey & 0x00200U) 3532 { 3533 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; }); 3534 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; }); 3535 } 3536 else 3537 { 3538 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; }); 3539 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; }); 3540 } 3541 } 3542 break; 3543 case 0x03U: 3544 { 3545 auto tmpIndex0 = onIndex + 0; 3546 auto tmpIndex1 = onIndex + 1; 3547 if(fixIndexBufferKey & 0x00200U) 3548 { 3549 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; }); 3550 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; }); 3551 } 3552 else 3553 { 3554 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; }); 3555 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; }); 3556 } 3557 } 3558 break; 3559 case 0x04U: 3560 { 3561 auto tmpIndex0 = onIndex + 0; 3562 auto tmpIndex1 = onIndex + 1; 3563 auto tmpIndex2 = onIndex + 2; 3564 if(fixIndexBufferKey & 0x00200U) 3565 { 3566 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2 + firstVertex; } onOutIndex++; }); 3567 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; }); 3568 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; }); 3569 } 3570 else 3571 { 3572 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; }); 3573 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; }); 3574 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2 + firstVertex; } onOutIndex++; }); 3575 } 3576 } 3577 break; 3578 case 0x05U: 3579 { 3580 uint isOdd = ((onIndex - baseIndex) & 1); 3581 auto tmpIndex0 = onIndex + 0 + isOdd; 3582 auto tmpIndex1 = onIndex + 1 - isOdd; 3583 auto tmpIndex2 = onIndex + 2; 3584 if(fixIndexBufferKey & 0x00200U) 3585 { 3586 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2 + firstVertex; } onOutIndex++; }); 3587 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; }); 3588 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; }); 3589 } 3590 else 3591 { 3592 if(isOdd) 3593 { 3594 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; }); 3595 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2 + firstVertex; } onOutIndex++; }); 3596 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; }); 3597 } 3598 else 3599 { 3600 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; }); 3601 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; }); 3602 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2 + firstVertex; } onOutIndex++; }); 3603 } 3604 } 3605 3606 assert(onOutIndex <= (onIndex + 1) * 3); 3607 assert(onOutIndex <= (indexCount - 2) * 3); 3608 break; 3609 } 3610 case 0x06U: 3611 { 3612 auto tmpIndex0 = 0; 3613 auto tmpIndex1 = onIndex + 1; 3614 auto tmpIndex2 = onIndex + 2; 3615 3616 if(fixIndexBufferKey & 0x00200U) 3617 { 3618 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2 + firstVertex; } onOutIndex++; }); 3619 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; }); 3620 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; }); 3621 } 3622 else 3623 { 3624 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; }); 3625 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2 + firstVertex; } onOutIndex++; }); 3626 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; }); 3627 } 3628 } 3629 break; 3630 3631 } 3632 3633} 3634 3635 3636 3637kernel void genIndexBuffer( 3638 device ushort *outIndexBufferUint16 [[ buffer(1), function_constant(outIndexBufferIsUint16) ]], 3639 device uint *outIndexBufferUint32 [[ buffer(1), function_constant(outIndexBufferIsUint32) ]], 3640 constant uint &indexCount [[ buffer(2) ]], 3641 constant uint &primCount [[ buffer(3) ]], 3642 constant uint &firstVertex [[ buffer(4) ]], 3643 uint prim [[thread_position_in_grid]]) 3644{ 3645 uint baseIndex = 0; 3646 uint onIndex = onIndex; 3647 uint onOutIndex = onOutIndex; 3648 if(prim < primCount) 3649 { 3650 switch(fixIndexBufferMode) 3651 { 3652 case 0x00U: 3653 onIndex = prim; 3654 onOutIndex = prim; 3655 break; 3656 case 0x01U: 3657 onIndex = prim * 2; 3658 onOutIndex = prim * 2; 3659 break; 3660 case 0x03U: 3661 onIndex = prim; 3662 onOutIndex = prim * 2; 3663 break; 3664 case 0x02U: 3665 onIndex = prim; 3666 onOutIndex = prim * 2; 3667 break; 3668 case 0x04U: 3669 onIndex = prim * 3; 3670 onOutIndex = prim * 3; 3671 break; 3672 case 0x05U: 3673 onIndex = prim; 3674 onOutIndex = prim * 3; 3675 break; 3676 case 0x06U: 3677 onIndex = prim; 3678 onOutIndex = prim * 3; 3679 break; 3680 } 3681 generatePrimitive(outIndexBufferUint16, outIndexBufferUint32, firstVertex, indexCount, baseIndex, onIndex, primCount, onOutIndex); 3682 } 3683} 3684# 8 "temp_master_source.metal" 2 3685 3686 3687