xref: /aosp_15_r20/external/angle/src/libANGLE/renderer/metal/shaders/mtl_internal_shaders_autogen.metal (revision 8975f5c5ed3d1c378011245431ada316dfb6f244)
1// GENERATED FILE - DO NOT EDIT.
2// Generated by gen_mtl_internal_shaders.py
3//
4// Copyright 2020 The ANGLE Project Authors. All rights reserved.
5// Use of this source code is governed by a BSD-style license that can be
6// found in the LICENSE file.
7//
8
9// Combined Metal default shaders.
10
11# 1 "temp_master_source.metal"
12# 1 "<built-in>" 1
13# 1 "<built-in>" 3
14# 435 "<built-in>" 3
15# 1 "<command line>" 1
16# 1 "<built-in>" 2
17# 1 "temp_master_source.metal" 2
18# 1 ".\\blit.metal" 1
19
20
21
22
23
24
25
26# 1 ".\\common.h" 1
27# 13 ".\\common.h"
28# include <simd/simd.h>
29# include <metal_stdlib>
30
31
32# 1 ".\\constants.h" 1
33# 11 ".\\constants.h"
34namespace rx
35{
36namespace mtl_shader
37{
38
39enum
40{
41    kTextureType2D = 0,
42    kTextureType2DMultisample = 1,
43    kTextureType2DArray = 2,
44    kTextureTypeCube = 3,
45    kTextureType3D = 4,
46    kTextureTypeCount = 5,
47};
48
49
50
51
52}
53}
54# 18 ".\\common.h" 2
55
56
57
58
59
60
61
62using namespace metal;
63
64
65
66constant uint32_t kNumColorOutputs [[function_constant(0)]];
67constant bool kColorOutputAvailable0 = kNumColorOutputs > 0;
68constant bool kColorOutputAvailable1 = kNumColorOutputs > 1;
69constant bool kColorOutputAvailable2 = kNumColorOutputs > 2;
70constant bool kColorOutputAvailable3 = kNumColorOutputs > 3;
71constant bool kColorOutputAvailable4 = kNumColorOutputs > 4;
72constant bool kColorOutputAvailable5 = kNumColorOutputs > 5;
73constant bool kColorOutputAvailable6 = kNumColorOutputs > 6;
74constant bool kColorOutputAvailable7 = kNumColorOutputs > 7;
75
76namespace rx
77{
78namespace mtl_shader
79{
80
81
82constant float2 gCorners[3] = {float2(-1.0f, -1.0f), float2(3.0f, -1.0f), float2(-1.0f, 3.0f)};
83
84template <typename T>
85struct MultipleColorOutputs
86{
87    vec<T, 4> color0 [[color(0), function_constant(kColorOutputAvailable0)]];
88    vec<T, 4> color1 [[color(1), function_constant(kColorOutputAvailable1)]];
89    vec<T, 4> color2 [[color(2), function_constant(kColorOutputAvailable2)]];
90    vec<T, 4> color3 [[color(3), function_constant(kColorOutputAvailable3)]];
91    vec<T, 4> color4 [[color(4), function_constant(kColorOutputAvailable4)]];
92    vec<T, 4> color5 [[color(5), function_constant(kColorOutputAvailable5)]];
93    vec<T, 4> color6 [[color(6), function_constant(kColorOutputAvailable6)]];
94    vec<T, 4> color7 [[color(7), function_constant(kColorOutputAvailable7)]];
95};
96# 69 ".\\common.h"
97template <typename T>
98static inline MultipleColorOutputs<T> toMultipleColorOutputs(vec<T, 4> color)
99{
100    MultipleColorOutputs<T> re;
101
102    do { if (kColorOutputAvailable0) { re.color0 = color; } } while (0);
103    do { if (kColorOutputAvailable1) { re.color1 = color; } } while (0);
104    do { if (kColorOutputAvailable2) { re.color2 = color; } } while (0);
105    do { if (kColorOutputAvailable3) { re.color3 = color; } } while (0);
106    do { if (kColorOutputAvailable4) { re.color4 = color; } } while (0);
107    do { if (kColorOutputAvailable5) { re.color5 = color; } } while (0);
108    do { if (kColorOutputAvailable6) { re.color6 = color; } } while (0);
109    do { if (kColorOutputAvailable7) { re.color7 = color; } } while (0);
110
111    return re;
112}
113
114static inline float3 cubeTexcoords(float2 texcoords, int face)
115{
116    texcoords = 2.0 * texcoords - 1.0;
117    switch (face)
118    {
119        case 0:
120            return float3(1.0, -texcoords.y, -texcoords.x);
121        case 1:
122            return float3(-1.0, -texcoords.y, texcoords.x);
123        case 2:
124            return float3(texcoords.x, 1.0, texcoords.y);
125        case 3:
126            return float3(texcoords.x, -1.0, -texcoords.y);
127        case 4:
128            return float3(texcoords.x, -texcoords.y, 1.0);
129        case 5:
130            return float3(-texcoords.x, -texcoords.y, -1.0);
131    }
132    return float3(texcoords, 0);
133}
134
135template <typename T>
136static inline vec<T, 4> resolveTextureMS(texture2d_ms<T> srcTexture, uint2 coords)
137{
138    uint samples = srcTexture.get_num_samples();
139
140    vec<T, 4> output(0);
141
142    for (uint sample = 0; sample < samples; ++sample)
143    {
144        output += srcTexture.read(coords, sample);
145    }
146
147    output = output / samples;
148
149    return output;
150}
151
152static inline float4 sRGBtoLinear(float4 color)
153{
154    float3 linear1 = color.rgb / 12.92;
155    float3 linear2 = powr((color.rgb + float3(0.055)) / 1.055, 2.4);
156    float3 factor = float3(color.rgb <= float3(0.04045));
157    float4 linear = float4(factor * linear1 + float3(1.0 - factor) * linear2, color.a);
158
159    return linear;
160}
161
162static inline float linearToSRGB(float color)
163{
164    if (color <= 0.0f)
165        return 0.0f;
166    if (color < 0.0031308f)
167        return 12.92f * color;
168    if (color < 1.0f)
169        return 1.055f * powr(color, 0.41666f) - 0.055f;
170    return 1.0f;
171}
172
173static inline float4 linearToSRGB(float4 color)
174{
175    return float4(linearToSRGB(color.r), linearToSRGB(color.g), linearToSRGB(color.b), color.a);
176}
177
178template <typename Short>
179static inline Short bytesToShort(constant uchar *input, uint offset)
180{
181    Short inputLo = input[offset];
182    Short inputHi = input[offset + 1];
183
184    return inputLo | (inputHi << 8);
185}
186
187template <typename Int>
188static inline Int bytesToInt(constant uchar *input, uint offset)
189{
190    Int input0 = input[offset];
191    Int input1 = input[offset + 1];
192    Int input2 = input[offset + 2];
193    Int input3 = input[offset + 3];
194
195    return input0 | (input1 << 8) | (input2 << 16) | (input3 << 24);
196}
197
198template <typename Short>
199static inline void shortToBytes(Short val, uint offset, device uchar *output)
200{
201    ushort valUnsigned = as_type<ushort>(val);
202    output[offset] = valUnsigned & 0xff;
203    output[offset + 1] = (valUnsigned >> 8) & 0xff;
204}
205
206template <typename Int>
207static inline void intToBytes(Int val, uint offset, device uchar *output)
208{
209    uint valUnsigned = as_type<uint>(val);
210    output[offset] = valUnsigned & 0xff;
211    output[offset + 1] = (valUnsigned >> 8) & 0xff;
212    output[offset + 2] = (valUnsigned >> 16) & 0xff;
213    output[offset + 3] = (valUnsigned >> 24) & 0xff;
214}
215
216static inline void floatToBytes(float val, uint offset, device uchar *output)
217{
218    intToBytes(as_type<uint>(val), offset, output);
219}
220
221static inline void int24bitToBytes(uint val, uint offset, device uchar *output)
222{
223    output[offset] = val & 0xff;
224    output[offset + 1] = (val >> 8) & 0xff;
225    output[offset + 2] = (val >> 16) & 0xff;
226}
227
228template <unsigned int inputBitCount, unsigned int inputBitStart, typename T>
229static inline T getShiftedData(T input)
230{
231    static_assert(inputBitCount + inputBitStart <= (sizeof(T) * 8),
232                  "T must have at least as many bits as inputBitCount + inputBitStart.");
233    const T mask = (1 << inputBitCount) - 1;
234    return (input >> inputBitStart) & mask;
235}
236
237template <unsigned int inputBitCount, unsigned int inputBitStart, typename T>
238static inline T shiftData(T input)
239{
240    static_assert(inputBitCount + inputBitStart <= (sizeof(T) * 8),
241                  "T must have at least as many bits as inputBitCount + inputBitStart.");
242    const T mask = (1 << inputBitCount) - 1;
243    return (input & mask) << inputBitStart;
244}
245
246template <unsigned int inputBitCount, typename T>
247static inline float normalizedToFloat(T input)
248{
249    static_assert(inputBitCount <= (sizeof(T) * 8),
250                  "T must have more bits than or same bits as inputBitCount.");
251    static_assert(inputBitCount <= 23, "Only single precision is supported");
252
253    constexpr float inverseMax = 1.0f / ((1 << inputBitCount) - 1);
254    return input * inverseMax;
255}
256
257template <typename T>
258static inline float normalizedToFloat(T input)
259{
260    return normalizedToFloat<sizeof(T) * 8, T>(input);
261}
262
263template <>
264inline float normalizedToFloat(short input)
265{
266    constexpr float inverseMax = 1.0f / 0x7fff;
267    return static_cast<float>(input) * inverseMax;
268}
269
270template <>
271inline float normalizedToFloat(int input)
272{
273    constexpr float inverseMax = 1.0f / 0x7fffffff;
274    return static_cast<float>(input) * inverseMax;
275}
276
277template <>
278inline float normalizedToFloat(uint input)
279{
280    constexpr float inverseMax = 1.0f / 0xffffffff;
281    return static_cast<float>(input) * inverseMax;
282}
283
284template <unsigned int outputBitCount, typename T>
285static inline T floatToNormalized(float input)
286{
287    static_assert(outputBitCount <= (sizeof(T) * 8),
288                  "T must have more bits than or same bits as inputBitCount.");
289    static_assert(outputBitCount > (metal::is_unsigned<T>::value ? 0 : 1),
290                  "outputBitCount must be at least 1 not counting the sign bit.");
291    constexpr unsigned int bits =
292        metal::is_unsigned<T>::value ? outputBitCount : outputBitCount - 1;
293    static_assert(bits <= 23, "Only single precision is supported");
294
295    return static_cast<T>(metal::round(((1 << bits) - 1) * input));
296}
297
298template <typename T>
299static inline T floatToNormalized(float input)
300{
301    return floatToNormalized<sizeof(T) * 8, T>(input);
302}
303
304}
305}
306# 9 ".\\blit.metal" 2
307
308using namespace rx::mtl_shader;
309
310
311constant bool kPremultiplyAlpha [[function_constant(1)]];
312constant bool kUnmultiplyAlpha [[function_constant(2)]];
313constant bool kTransformLinearToSrgb [[function_constant(3)]];
314constant int kSourceTextureType [[function_constant(4)]];
315constant int kSourceTexture2Type [[function_constant(5)]];
316
317constant bool kSourceTextureType2D = kSourceTextureType == kTextureType2D;
318constant bool kSourceTextureType2DArray = kSourceTextureType == kTextureType2DArray;
319constant bool kSourceTextureType2DMS = kSourceTextureType == kTextureType2DMultisample;
320constant bool kSourceTextureTypeCube = kSourceTextureType == kTextureTypeCube;
321constant bool kSourceTextureType3D = kSourceTextureType == kTextureType3D;
322
323constant bool kSourceTexture2Type2D = kSourceTexture2Type == kTextureType2D;
324constant bool kSourceTexture2Type2DArray = kSourceTexture2Type == kTextureType2DArray;
325constant bool kSourceTexture2Type2DMS = kSourceTexture2Type == kTextureType2DMultisample;
326constant bool kSourceTexture2TypeCube = kSourceTexture2Type == kTextureTypeCube;
327
328struct BlitParams
329{
330
331    float4 srcTexCoords;
332    int srcLevel;
333    int srcLayer;
334    bool dstLuminance;
335    uint8_t padding[7];
336};
337
338struct BlitVSOut
339{
340    float4 position [[position]];
341    float2 texCoords [[center_no_perspective, user(locn1)]];
342};
343
344vertex BlitVSOut blitVS(unsigned int vid [[vertex_id]], constant BlitParams &options [[buffer(0)]])
345{
346    BlitVSOut output;
347    output.position.xy = select(float2(-1.0f), float2(1.0f), bool2(vid & uint2(2, 1)));
348    output.position.zw = float2(0.0, 1.0);
349    output.texCoords = select(options.srcTexCoords.xy, options.srcTexCoords.zw, bool2(vid & uint2(2, 1)));
350
351    return output;
352}
353
354template <typename SrcTexture2d>
355static uint2 getImageCoords(SrcTexture2d srcTexture, float2 texCoords)
356{
357    uint2 dimens(srcTexture.get_width(), srcTexture.get_height());
358    uint2 coords = uint2(texCoords * float2(dimens));
359
360    return coords;
361}
362
363template <typename T>
364static inline vec<T, 4> blitSampleTextureMS(texture2d_ms<T> srcTexture, float2 texCoords)
365{
366    uint2 coords = getImageCoords(srcTexture, texCoords);
367    return resolveTextureMS(srcTexture, coords);
368}
369
370template <typename T>
371static inline vec<T, 4> blitSampleTexture3D(texture3d<T> srcTexture,
372                                            sampler textureSampler,
373                                            float2 texCoords,
374                                            constant BlitParams &options)
375{
376    uint depth = srcTexture.get_depth(options.srcLevel);
377    float zCoord = (float(options.srcLayer) + 0.5) / float(depth);
378
379    return srcTexture.sample(textureSampler, float3(texCoords, zCoord), level(options.srcLevel));
380}
381# 101 ".\\blit.metal"
382template <typename T>
383static inline vec<T, 4> blitReadTexture(BlitVSOut input [[stage_in]], texture2d<T> srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], texture2d_array<T> srcTexture2dArray [[texture(0), function_constant(kSourceTextureType2DArray)]], texture2d_ms<T> srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]], texturecube<T> srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]], texture3d<T> srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]], sampler textureSampler [[sampler(0)]], constant BlitParams &options [[buffer(0)]])
384{
385    vec<T, 4> output;
386
387    switch (kSourceTextureType)
388    {
389        case kTextureType2D:
390            output = srcTexture2d.sample(textureSampler, input.texCoords, level(options.srcLevel));
391            break;
392        case kTextureType2DArray:
393            output = srcTexture2dArray.sample(textureSampler, input.texCoords, options.srcLayer,
394                                              level(options.srcLevel));
395            break;
396        case kTextureType2DMultisample:
397            output = blitSampleTextureMS(srcTexture2dMS, input.texCoords);
398            break;
399        case kTextureTypeCube:
400            output = srcTextureCube.sample(textureSampler,
401                                           cubeTexcoords(input.texCoords, options.srcLayer),
402                                           level(options.srcLevel));
403            break;
404        case kTextureType3D:
405            output = blitSampleTexture3D(srcTexture3d, textureSampler, input.texCoords, options);
406            break;
407    }
408
409    if (kTransformLinearToSrgb) {
410        output.x = linearToSRGB(output.x);
411        output.y = linearToSRGB(output.y);
412        output.z = linearToSRGB(output.z);
413    }
414    if (kUnmultiplyAlpha)
415    {
416        if (output.a != 0.0)
417        {
418            output.xyz /= output.a;
419        }
420    }
421    if (kPremultiplyAlpha)
422    {
423        output.xyz *= output.a;
424    }
425
426    if (options.dstLuminance)
427    {
428        output.g = output.b = output.r;
429    }
430
431    return output;
432}
433
434template <typename T>
435static inline MultipleColorOutputs<T> blitFS(BlitVSOut input [[stage_in]], texture2d<T> srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], texture2d_array<T> srcTexture2dArray [[texture(0), function_constant(kSourceTextureType2DArray)]], texture2d_ms<T> srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]], texturecube<T> srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]], texture3d<T> srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]], sampler textureSampler [[sampler(0)]], constant BlitParams &options [[buffer(0)]])
436{
437    vec<T, 4> output = blitReadTexture(input, srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, srcTexture3d, textureSampler, options);
438
439    return toMultipleColorOutputs(output);
440}
441
442fragment MultipleColorOutputs<float> blitFloatFS(BlitVSOut input [[stage_in]], texture2d<float> srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], texture2d_array<float> srcTexture2dArray [[texture(0), function_constant(kSourceTextureType2DArray)]], texture2d_ms<float> srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]], texturecube<float> srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]], texture3d<float> srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]], sampler textureSampler [[sampler(0)]], constant BlitParams &options [[buffer(0)]])
443{
444    return blitFS(input, srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, srcTexture3d, textureSampler, options);
445}
446fragment MultipleColorOutputs<int> blitIntFS(BlitVSOut input [[stage_in]], texture2d<int> srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], texture2d_array<int> srcTexture2dArray [[texture(0), function_constant(kSourceTextureType2DArray)]], texture2d_ms<int> srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]], texturecube<int> srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]], texture3d<int> srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]], sampler textureSampler [[sampler(0)]], constant BlitParams &options [[buffer(0)]])
447{
448    return blitFS(input, srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, srcTexture3d, textureSampler, options);
449}
450fragment MultipleColorOutputs<uint> blitUIntFS(BlitVSOut input [[stage_in]], texture2d<uint> srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], texture2d_array<uint> srcTexture2dArray [[texture(0), function_constant(kSourceTextureType2DArray)]], texture2d_ms<uint> srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]], texturecube<uint> srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]], texture3d<uint> srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]], sampler textureSampler [[sampler(0)]], constant BlitParams &options [[buffer(0)]])
451{
452    return blitFS(input, srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, srcTexture3d, textureSampler, options);
453}
454
455fragment MultipleColorOutputs<uint> copyTextureFloatToUIntFS(BlitVSOut input [[stage_in]], texture2d<float> srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], texture2d_array<float> srcTexture2dArray [[texture(0), function_constant(kSourceTextureType2DArray)]], texture2d_ms<float> srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]], texturecube<float> srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]], texture3d<float> srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]], sampler textureSampler [[sampler(0)]], constant BlitParams &options [[buffer(0)]])
456{
457    float4 inputColor = blitReadTexture<>(input, srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, srcTexture3d, textureSampler, options);
458    uint4 output = uint4(inputColor * float4(255.0));
459
460    return toMultipleColorOutputs(output);
461}
462
463
464struct FragmentDepthOut
465{
466    float depth [[depth(any)]];
467};
468
469static inline float sampleDepth(
470    texture2d<float> srcTexture2d [[function_constant(kSourceTextureType2D)]],
471    texture2d_array<float> srcTexture2dArray [[function_constant(kSourceTextureType2DArray)]],
472    texture2d_ms<float> srcTexture2dMS [[function_constant(kSourceTextureType2DMS)]],
473    texturecube<float> srcTextureCube [[function_constant(kSourceTextureTypeCube)]],
474    float2 texCoords,
475    constant BlitParams &options)
476{
477    float4 output;
478
479    constexpr sampler textureSampler(mag_filter::nearest, min_filter::nearest);
480
481    switch (kSourceTextureType)
482    {
483        case kTextureType2D:
484            output = srcTexture2d.sample(textureSampler, texCoords, level(options.srcLevel));
485            break;
486        case kTextureType2DArray:
487            output = srcTexture2dArray.sample(textureSampler, texCoords, options.srcLayer,
488                                              level(options.srcLevel));
489            break;
490        case kTextureType2DMultisample:
491
492            output = srcTexture2dMS.read(getImageCoords(srcTexture2dMS, texCoords), 0);
493            break;
494        case kTextureTypeCube:
495            output =
496                srcTextureCube.sample(textureSampler, cubeTexcoords(texCoords, options.srcLayer),
497                                      level(options.srcLevel));
498            break;
499    }
500
501    return output.r;
502}
503
504fragment FragmentDepthOut blitDepthFS(BlitVSOut input [[stage_in]],
505                                      texture2d<float> srcTexture2d
506                                      [[texture(0), function_constant(kSourceTextureType2D)]],
507                                      texture2d_array<float> srcTexture2dArray
508                                      [[texture(0), function_constant(kSourceTextureType2DArray)]],
509                                      texture2d_ms<float> srcTexture2dMS
510                                      [[texture(0), function_constant(kSourceTextureType2DMS)]],
511                                      texturecube<float> srcTextureCube
512                                      [[texture(0), function_constant(kSourceTextureTypeCube)]],
513                                      constant BlitParams &options [[buffer(0)]])
514{
515    FragmentDepthOut re;
516
517    re.depth = sampleDepth(srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube,
518                           input.texCoords, options);
519
520    return re;
521}
522
523static inline uint32_t sampleStencil(
524    texture2d<uint32_t> srcTexture2d [[function_constant(kSourceTexture2Type2D)]],
525    texture2d_array<uint32_t> srcTexture2dArray [[function_constant(kSourceTexture2Type2DArray)]],
526    texture2d_ms<uint32_t> srcTexture2dMS [[function_constant(kSourceTexture2Type2DMS)]],
527    texturecube<uint32_t> srcTextureCube [[function_constant(kSourceTexture2TypeCube)]],
528    float2 texCoords,
529    int srcLevel,
530    int srcLayer)
531{
532    uint4 output;
533    constexpr sampler textureSampler(mag_filter::nearest, min_filter::nearest);
534
535    switch (kSourceTexture2Type)
536    {
537        case kTextureType2D:
538            output = srcTexture2d.sample(textureSampler, texCoords, level(srcLevel));
539            break;
540        case kTextureType2DArray:
541            output = srcTexture2dArray.sample(textureSampler, texCoords, srcLayer, level(srcLevel));
542            break;
543        case kTextureType2DMultisample:
544
545            output = srcTexture2dMS.read(getImageCoords(srcTexture2dMS, texCoords), 0);
546            break;
547        case kTextureTypeCube:
548            output = srcTextureCube.sample(textureSampler, cubeTexcoords(texCoords, srcLayer),
549                                           level(srcLevel));
550            break;
551    }
552
553    return output.r;
554}
555
556
557struct BlitStencilToBufferParams
558{
559    float2 srcStartTexCoords;
560    float2 srcTexCoordSteps;
561    int srcLevel;
562    int srcLayer;
563
564    uint2 dstSize;
565    uint dstBufferRowPitch;
566
567    bool resolveMS;
568};
569
570kernel void blitStencilToBufferCS(ushort2 gIndices [[thread_position_in_grid]],
571                                  texture2d<uint32_t> srcTexture2d
572                                  [[texture(1), function_constant(kSourceTexture2Type2D)]],
573                                  texture2d_array<uint32_t> srcTexture2dArray
574                                  [[texture(1), function_constant(kSourceTexture2Type2DArray)]],
575                                  texture2d_ms<uint32_t> srcTexture2dMS
576                                  [[texture(1), function_constant(kSourceTexture2Type2DMS)]],
577                                  texturecube<uint32_t> srcTextureCube
578                                  [[texture(1), function_constant(kSourceTexture2TypeCube)]],
579                                  constant BlitStencilToBufferParams &options [[buffer(0)]],
580                                  device uchar *buffer [[buffer(1)]])
581{
582    if (gIndices.x >= options.dstSize.x || gIndices.y >= options.dstSize.y)
583    {
584        return;
585    }
586
587    float2 srcTexCoords = options.srcStartTexCoords + float2(gIndices) * options.srcTexCoordSteps;
588
589    if (kSourceTexture2Type == kTextureType2DMultisample && !options.resolveMS)
590    {
591        uint samples = srcTexture2dMS.get_num_samples();
592        uint2 imageCoords = getImageCoords(srcTexture2dMS, srcTexCoords);
593        uint bufferOffset = options.dstBufferRowPitch * gIndices.y + samples * gIndices.x;
594
595        for (uint sample = 0; sample < samples; ++sample)
596        {
597            uint stencilPerSample = srcTexture2dMS.read(imageCoords, sample).r;
598            buffer[bufferOffset + sample] = static_cast<uchar>(stencilPerSample);
599        }
600    }
601    else
602    {
603        uint32_t stencil =
604            sampleStencil(srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube,
605                          srcTexCoords, options.srcLevel, options.srcLayer);
606
607        buffer[options.dstBufferRowPitch * gIndices.y + gIndices.x] = static_cast<uchar>(stencil);
608    }
609}
610
611
612#if __METAL_VERSION__ >= 210
613
614struct FragmentStencilOut
615{
616    uint32_t stencil [[stencil]];
617};
618
619struct FragmentDepthStencilOut
620{
621    float depth [[depth(any)]];
622    uint32_t stencil [[stencil]];
623};
624
625fragment FragmentStencilOut blitStencilFS(
626    BlitVSOut input [[stage_in]],
627    texture2d<uint32_t> srcTexture2d [[texture(1), function_constant(kSourceTexture2Type2D)]],
628    texture2d_array<uint32_t> srcTexture2dArray
629    [[texture(1), function_constant(kSourceTexture2Type2DArray)]],
630    texture2d_ms<uint32_t> srcTexture2dMS
631    [[texture(1), function_constant(kSourceTexture2Type2DMS)]],
632    texturecube<uint32_t> srcTextureCube [[texture(1), function_constant(kSourceTexture2TypeCube)]],
633    constant BlitParams &options [[buffer(0)]])
634{
635    FragmentStencilOut re;
636
637    re.stencil = sampleStencil(srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube,
638                               input.texCoords, options.srcLevel, options.srcLayer);
639
640    return re;
641}
642
643fragment FragmentDepthStencilOut blitDepthStencilFS(
644    BlitVSOut input [[stage_in]],
645
646    texture2d<float> srcDepthTexture2d [[texture(0), function_constant(kSourceTextureType2D)]],
647    texture2d_array<float> srcDepthTexture2dArray
648    [[texture(0), function_constant(kSourceTextureType2DArray)]],
649    texture2d_ms<float> srcDepthTexture2dMS
650    [[texture(0), function_constant(kSourceTextureType2DMS)]],
651    texturecube<float> srcDepthTextureCube
652    [[texture(0), function_constant(kSourceTextureTypeCube)]],
653
654
655    texture2d<uint32_t> srcStencilTexture2d
656    [[texture(1), function_constant(kSourceTexture2Type2D)]],
657    texture2d_array<uint32_t> srcStencilTexture2dArray
658    [[texture(1), function_constant(kSourceTexture2Type2DArray)]],
659    texture2d_ms<uint32_t> srcStencilTexture2dMS
660    [[texture(1), function_constant(kSourceTexture2Type2DMS)]],
661    texturecube<uint32_t> srcStencilTextureCube
662    [[texture(1), function_constant(kSourceTexture2TypeCube)]],
663
664    constant BlitParams &options [[buffer(0)]])
665{
666    FragmentDepthStencilOut re;
667
668    re.depth = sampleDepth(srcDepthTexture2d, srcDepthTexture2dArray, srcDepthTexture2dMS,
669                           srcDepthTextureCube, input.texCoords, options);
670    re.stencil =
671        sampleStencil(srcStencilTexture2d, srcStencilTexture2dArray, srcStencilTexture2dMS,
672                      srcStencilTextureCube, input.texCoords, options.srcLevel, options.srcLayer);
673    return re;
674}
675#endif
676# 2 "temp_master_source.metal" 2
677# 1 ".\\clear.metal" 1
678# 10 ".\\clear.metal"
679using namespace rx::mtl_shader;
680
681struct ClearParams
682{
683    float4 clearColor;
684    float clearDepth;
685};
686
687vertex float4 clearVS(unsigned int vid [[ vertex_id ]],
688                      constant ClearParams &clearParams [[buffer(0)]])
689{
690    return float4(gCorners[vid], clearParams.clearDepth, 1.0);
691}
692
693fragment MultipleColorOutputs<float> clearFloatFS(constant ClearParams &clearParams [[buffer(0)]])
694{
695    return toMultipleColorOutputs(clearParams.clearColor);
696}
697
698fragment MultipleColorOutputs<int> clearIntFS(constant ClearParams &clearParams [[buffer(0)]])
699{
700    return toMultipleColorOutputs(as_type<int4>(clearParams.clearColor));
701}
702
703fragment MultipleColorOutputs<uint> clearUIntFS(constant ClearParams &clearParams [[buffer(0)]])
704{
705    return toMultipleColorOutputs(as_type<uint4>(clearParams.clearColor));
706}
707# 3 "temp_master_source.metal" 2
708# 1 ".\\gen_indices.metal" 1
709
710
711
712
713
714
715
716
717using namespace rx::mtl_shader;
718
719
720constant bool kSourceBufferAligned[[function_constant(100)]];
721constant bool kSourceIndexIsU8[[function_constant(200)]];
722constant bool kSourceIndexIsU16[[function_constant(300)]];
723constant bool kSourceIndexIsU32[[function_constant(400)]];
724constant bool kSourceBufferUnaligned = !kSourceBufferAligned;
725constant bool kUseSourceBufferU8 = kSourceIndexIsU8 || kSourceBufferUnaligned;
726constant bool kUseSourceBufferU16 = kSourceIndexIsU16 && kSourceBufferAligned;
727constant bool kUseSourceBufferU32 = kSourceIndexIsU32 && kSourceBufferAligned;
728
729struct IndexConversionParams
730{
731    uint32_t srcOffset;
732    uint32_t indexCount;
733    bool primitiveRestartEnabled;
734};
735
736
737
738inline ushort getIndexAligned(constant ushort *inputAligned, uint offset, uint idx)
739{
740    return inputAligned[offset / 2 + idx];
741}
742inline uint getIndexAligned(constant uint *inputAligned, uint offset, uint idx)
743{
744    return inputAligned[offset / 4 + idx];
745}
746inline uchar getIndexAligned(constant uchar *input, uint offset, uint idx)
747{
748    return input[offset + idx];
749}
750inline ushort getIndexUnalignedU16(constant uchar *input, uint offset, uint idx)
751{
752    ushort inputLo = input[offset + 2 * idx];
753    ushort inputHi = input[offset + 2 * idx + 1];
754
755    return inputLo | (inputHi << 8);
756}
757inline uint getIndexUnalignedU32(constant uchar *input, uint offset, uint idx)
758{
759    uint input0 = input[offset + 4 * idx];
760    uint input1 = input[offset + 4 * idx + 1];
761    uint input2 = input[offset + 4 * idx + 2];
762    uint input3 = input[offset + 4 * idx + 3];
763
764    return input0 | (input1 << 8) | (input2 << 16) | (input3 << 24);
765}
766
767kernel void convertIndexU8ToU16(uint idx [[thread_position_in_grid]],
768                                constant IndexConversionParams &options [[buffer(0)]],
769                                constant uchar *input [[buffer(1)]],
770                                device ushort *output [[buffer(2)]])
771{
772    if (idx >= options.indexCount) { return; };
773
774    uchar value = getIndexAligned(input, options.srcOffset, idx);
775
776    if (options.primitiveRestartEnabled && value == 0xff)
777    {
778        output[idx] = 0xffff;
779    }
780    else
781    {
782        output[idx] = value;
783    }
784}
785
786kernel void convertIndexU16(uint idx [[thread_position_in_grid]],
787                            constant IndexConversionParams &options [[buffer(0)]],
788                            constant uchar *input
789                            [[buffer(1), function_constant(kSourceBufferUnaligned)]],
790                            constant ushort *inputAligned
791                            [[buffer(1), function_constant(kSourceBufferAligned)]],
792                            device ushort *output [[buffer(2)]])
793{
794    if (idx >= options.indexCount) { return; };
795
796    ushort value;
797    if (kSourceBufferAligned)
798    {
799        value = getIndexAligned(inputAligned, options.srcOffset, idx);
800    }
801    else
802    {
803        value = getIndexUnalignedU16(input, options.srcOffset, idx);
804    }
805    output[idx] = value;
806}
807
808kernel void convertIndexU32(uint idx [[thread_position_in_grid]],
809                            constant IndexConversionParams &options [[buffer(0)]],
810                            constant uchar *input
811                            [[buffer(1), function_constant(kSourceBufferUnaligned)]],
812                            constant uint *inputAligned
813                            [[buffer(1), function_constant(kSourceBufferAligned)]],
814                            device uint *output [[buffer(2)]])
815{
816    if (idx >= options.indexCount) { return; };
817
818    uint value;
819    if (kSourceBufferAligned)
820    {
821        value = getIndexAligned(inputAligned, options.srcOffset, idx);
822    }
823    else
824    {
825        value = getIndexUnalignedU32(input, options.srcOffset, idx);
826    }
827    output[idx] = value;
828}
829
830struct IndexFromArrayParams
831{
832    uint firstVertex;
833
834    uint vertexCount;
835};
836
837
838kernel void genTriFanIndicesFromArray(uint idx [[thread_position_in_grid]],
839                                      constant IndexFromArrayParams &options [[buffer(0)]],
840                                      device uint *output [[buffer(2)]])
841{
842    if (idx >= options.vertexCount) { return; };
843
844    uint vertexIdx = options.firstVertex + 2 + idx;
845
846
847
848    output[3 * idx ] = vertexIdx - 1;
849    output[3 * idx + 1] = vertexIdx;
850    output[3 * idx + 2] = options.firstVertex;
851}
852
853inline uint getIndexU32(uint offset,
854                        uint idx,
855                        constant uchar *inputU8 [[function_constant(kUseSourceBufferU8)]],
856                        constant ushort *inputU16 [[function_constant(kUseSourceBufferU16)]],
857                        constant uint *inputU32 [[function_constant(kUseSourceBufferU32)]])
858{
859    if (kUseSourceBufferU8)
860    {
861        if (kSourceIndexIsU16)
862        {
863            return getIndexUnalignedU16(inputU8, offset, idx);
864        }
865        else if (kSourceIndexIsU32)
866        {
867            return getIndexUnalignedU32(inputU8, offset, idx);
868        }
869        return getIndexAligned(inputU8, offset, idx);
870    }
871    else if (kUseSourceBufferU16)
872    {
873        return getIndexAligned(inputU16, offset, idx);
874    }
875    else if (kUseSourceBufferU32)
876    {
877        return getIndexAligned(inputU32, offset, idx);
878    }
879    return 0;
880}
881
882
883
884
885kernel void genTriFanIndicesFromElements(uint idx [[thread_position_in_grid]],
886                                         constant IndexConversionParams &options [[buffer(0)]],
887                                         constant uchar *inputU8
888                                         [[buffer(1), function_constant(kUseSourceBufferU8)]],
889                                         constant ushort *inputU16
890                                         [[buffer(1), function_constant(kUseSourceBufferU16)]],
891                                         constant uint *inputU32
892                                         [[buffer(1), function_constant(kUseSourceBufferU32)]],
893                                         device uint *output [[buffer(2)]])
894{
895    if (idx >= options.indexCount) { return; };
896
897    uint elemIdx = 2 + idx;
898
899    output[3 * idx] = getIndexU32(options.srcOffset, 0, inputU8, inputU16, inputU32);
900    output[3 * idx + 1] = getIndexU32(options.srcOffset, elemIdx - 1, inputU8, inputU16, inputU32);
901    output[3 * idx + 2] = getIndexU32(options.srcOffset, elemIdx, inputU8, inputU16, inputU32);
902}
903
904
905kernel void genLineLoopIndicesFromArray(uint idx [[thread_position_in_grid]],
906                                        constant IndexFromArrayParams &options [[buffer(0)]],
907                                        device uint *output [[buffer(2)]])
908{
909    uint totalIndices = options.vertexCount + 1;
910    if (idx >= totalIndices) { return; };
911
912    output[idx] = options.firstVertex + idx % options.vertexCount;
913}
914
915
916
917kernel void genLineLoopIndicesFromElements(uint idx [[thread_position_in_grid]],
918                                           constant IndexConversionParams &options [[buffer(0)]],
919                                           constant uchar *inputU8
920                                           [[buffer(1), function_constant(kUseSourceBufferU8)]],
921                                           constant ushort *inputU16
922                                           [[buffer(1), function_constant(kUseSourceBufferU16)]],
923                                           constant uint *inputU32
924                                           [[buffer(1), function_constant(kUseSourceBufferU32)]],
925                                           device uint *output [[buffer(2)]])
926{
927    uint totalTargetIndices = options.indexCount + 1;
928    if (idx >= totalTargetIndices) { return; };
929
930    output[idx] =
931        getIndexU32(options.srcOffset, idx % options.indexCount, inputU8, inputU16, inputU32);
932}
933# 4 "temp_master_source.metal" 2
934# 1 ".\\gen_mipmap.metal" 1
935
936
937
938
939
940
941
942
943using namespace rx::mtl_shader;
944# 31 ".\\gen_mipmap.metal"
945struct GenMipParams
946{
947    uint srcLevel;
948    uint numMipLevelsToGen;
949    bool sRGB;
950};
951
952
953
954kernel void generate3DMipmaps(uint lIndex [[thread_index_in_threadgroup]],
955                              ushort3 gIndices [[thread_position_in_grid]],
956                              texture3d<float> srcTexture [[texture(0)]],
957                              texture3d<float, access::write> dstMip1 [[texture(1)]],
958                              texture3d<float, access::write> dstMip2 [[texture(2)]],
959                              texture3d<float, access::write> dstMip3 [[texture(3)]],
960                              texture3d<float, access::write> dstMip4 [[texture(4)]],
961                              constant GenMipParams &options [[buffer(0)]])
962{
963    ushort3 mipSize = ushort3(dstMip1.get_width(), dstMip1.get_height(), dstMip1.get_depth());
964    bool validThread = gIndices.x < mipSize.x && gIndices.y < mipSize.y && gIndices.z < mipSize.z;
965
966    constexpr sampler textureSampler(mag_filter::linear, min_filter::linear, mip_filter::linear);
967
968
969
970
971    threadgroup float sR[(8 * 8 * 8)];
972    threadgroup float sG[(8 * 8 * 8)];
973    threadgroup float sB[(8 * 8 * 8)];
974    threadgroup float sA[(8 * 8 * 8)];
975
976
977    float4 texel1;
978    if (validThread)
979    {
980        float3 texCoords = (float3(gIndices) + float3(0.5, 0.5, 0.5)) / float3(mipSize);
981        texel1 = srcTexture.sample(textureSampler, texCoords, level(options.srcLevel));
982
983
984        dstMip1.write(texel1, gIndices);
985    }
986    else
987    {
988
989        lIndex = 0xffffffff;
990    }
991
992    if (options.numMipLevelsToGen == 1)
993    {
994        return;
995    }
996
997
998
999
1000    if (options.sRGB)
1001    {
1002        texel1 = linearToSRGB(texel1);
1003    }
1004    sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;;
1005
1006    threadgroup_barrier(mem_flags::mem_threadgroup);
1007
1008
1009    if ((lIndex & 0x49) == 0)
1010    {
1011        bool3 atEdge = gIndices == (mipSize - ushort3(1));
1012
1013
1014
1015        float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 1], sG[lIndex + 1], sB[lIndex + 1], sA[lIndex + 1]));
1016
1017        float4 texel3 = (atEdge.y) ? (texel1) : (float4(sR[lIndex + 8], sG[lIndex + 8], sB[lIndex + 8], sA[lIndex + 8]));
1018
1019        float4 texel4 = (atEdge.z) ? (texel1) : (float4(sR[lIndex + (8 * 8)], sG[lIndex + (8 * 8)], sB[lIndex + (8 * 8)], sA[lIndex + (8 * 8)]));
1020
1021        float4 texel5 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (8 + 1)], sG[lIndex + (8 + 1)], sB[lIndex + (8 + 1)], sA[lIndex + (8 + 1)]));
1022
1023
1024        float4 texel6 = (atEdge.x | atEdge.z) ? (texel2) : (float4(sR[lIndex + ((8 * 8) + 1)], sG[lIndex + ((8 * 8) + 1)], sB[lIndex + ((8 * 8) + 1)], sA[lIndex + ((8 * 8) + 1)]));
1025
1026
1027        float4 texel7 = (atEdge.y | atEdge.z) ? (texel3) : (float4(sR[lIndex + ((8 * 8) + 8)], sG[lIndex + ((8 * 8) + 8)], sB[lIndex + ((8 * 8) + 8)], sA[lIndex + ((8 * 8) + 8)]));
1028
1029
1030        float4 texel8 =
1031            (atEdge.x | atEdge.y | atEdge.z) ? (texel5) : (float4(sR[lIndex + ((8 * 8) + 8 + 1)], sG[lIndex + ((8 * 8) + 8 + 1)], sB[lIndex + ((8 * 8) + 8 + 1)], sA[lIndex + ((8 * 8) + 8 + 1)]));
1032
1033
1034        texel1 = (texel1 + texel2 + texel3 + texel4 + texel5 + texel6 + texel7 + texel8) / 8.0;
1035
1036        dstMip2.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices >> 1);
1037
1038
1039        sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;;
1040    }
1041
1042    if (options.numMipLevelsToGen == 2)
1043    {
1044        return;
1045    }
1046
1047
1048    threadgroup_barrier(mem_flags::mem_threadgroup);
1049
1050
1051    if ((lIndex & 0xdb) == 0)
1052    {
1053        mipSize = max(mipSize >> 1, ushort3(1));
1054        bool3 atEdge = (gIndices >> 1) == (mipSize - ushort3(1));
1055
1056
1057
1058        float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 2], sG[lIndex + 2], sB[lIndex + 2], sA[lIndex + 2]));
1059
1060        float4 texel3 =
1061            (atEdge.y) ? (texel1) : (float4(sR[lIndex + (2 * 8)], sG[lIndex + (2 * 8)], sB[lIndex + (2 * 8)], sA[lIndex + (2 * 8)]));
1062
1063        float4 texel4 =
1064            (atEdge.z) ? (texel1) : (float4(sR[lIndex + (2 * (8 * 8))], sG[lIndex + (2 * (8 * 8))], sB[lIndex + (2 * (8 * 8))], sA[lIndex + (2 * (8 * 8))]));
1065
1066        float4 texel5 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (2 * 8 + 2)], sG[lIndex + (2 * 8 + 2)], sB[lIndex + (2 * 8 + 2)], sA[lIndex + (2 * 8 + 2)]));
1067
1068
1069        float4 texel6 = (atEdge.x | atEdge.z) ? (texel2) : (float4(sR[lIndex + (2 * (8 * 8) + 2)], sG[lIndex + (2 * (8 * 8) + 2)], sB[lIndex + (2 * (8 * 8) + 2)], sA[lIndex + (2 * (8 * 8) + 2)]));
1070
1071
1072        float4 texel7 = (atEdge.y | atEdge.z) ? (texel3) : (float4(sR[lIndex + (2 * (8 * 8) + 2 * 8)], sG[lIndex + (2 * (8 * 8) + 2 * 8)], sB[lIndex + (2 * (8 * 8) + 2 * 8)], sA[lIndex + (2 * (8 * 8) + 2 * 8)]));
1073
1074
1075
1076        float4 texel8 = (atEdge.x | atEdge.y | atEdge.z) ? (texel5) : (float4(sR[lIndex + (2 * (8 * 8) + 2 * 8 + 2)], sG[lIndex + (2 * (8 * 8) + 2 * 8 + 2)], sB[lIndex + (2 * (8 * 8) + 2 * 8 + 2)], sA[lIndex + (2 * (8 * 8) + 2 * 8 + 2)]));
1077
1078
1079
1080        texel1 = (texel1 + texel2 + texel3 + texel4 + texel5 + texel6 + texel7 + texel8) / 8.0;
1081
1082        dstMip3.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices >> 2);
1083
1084
1085        sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;;
1086    }
1087
1088    if (options.numMipLevelsToGen == 3)
1089    {
1090        return;
1091    }
1092
1093
1094    threadgroup_barrier(mem_flags::mem_threadgroup);
1095
1096
1097    if ((lIndex & 0x1ff) == 0)
1098    {
1099        mipSize = max(mipSize >> 1, ushort3(1));
1100        bool3 atEdge = (gIndices >> 2) == (mipSize - ushort3(1));
1101
1102
1103
1104        float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 4], sG[lIndex + 4], sB[lIndex + 4], sA[lIndex + 4]));
1105
1106        float4 texel3 =
1107            (atEdge.y) ? (texel1) : (float4(sR[lIndex + (4 * 8)], sG[lIndex + (4 * 8)], sB[lIndex + (4 * 8)], sA[lIndex + (4 * 8)]));
1108
1109        float4 texel4 =
1110            (atEdge.z) ? (texel1) : (float4(sR[lIndex + (4 * (8 * 8))], sG[lIndex + (4 * (8 * 8))], sB[lIndex + (4 * (8 * 8))], sA[lIndex + (4 * (8 * 8))]));
1111
1112        float4 texel5 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (4 * 8 + 4)], sG[lIndex + (4 * 8 + 4)], sB[lIndex + (4 * 8 + 4)], sA[lIndex + (4 * 8 + 4)]));
1113
1114
1115        float4 texel6 = (atEdge.x | atEdge.z) ? (texel2) : (float4(sR[lIndex + (4 * (8 * 8) + 4)], sG[lIndex + (4 * (8 * 8) + 4)], sB[lIndex + (4 * (8 * 8) + 4)], sA[lIndex + (4 * (8 * 8) + 4)]));
1116
1117
1118        float4 texel7 = (atEdge.y | atEdge.z) ? (texel3) : (float4(sR[lIndex + (4 * (8 * 8) + 4 * 8)], sG[lIndex + (4 * (8 * 8) + 4 * 8)], sB[lIndex + (4 * (8 * 8) + 4 * 8)], sA[lIndex + (4 * (8 * 8) + 4 * 8)]));
1119
1120
1121
1122        float4 texel8 = (atEdge.x | atEdge.y | atEdge.z) ? (texel5) : (float4(sR[lIndex + (4 * (8 * 8) + 4 * 8 + 4)], sG[lIndex + (4 * (8 * 8) + 4 * 8 + 4)], sB[lIndex + (4 * (8 * 8) + 4 * 8 + 4)], sA[lIndex + (4 * (8 * 8) + 4 * 8 + 4)]));
1123
1124
1125
1126        texel1 = (texel1 + texel2 + texel3 + texel4 + texel5 + texel6 + texel7 + texel8) / 8.0;
1127
1128        dstMip4.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices >> 3);
1129    }
1130}
1131
1132kernel void generate2DMipmaps(uint lIndex [[thread_index_in_threadgroup]],
1133                              ushort2 gIndices [[thread_position_in_grid]],
1134                              texture2d<float> srcTexture [[texture(0)]],
1135                              texture2d<float, access::write> dstMip1 [[texture(1)]],
1136                              texture2d<float, access::write> dstMip2 [[texture(2)]],
1137                              texture2d<float, access::write> dstMip3 [[texture(3)]],
1138                              texture2d<float, access::write> dstMip4 [[texture(4)]],
1139                              constant GenMipParams &options [[buffer(0)]])
1140{
1141    uint firstMipLevel = options.srcLevel + 1;
1142    ushort2 mipSize =
1143        ushort2(srcTexture.get_width(firstMipLevel), srcTexture.get_height(firstMipLevel));
1144    bool validThread = gIndices.x < mipSize.x && gIndices.y < mipSize.y;
1145
1146    constexpr sampler textureSampler(mag_filter::linear, min_filter::linear, mip_filter::linear);
1147
1148
1149
1150
1151    threadgroup float sR[(8 * 8)];
1152    threadgroup float sG[(8 * 8)];
1153    threadgroup float sB[(8 * 8)];
1154    threadgroup float sA[(8 * 8)];
1155
1156
1157    float4 texel1;
1158    if (validThread)
1159    {
1160        float2 texCoords = (float2(gIndices) + float2(0.5, 0.5)) / float2(mipSize);
1161        texel1 = srcTexture.sample(textureSampler, texCoords, level(options.srcLevel));
1162
1163
1164        dstMip1.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices);
1165    }
1166    else
1167    {
1168
1169        lIndex = 0xffffffff;
1170    }
1171
1172    if (options.numMipLevelsToGen == 1)
1173    {
1174        return;
1175    }
1176
1177
1178
1179
1180    sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;;
1181
1182    threadgroup_barrier(mem_flags::mem_threadgroup);
1183
1184
1185    if ((lIndex & 0x09) == 0)
1186    {
1187        bool2 atEdge = gIndices == (mipSize - ushort2(1));
1188
1189
1190
1191        float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 1], sG[lIndex + 1], sB[lIndex + 1], sA[lIndex + 1]));
1192
1193        float4 texel3 = (atEdge.y) ? (texel1) : (float4(sR[lIndex + 8], sG[lIndex + 8], sB[lIndex + 8], sA[lIndex + 8]));
1194
1195        float4 texel4 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (8 + 1)], sG[lIndex + (8 + 1)], sB[lIndex + (8 + 1)], sA[lIndex + (8 + 1)]));
1196
1197
1198        texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0;
1199
1200        dstMip2.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices >> 1);
1201
1202
1203        sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;;
1204    }
1205
1206    if (options.numMipLevelsToGen == 2)
1207    {
1208        return;
1209    }
1210
1211
1212    threadgroup_barrier(mem_flags::mem_threadgroup);
1213
1214
1215    if ((lIndex & 0x1b) == 0)
1216    {
1217        mipSize = max(mipSize >> 1, ushort2(1));
1218        bool2 atEdge = (gIndices >> 1) == (mipSize - ushort2(1));
1219
1220
1221
1222        float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 2], sG[lIndex + 2], sB[lIndex + 2], sA[lIndex + 2]));
1223
1224        float4 texel3 =
1225            (atEdge.y) ? (texel1) : (float4(sR[lIndex + 2 * 8], sG[lIndex + 2 * 8], sB[lIndex + 2 * 8], sA[lIndex + 2 * 8]));
1226
1227        float4 texel4 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (2 * 8 + 2)], sG[lIndex + (2 * 8 + 2)], sB[lIndex + (2 * 8 + 2)], sA[lIndex + (2 * 8 + 2)]));
1228
1229
1230        texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0;
1231
1232        dstMip3.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices >> 2);
1233
1234
1235        sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;;
1236    }
1237
1238    if (options.numMipLevelsToGen == 3)
1239    {
1240        return;
1241    }
1242
1243
1244    threadgroup_barrier(mem_flags::mem_threadgroup);
1245
1246
1247    if ((lIndex & 0x3f) == 0)
1248    {
1249        mipSize = max(mipSize >> 1, ushort2(1));
1250        bool2 atEdge = (gIndices >> 2) == (mipSize - ushort2(1));
1251
1252
1253
1254        float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 4], sG[lIndex + 4], sB[lIndex + 4], sA[lIndex + 4]));
1255
1256        float4 texel3 =
1257            (atEdge.y) ? (texel1) : (float4(sR[lIndex + 4 * 8], sG[lIndex + 4 * 8], sB[lIndex + 4 * 8], sA[lIndex + 4 * 8]));
1258
1259        float4 texel4 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (4 * 8 + 4)], sG[lIndex + (4 * 8 + 4)], sB[lIndex + (4 * 8 + 4)], sA[lIndex + (4 * 8 + 4)]));
1260
1261
1262        texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0;
1263
1264        dstMip4.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices >> 3);
1265    }
1266}
1267
1268template <typename TextureTypeR, typename TextureTypeW>
1269static __attribute__((always_inline)) void generateCubeOr2DArray2ndAndMoreMipmaps(
1270    uint lIndex,
1271    ushort3 gIndices,
1272    TextureTypeR srcTexture,
1273    TextureTypeW dstMip2,
1274    TextureTypeW dstMip3,
1275    TextureTypeW dstMip4,
1276    ushort2 mip1Size,
1277    float4 mip1Texel,
1278    threadgroup float *sR,
1279    threadgroup float *sG,
1280    threadgroup float *sB,
1281    threadgroup float *sA,
1282    constant GenMipParams &options)
1283{
1284    ushort2 mipSize = mip1Size;
1285    float4 texel1 = mip1Texel;
1286
1287
1288
1289
1290    sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;;
1291
1292    threadgroup_barrier(mem_flags::mem_threadgroup);
1293
1294
1295    if ((lIndex & 0x09) == 0)
1296    {
1297        bool2 atEdge = gIndices.xy == (mipSize - ushort2(1));
1298
1299
1300
1301        float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 1], sG[lIndex + 1], sB[lIndex + 1], sA[lIndex + 1]));
1302
1303        float4 texel3 = (atEdge.y) ? (texel1) : (float4(sR[lIndex + 8], sG[lIndex + 8], sB[lIndex + 8], sA[lIndex + 8]));
1304
1305        float4 texel4 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (8 + 1)], sG[lIndex + (8 + 1)], sB[lIndex + (8 + 1)], sA[lIndex + (8 + 1)]));
1306
1307
1308        texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0;
1309
1310        dstMip2.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices.xy >> 1, gIndices.z);
1311
1312
1313        sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;;
1314    }
1315
1316    if (options.numMipLevelsToGen == 2)
1317    {
1318        return;
1319    }
1320
1321
1322    threadgroup_barrier(mem_flags::mem_threadgroup);
1323
1324
1325    if ((lIndex & 0x1b) == 0)
1326    {
1327        mipSize = max(mipSize >> 1, ushort2(1));
1328        bool2 atEdge = (gIndices.xy >> 1) == (mipSize - ushort2(1));
1329
1330
1331
1332        float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 2], sG[lIndex + 2], sB[lIndex + 2], sA[lIndex + 2]));
1333
1334        float4 texel3 =
1335            (atEdge.y) ? (texel1) : (float4(sR[lIndex + 2 * 8], sG[lIndex + 2 * 8], sB[lIndex + 2 * 8], sA[lIndex + 2 * 8]));
1336
1337        float4 texel4 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (2 * 8 + 2)], sG[lIndex + (2 * 8 + 2)], sB[lIndex + (2 * 8 + 2)], sA[lIndex + (2 * 8 + 2)]));
1338
1339
1340        texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0;
1341
1342        dstMip3.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices.xy >> 2, gIndices.z);
1343
1344
1345        sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;;
1346    }
1347
1348    if (options.numMipLevelsToGen == 3)
1349    {
1350        return;
1351    }
1352
1353
1354    threadgroup_barrier(mem_flags::mem_threadgroup);
1355
1356
1357    if ((lIndex & 0x3f) == 0)
1358    {
1359        mipSize = max(mipSize >> 1, ushort2(1));
1360        bool2 atEdge = (gIndices.xy >> 2) == (mipSize - ushort2(1));
1361
1362
1363
1364        float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 4], sG[lIndex + 4], sB[lIndex + 4], sA[lIndex + 4]));
1365
1366        float4 texel3 =
1367            (atEdge.y) ? (texel1) : (float4(sR[lIndex + 4 * 8], sG[lIndex + 4 * 8], sB[lIndex + 4 * 8], sA[lIndex + 4 * 8]));
1368
1369        float4 texel4 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (4 * 8 + 4)], sG[lIndex + (4 * 8 + 4)], sB[lIndex + (4 * 8 + 4)], sA[lIndex + (4 * 8 + 4)]));
1370
1371
1372        texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0;
1373
1374        dstMip4.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices.xy >> 3, gIndices.z);
1375    }
1376}
1377
1378kernel void generateCubeMipmaps(uint lIndex [[thread_index_in_threadgroup]],
1379                                ushort3 gIndices [[thread_position_in_grid]],
1380                                texturecube<float> srcTexture [[texture(0)]],
1381                                texturecube<float, access::write> dstMip1 [[texture(1)]],
1382                                texturecube<float, access::write> dstMip2 [[texture(2)]],
1383                                texturecube<float, access::write> dstMip3 [[texture(3)]],
1384                                texturecube<float, access::write> dstMip4 [[texture(4)]],
1385                                constant GenMipParams &options [[buffer(0)]])
1386{
1387    uint firstMipLevel = options.srcLevel + 1;
1388    ushort2 mip1Size =
1389        ushort2(srcTexture.get_width(firstMipLevel), srcTexture.get_height(firstMipLevel));
1390    bool validThread = gIndices.x < mip1Size.x && gIndices.y < mip1Size.y;
1391
1392    constexpr sampler textureSampler(mag_filter::linear, min_filter::linear, mip_filter::linear);
1393
1394
1395    float4 mip1Texel;
1396    if (validThread)
1397    {
1398        float2 texCoords = (float2(gIndices.xy) + float2(0.5, 0.5)) / float2(mip1Size);
1399        mip1Texel = srcTexture.sample(textureSampler, cubeTexcoords(texCoords, int(gIndices.z)),
1400                                      level(options.srcLevel));
1401
1402
1403        dstMip1.write((options.sRGB ? sRGBtoLinear(mip1Texel) : mip1Texel), gIndices.xy, gIndices.z);
1404    }
1405    else
1406    {
1407
1408        lIndex = 0xffffffff;
1409    }
1410
1411    if (options.numMipLevelsToGen == 1)
1412    {
1413        return;
1414    }
1415
1416
1417    threadgroup float sR[(8 * 8)];
1418    threadgroup float sG[(8 * 8)];
1419    threadgroup float sB[(8 * 8)];
1420    threadgroup float sA[(8 * 8)];
1421
1422    generateCubeOr2DArray2ndAndMoreMipmaps(lIndex, gIndices, srcTexture, dstMip2, dstMip3, dstMip4,
1423                                           mip1Size, mip1Texel, sR, sG, sB, sA, options);
1424}
1425
1426kernel void generate2DArrayMipmaps(uint lIndex [[thread_index_in_threadgroup]],
1427                                   ushort3 gIndices [[thread_position_in_grid]],
1428                                   texture2d_array<float> srcTexture [[texture(0)]],
1429                                   texture2d_array<float, access::write> dstMip1 [[texture(1)]],
1430                                   texture2d_array<float, access::write> dstMip2 [[texture(2)]],
1431                                   texture2d_array<float, access::write> dstMip3 [[texture(3)]],
1432                                   texture2d_array<float, access::write> dstMip4 [[texture(4)]],
1433                                   constant GenMipParams &options [[buffer(0)]])
1434{
1435    uint firstMipLevel = options.srcLevel + 1;
1436    ushort2 mip1Size =
1437        ushort2(srcTexture.get_width(firstMipLevel), srcTexture.get_height(firstMipLevel));
1438    bool validThread = gIndices.x < mip1Size.x && gIndices.y < mip1Size.y;
1439
1440    constexpr sampler textureSampler(mag_filter::linear, min_filter::linear, mip_filter::linear);
1441
1442
1443    float4 mip1Texel;
1444    if (validThread)
1445    {
1446        float2 texCoords = (float2(gIndices.xy) + float2(0.5, 0.5)) / float2(mip1Size);
1447        mip1Texel =
1448            srcTexture.sample(textureSampler, texCoords, gIndices.z, level(options.srcLevel));
1449
1450
1451        dstMip1.write((options.sRGB ? sRGBtoLinear(mip1Texel) : mip1Texel), gIndices.xy, gIndices.z);
1452    }
1453    else
1454    {
1455
1456        lIndex = 0xffffffff;
1457    }
1458
1459    if (options.numMipLevelsToGen == 1)
1460    {
1461        return;
1462    }
1463
1464
1465    threadgroup float sR[(8 * 8)];
1466    threadgroup float sG[(8 * 8)];
1467    threadgroup float sB[(8 * 8)];
1468    threadgroup float sA[(8 * 8)];
1469
1470    generateCubeOr2DArray2ndAndMoreMipmaps(lIndex, gIndices, srcTexture, dstMip2, dstMip3, dstMip4,
1471                                           mip1Size, mip1Texel, sR, sG, sB, sA, options);
1472}
1473# 5 "temp_master_source.metal" 2
1474# 1 ".\\copy_buffer.metal" 1
1475# 12 ".\\copy_buffer.metal"
1476#include <metal_pack>
1477
1478
1479# 1 ".\\format_autogen.h" 1
1480
1481
1482
1483namespace rx
1484{
1485namespace mtl_shader
1486{
1487
1488namespace FormatID
1489{
1490enum
1491{
1492    NONE,
1493    D16_UNORM,
1494    D24_UNORM_S8_UINT,
1495    D24_UNORM_X8_UINT,
1496    D32_FLOAT,
1497    D32_FLOAT_S8X24_UINT,
1498    D32_UNORM,
1499    S8_UINT,
1500    A16_FLOAT,
1501    A1R5G5B5_UNORM,
1502    A2R10G10B10_SINT_VERTEX,
1503    A2R10G10B10_SNORM_VERTEX,
1504    A2R10G10B10_SSCALED_VERTEX,
1505    A2R10G10B10_UINT_VERTEX,
1506    A2R10G10B10_UNORM_VERTEX,
1507    A2R10G10B10_USCALED_VERTEX,
1508    A32_FLOAT,
1509    A8_UNORM,
1510    ASTC_10x10_SRGB_BLOCK,
1511    ASTC_10x10_UNORM_BLOCK,
1512    ASTC_10x5_SRGB_BLOCK,
1513    ASTC_10x5_UNORM_BLOCK,
1514    ASTC_10x6_SRGB_BLOCK,
1515    ASTC_10x6_UNORM_BLOCK,
1516    ASTC_10x8_SRGB_BLOCK,
1517    ASTC_10x8_UNORM_BLOCK,
1518    ASTC_12x10_SRGB_BLOCK,
1519    ASTC_12x10_UNORM_BLOCK,
1520    ASTC_12x12_SRGB_BLOCK,
1521    ASTC_12x12_UNORM_BLOCK,
1522    ASTC_3x3x3_UNORM_BLOCK,
1523    ASTC_3x3x3_UNORM_SRGB_BLOCK,
1524    ASTC_4x3x3_UNORM_BLOCK,
1525    ASTC_4x3x3_UNORM_SRGB_BLOCK,
1526    ASTC_4x4_SRGB_BLOCK,
1527    ASTC_4x4_UNORM_BLOCK,
1528    ASTC_4x4x3_UNORM_BLOCK,
1529    ASTC_4x4x3_UNORM_SRGB_BLOCK,
1530    ASTC_4x4x4_UNORM_BLOCK,
1531    ASTC_4x4x4_UNORM_SRGB_BLOCK,
1532    ASTC_5x4_SRGB_BLOCK,
1533    ASTC_5x4_UNORM_BLOCK,
1534    ASTC_5x4x4_UNORM_BLOCK,
1535    ASTC_5x4x4_UNORM_SRGB_BLOCK,
1536    ASTC_5x5_SRGB_BLOCK,
1537    ASTC_5x5_UNORM_BLOCK,
1538    ASTC_5x5x4_UNORM_BLOCK,
1539    ASTC_5x5x4_UNORM_SRGB_BLOCK,
1540    ASTC_5x5x5_UNORM_BLOCK,
1541    ASTC_5x5x5_UNORM_SRGB_BLOCK,
1542    ASTC_6x5_SRGB_BLOCK,
1543    ASTC_6x5_UNORM_BLOCK,
1544    ASTC_6x5x5_UNORM_BLOCK,
1545    ASTC_6x5x5_UNORM_SRGB_BLOCK,
1546    ASTC_6x6_SRGB_BLOCK,
1547    ASTC_6x6_UNORM_BLOCK,
1548    ASTC_6x6x5_UNORM_BLOCK,
1549    ASTC_6x6x5_UNORM_SRGB_BLOCK,
1550    ASTC_6x6x6_UNORM_BLOCK,
1551    ASTC_6x6x6_UNORM_SRGB_BLOCK,
1552    ASTC_8x5_SRGB_BLOCK,
1553    ASTC_8x5_UNORM_BLOCK,
1554    ASTC_8x6_SRGB_BLOCK,
1555    ASTC_8x6_UNORM_BLOCK,
1556    ASTC_8x8_SRGB_BLOCK,
1557    ASTC_8x8_UNORM_BLOCK,
1558    B10G10R10A2_UNORM,
1559    B4G4R4A4_UNORM,
1560    B5G5R5A1_UNORM,
1561    B5G6R5_UNORM,
1562    B8G8R8A8_TYPELESS,
1563    B8G8R8A8_TYPELESS_SRGB,
1564    B8G8R8A8_UNORM,
1565    B8G8R8A8_UNORM_SRGB,
1566    B8G8R8X8_UNORM,
1567    B8G8R8X8_UNORM_SRGB,
1568    BC1_RGBA_UNORM_BLOCK,
1569    BC1_RGBA_UNORM_SRGB_BLOCK,
1570    BC1_RGB_UNORM_BLOCK,
1571    BC1_RGB_UNORM_SRGB_BLOCK,
1572    BC2_RGBA_UNORM_BLOCK,
1573    BC2_RGBA_UNORM_SRGB_BLOCK,
1574    BC3_RGBA_UNORM_BLOCK,
1575    BC3_RGBA_UNORM_SRGB_BLOCK,
1576    BC4_RED_SNORM_BLOCK,
1577    BC4_RED_UNORM_BLOCK,
1578    BC5_RG_SNORM_BLOCK,
1579    BC5_RG_UNORM_BLOCK,
1580    BC6H_RGB_SFLOAT_BLOCK,
1581    BC6H_RGB_UFLOAT_BLOCK,
1582    BC7_RGBA_UNORM_BLOCK,
1583    BC7_RGBA_UNORM_SRGB_BLOCK,
1584    EAC_R11G11_SNORM_BLOCK,
1585    EAC_R11G11_UNORM_BLOCK,
1586    EAC_R11_SNORM_BLOCK,
1587    EAC_R11_UNORM_BLOCK,
1588    ETC1_LOSSY_DECODE_R8G8B8_UNORM_BLOCK,
1589    ETC1_R8G8B8_UNORM_BLOCK,
1590    ETC2_R8G8B8A1_SRGB_BLOCK,
1591    ETC2_R8G8B8A1_UNORM_BLOCK,
1592    ETC2_R8G8B8A8_SRGB_BLOCK,
1593    ETC2_R8G8B8A8_UNORM_BLOCK,
1594    ETC2_R8G8B8_SRGB_BLOCK,
1595    ETC2_R8G8B8_UNORM_BLOCK,
1596    G8_B8R8_2PLANE_420_UNORM,
1597    G8_B8_R8_3PLANE_420_UNORM,
1598    L16A16_FLOAT,
1599    L16_FLOAT,
1600    L32A32_FLOAT,
1601    L32_FLOAT,
1602    L4A4_UNORM,
1603    L8A8_UNORM,
1604    L8_UNORM,
1605    PALETTE4_R4G4B4A4_UNORM,
1606    PALETTE4_R5G5B5A1_UNORM,
1607    PALETTE4_R5G6B5_UNORM,
1608    PALETTE4_R8G8B8A8_UNORM,
1609    PALETTE4_R8G8B8_UNORM,
1610    PALETTE8_R4G4B4A4_UNORM,
1611    PALETTE8_R5G5B5A1_UNORM,
1612    PALETTE8_R5G6B5_UNORM,
1613    PALETTE8_R8G8B8A8_UNORM,
1614    PALETTE8_R8G8B8_UNORM,
1615    PVRTC1_RGBA_2BPP_UNORM_BLOCK,
1616    PVRTC1_RGBA_2BPP_UNORM_SRGB_BLOCK,
1617    PVRTC1_RGBA_4BPP_UNORM_BLOCK,
1618    PVRTC1_RGBA_4BPP_UNORM_SRGB_BLOCK,
1619    PVRTC1_RGB_2BPP_UNORM_BLOCK,
1620    PVRTC1_RGB_2BPP_UNORM_SRGB_BLOCK,
1621    PVRTC1_RGB_4BPP_UNORM_BLOCK,
1622    PVRTC1_RGB_4BPP_UNORM_SRGB_BLOCK,
1623    R10G10B10A2_SINT,
1624    R10G10B10A2_SNORM,
1625    R10G10B10A2_SSCALED,
1626    R10G10B10A2_UINT,
1627    R10G10B10A2_UNORM,
1628    R10G10B10A2_USCALED,
1629    R10G10B10X2_UNORM,
1630    R11G11B10_FLOAT,
1631    R16G16B16A16_FLOAT,
1632    R16G16B16A16_SINT,
1633    R16G16B16A16_SNORM,
1634    R16G16B16A16_SSCALED,
1635    R16G16B16A16_UINT,
1636    R16G16B16A16_UNORM,
1637    R16G16B16A16_USCALED,
1638    R16G16B16_FLOAT,
1639    R16G16B16_SINT,
1640    R16G16B16_SNORM,
1641    R16G16B16_SSCALED,
1642    R16G16B16_UINT,
1643    R16G16B16_UNORM,
1644    R16G16B16_USCALED,
1645    R16G16_FLOAT,
1646    R16G16_SINT,
1647    R16G16_SNORM,
1648    R16G16_SSCALED,
1649    R16G16_UINT,
1650    R16G16_UNORM,
1651    R16G16_USCALED,
1652    R16_FLOAT,
1653    R16_SINT,
1654    R16_SNORM,
1655    R16_SSCALED,
1656    R16_UINT,
1657    R16_UNORM,
1658    R16_USCALED,
1659    R32G32B32A32_FIXED,
1660    R32G32B32A32_FLOAT,
1661    R32G32B32A32_SINT,
1662    R32G32B32A32_SNORM,
1663    R32G32B32A32_SSCALED,
1664    R32G32B32A32_UINT,
1665    R32G32B32A32_UNORM,
1666    R32G32B32A32_USCALED,
1667    R32G32B32_FIXED,
1668    R32G32B32_FLOAT,
1669    R32G32B32_SINT,
1670    R32G32B32_SNORM,
1671    R32G32B32_SSCALED,
1672    R32G32B32_UINT,
1673    R32G32B32_UNORM,
1674    R32G32B32_USCALED,
1675    R32G32_FIXED,
1676    R32G32_FLOAT,
1677    R32G32_SINT,
1678    R32G32_SNORM,
1679    R32G32_SSCALED,
1680    R32G32_UINT,
1681    R32G32_UNORM,
1682    R32G32_USCALED,
1683    R32_FIXED,
1684    R32_FLOAT,
1685    R32_SINT,
1686    R32_SNORM,
1687    R32_SSCALED,
1688    R32_UINT,
1689    R32_UNORM,
1690    R32_USCALED,
1691    R4G4B4A4_UNORM,
1692    R5G5B5A1_UNORM,
1693    R5G6B5_UNORM,
1694    R8G8B8A8_SINT,
1695    R8G8B8A8_SNORM,
1696    R8G8B8A8_SSCALED,
1697    R8G8B8A8_TYPELESS,
1698    R8G8B8A8_TYPELESS_SRGB,
1699    R8G8B8A8_UINT,
1700    R8G8B8A8_UNORM,
1701    R8G8B8A8_UNORM_SRGB,
1702    R8G8B8A8_USCALED,
1703    R8G8B8X8_UNORM,
1704    R8G8B8X8_UNORM_SRGB,
1705    R8G8B8_SINT,
1706    R8G8B8_SNORM,
1707    R8G8B8_SSCALED,
1708    R8G8B8_UINT,
1709    R8G8B8_UNORM,
1710    R8G8B8_UNORM_SRGB,
1711    R8G8B8_USCALED,
1712    R8G8_SINT,
1713    R8G8_SNORM,
1714    R8G8_SSCALED,
1715    R8G8_UINT,
1716    R8G8_UNORM,
1717    R8G8_UNORM_SRGB,
1718    R8G8_USCALED,
1719    R8_SINT,
1720    R8_SNORM,
1721    R8_SSCALED,
1722    R8_UINT,
1723    R8_UNORM,
1724    R8_UNORM_SRGB,
1725    R8_USCALED,
1726    R9G9B9E5_SHAREDEXP,
1727    X2R10G10B10_SINT_VERTEX,
1728    X2R10G10B10_SNORM_VERTEX,
1729    X2R10G10B10_SSCALED_VERTEX,
1730    X2R10G10B10_UINT_VERTEX,
1731    X2R10G10B10_UNORM_VERTEX,
1732    X2R10G10B10_USCALED_VERTEX,
1733    EXTERNAL0,
1734    EXTERNAL1,
1735    EXTERNAL2,
1736    EXTERNAL3,
1737    EXTERNAL4,
1738    EXTERNAL5,
1739    EXTERNAL6,
1740    EXTERNAL7
1741};
1742
1743}
1744
1745}
1746}
1747# 16 ".\\copy_buffer.metal" 2
1748
1749using namespace rx::mtl_shader;
1750
1751constant int kCopyFormatType [[function_constant(10)]];
1752
1753
1754constant int kCopyTextureType [[function_constant(20)]];
1755constant bool kCopyTextureType2D = kCopyTextureType == kTextureType2D;
1756constant bool kCopyTextureType2DArray = kCopyTextureType == kTextureType2DArray;
1757constant bool kCopyTextureType2DMS = kCopyTextureType == kTextureType2DMultisample;
1758constant bool kCopyTextureTypeCube = kCopyTextureType == kTextureTypeCube;
1759constant bool kCopyTextureType3D = kCopyTextureType == kTextureType3D;
1760
1761struct CopyPixelParams
1762{
1763    uint3 copySize;
1764    uint3 textureOffset;
1765
1766    uint bufferStartOffset;
1767    uint pixelSize;
1768    uint bufferRowPitch;
1769    uint bufferDepthPitch;
1770};
1771
1772struct WritePixelParams
1773{
1774    uint2 copySize;
1775    uint2 textureOffset;
1776
1777    uint bufferStartOffset;
1778
1779    uint pixelSize;
1780    uint bufferRowPitch;
1781
1782    uint textureLevel;
1783    uint textureLayer;
1784
1785    bool reverseTextureRowOrder;
1786};
1787# 120 ".\\copy_buffer.metal"
1788template <typename T>
1789static inline void textureWrite(ushort3 gIndices,
1790                                constant CopyPixelParams &options,
1791                                vec<T, 4> color,
1792                                texture2d<T, access::write> dstTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<T, access::write> dstTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<T, access::write> dstTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<T, access::write> dstTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]])
1793{
1794    uint3 writeIndices = options.textureOffset + uint3(gIndices);
1795    switch (kCopyTextureType)
1796    {
1797        case kTextureType2D:
1798            dstTexture2d.write(color, writeIndices.xy);
1799            break;
1800        case kTextureType2DArray:
1801            dstTexture2dArray.write(color, writeIndices.xy, writeIndices.z);
1802            break;
1803        case kTextureType3D:
1804            dstTexture3d.write(color, writeIndices);
1805            break;
1806        case kTextureTypeCube:
1807            dstTextureCube.write(color, writeIndices.xy, writeIndices.z);
1808            break;
1809    }
1810}
1811
1812
1813template <typename T>
1814static inline vec<T, 4> textureRead(ushort2 gIndices,
1815                                    constant WritePixelParams &options,
1816                                    texture2d<T, access::read> srcTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<T, access::read> srcTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<T, access::read> srcTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<T, access::read> srcTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]], texture2d_ms<T, access::read> srcTexture2dMS [[texture(0), function_constant(kCopyTextureType2DMS)]])
1817{
1818    vec<T, 4> color;
1819    uint2 coords = uint2(gIndices);
1820    if (options.reverseTextureRowOrder)
1821    {
1822        coords.y = options.copySize.y - 1 - gIndices.y;
1823    }
1824    coords += options.textureOffset;
1825    switch (kCopyTextureType)
1826    {
1827        case kTextureType2D:
1828            color = srcTexture2d.read(coords.xy, options.textureLevel);
1829            break;
1830        case kTextureType2DArray:
1831            color = srcTexture2dArray.read(coords.xy, options.textureLayer, options.textureLevel);
1832            break;
1833        case kTextureType2DMultisample:
1834            color = resolveTextureMS(srcTexture2dMS, coords.xy);
1835            break;
1836        case kTextureType3D:
1837            color = srcTexture3d.read(uint3(coords, options.textureLayer), options.textureLevel);
1838            break;
1839        case kTextureTypeCube:
1840            color = srcTextureCube.read(coords.xy, options.textureLayer, options.textureLevel);
1841            break;
1842    }
1843    return color;
1844}
1845# 215 ".\\copy_buffer.metal"
1846static inline float4 readR5G6B5_UNORM(uint bufferOffset, constant uchar *buffer)
1847{
1848    float4 color;
1849    ushort src = bytesToShort<ushort>(buffer, bufferOffset);
1850
1851    color.r = normalizedToFloat<5>(getShiftedData<5, 11>(src));
1852    color.g = normalizedToFloat<6>(getShiftedData<6, 5>(src));
1853    color.b = normalizedToFloat<5>(getShiftedData<5, 0>(src));
1854    color.a = 1.0;
1855    return color;
1856}
1857static inline void writeR5G6B5_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
1858{
1859    ushort dst = shiftData<5, 11>(floatToNormalized<5, ushort>(color.r)) |
1860                 shiftData<6, 5>(floatToNormalized<6, ushort>(color.g)) |
1861                 shiftData<5, 0>(floatToNormalized<5, ushort>(color.b));
1862
1863    shortToBytes(dst, bufferOffset, buffer);
1864}
1865
1866
1867static inline float4 readR4G4B4A4_UNORM(uint bufferOffset, constant uchar *buffer)
1868{
1869    float4 color;
1870    ushort src = bytesToShort<ushort>(buffer, bufferOffset);
1871
1872    color.r = normalizedToFloat<4>(getShiftedData<4, 12>(src));
1873    color.g = normalizedToFloat<4>(getShiftedData<4, 8>(src));
1874    color.b = normalizedToFloat<4>(getShiftedData<4, 4>(src));
1875    color.a = normalizedToFloat<4>(getShiftedData<4, 0>(src));
1876    return color;
1877}
1878static inline void writeR4G4B4A4_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
1879{
1880    ushort dst = shiftData<4, 12>(floatToNormalized<4, ushort>(color.r)) |
1881                 shiftData<4, 8>(floatToNormalized<4, ushort>(color.g)) |
1882                 shiftData<4, 4>(floatToNormalized<4, ushort>(color.b)) |
1883                 shiftData<4, 0>(floatToNormalized<4, ushort>(color.a));
1884    ;
1885
1886    shortToBytes(dst, bufferOffset, buffer);
1887}
1888
1889
1890static inline float4 readR5G5B5A1_UNORM(uint bufferOffset, constant uchar *buffer)
1891{
1892    float4 color;
1893    ushort src = bytesToShort<ushort>(buffer, bufferOffset);
1894
1895    color.r = normalizedToFloat<5>(getShiftedData<5, 11>(src));
1896    color.g = normalizedToFloat<5>(getShiftedData<5, 6>(src));
1897    color.b = normalizedToFloat<5>(getShiftedData<5, 1>(src));
1898    color.a = normalizedToFloat<1>(getShiftedData<1, 0>(src));
1899    return color;
1900}
1901static inline void writeR5G5B5A1_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
1902{
1903    ushort dst = shiftData<5, 11>(floatToNormalized<5, ushort>(color.r)) |
1904                 shiftData<5, 6>(floatToNormalized<5, ushort>(color.g)) |
1905                 shiftData<5, 1>(floatToNormalized<5, ushort>(color.b)) |
1906                 shiftData<1, 0>(floatToNormalized<1, ushort>(color.a));
1907    ;
1908
1909    shortToBytes(dst, bufferOffset, buffer);
1910}
1911
1912
1913static inline int4 readR10G10B10A2_SINT(uint bufferOffset, constant uchar *buffer)
1914{
1915    int4 color;
1916    int src = bytesToInt<int>(buffer, bufferOffset);
1917
1918    constexpr int3 rgbSignMask(0x200);
1919    constexpr int3 negativeMask(0xfffffc00);
1920    constexpr int alphaSignMask = 0x2;
1921    constexpr int alphaNegMask = 0xfffffffc;
1922
1923    color.r = getShiftedData<10, 0>(src);
1924    color.g = getShiftedData<10, 10>(src);
1925    color.b = getShiftedData<10, 20>(src);
1926
1927    int3 isRgbNegative = (color.rgb & rgbSignMask) >> 9;
1928    color.rgb = (isRgbNegative * negativeMask) | color.rgb;
1929
1930    color.a = getShiftedData<2, 30>(src);
1931    int isAlphaNegative = color.a & alphaSignMask >> 1;
1932    color.a = (isAlphaNegative * alphaNegMask) | color.a;
1933    return color;
1934}
1935
1936static inline uint4 readR10G10B10A2_UINT(uint bufferOffset, constant uchar *buffer)
1937{
1938    uint4 color;
1939    uint src = bytesToInt<uint>(buffer, bufferOffset);
1940
1941    color.r = getShiftedData<10, 0>(src);
1942    color.g = getShiftedData<10, 10>(src);
1943    color.b = getShiftedData<10, 20>(src);
1944    color.a = getShiftedData<2, 30>(src);
1945    return color;
1946}
1947
1948
1949static inline float4 readR8G8B8A8(uint bufferOffset, constant uchar *buffer, bool isSRGB)
1950{
1951    float4 color;
1952    uint src = bytesToInt<uint>(buffer, bufferOffset);
1953
1954    if (isSRGB)
1955    {
1956        color = unpack_unorm4x8_srgb_to_float(src);
1957    }
1958    else
1959    {
1960        color = unpack_unorm4x8_to_float(src);
1961    }
1962    return color;
1963}
1964static inline void writeR8G8B8A8(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer, bool isSRGB)
1965{
1966    uint dst;
1967
1968    if (isSRGB)
1969    {
1970        dst = pack_float_to_srgb_unorm4x8(color);
1971    }
1972    else
1973    {
1974        dst = pack_float_to_unorm4x8(color);
1975    }
1976
1977    intToBytes(dst, bufferOffset, buffer);
1978}
1979
1980static inline float4 readR8G8B8(uint bufferOffset, constant uchar *buffer, bool isSRGB)
1981{
1982    float4 color;
1983    color.r = normalizedToFloat<uchar>(buffer[bufferOffset]);
1984    color.g = normalizedToFloat<uchar>(buffer[bufferOffset + 1]);
1985    color.b = normalizedToFloat<uchar>(buffer[bufferOffset + 2]);
1986    color.a = 1.0;
1987
1988    if (isSRGB)
1989    {
1990        color = sRGBtoLinear(color);
1991    }
1992    return color;
1993}
1994static inline void writeR8G8B8(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer, bool isSRGB)
1995{
1996    color.a = 1.0;
1997    uint dst;
1998
1999    if (isSRGB)
2000    {
2001        dst = pack_float_to_srgb_unorm4x8(color);
2002    }
2003    else
2004    {
2005        dst = pack_float_to_unorm4x8(color);
2006    }
2007    int24bitToBytes(dst, bufferOffset, buffer);
2008}
2009
2010
2011static inline float4 readR8G8B8A8_SNORM(uint bufferOffset, constant uchar *buffer)
2012{
2013    float4 color;
2014    uint src = bytesToInt<uint>(buffer, bufferOffset);
2015
2016    color = unpack_snorm4x8_to_float(src);
2017
2018    return color;
2019}
2020static inline void writeR8G8B8A8_SNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2021{
2022    uint dst = pack_float_to_snorm4x8(color);
2023
2024    intToBytes(dst, bufferOffset, buffer);
2025}
2026
2027
2028static inline float4 readR8G8B8_SNORM(uint bufferOffset, constant uchar *buffer)
2029{
2030    float4 color;
2031    color.r = normalizedToFloat<7, char>(buffer[bufferOffset]);
2032    color.g = normalizedToFloat<7, char>(buffer[bufferOffset + 1]);
2033    color.b = normalizedToFloat<7, char>(buffer[bufferOffset + 2]);
2034    color.a = 1.0;
2035
2036    return color;
2037}
2038static inline void writeR8G8B8_SNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2039{
2040    uint dst = pack_float_to_snorm4x8(color);
2041
2042    int24bitToBytes(dst, bufferOffset, buffer);
2043}
2044
2045
2046static inline float4 readR8G8B8A8_UNORM(uint bufferOffset, constant uchar *buffer)
2047{
2048    return readR8G8B8A8(bufferOffset, buffer, false);
2049}
2050static inline void writeR8G8B8A8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2051{
2052    return writeR8G8B8A8(gIndices, options, bufferOffset, color, buffer, false);
2053}
2054
2055static inline float4 readR8G8B8A8_UNORM_SRGB(uint bufferOffset, constant uchar *buffer)
2056{
2057    return readR8G8B8A8(bufferOffset, buffer, true);
2058}
2059static inline void writeR8G8B8A8_UNORM_SRGB(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2060{
2061    return writeR8G8B8A8(gIndices, options, bufferOffset, color, buffer, true);
2062}
2063
2064
2065static inline float4 readB8G8R8A8_UNORM(uint bufferOffset, constant uchar *buffer)
2066{
2067    return readR8G8B8A8(bufferOffset, buffer, false).bgra;
2068}
2069static inline void writeB8G8R8A8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2070{
2071    color.rgba = color.bgra;
2072    return writeR8G8B8A8(gIndices, options, bufferOffset, color, buffer, false);
2073}
2074
2075static inline float4 readB8G8R8A8_UNORM_SRGB(uint bufferOffset, constant uchar *buffer)
2076{
2077    return readR8G8B8A8(bufferOffset, buffer, true).bgra;
2078}
2079static inline void writeB8G8R8A8_UNORM_SRGB(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2080{
2081    color.rgba = color.bgra;
2082    return writeR8G8B8A8(gIndices, options, bufferOffset, color, buffer, true);
2083}
2084
2085
2086static inline float4 readR8G8B8_UNORM(uint bufferOffset, constant uchar *buffer)
2087{
2088    return readR8G8B8(bufferOffset, buffer, false);
2089}
2090static inline void writeR8G8B8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2091{
2092    return writeR8G8B8(gIndices, options, bufferOffset, color, buffer, false);
2093}
2094
2095static inline float4 readR8G8B8_UNORM_SRGB(uint bufferOffset, constant uchar *buffer)
2096{
2097    return readR8G8B8(bufferOffset, buffer, true);
2098}
2099static inline void writeR8G8B8_UNORM_SRGB(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2100{
2101    return writeR8G8B8(gIndices, options, bufferOffset, color, buffer, true);
2102}
2103
2104
2105static inline float4 readL8_UNORM(uint bufferOffset, constant uchar *buffer)
2106{
2107    float4 color;
2108    color.rgb = float3(normalizedToFloat<uchar>(buffer[bufferOffset]));
2109    color.a = 1.0;
2110    return color;
2111}
2112static inline void writeL8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2113{
2114    buffer[bufferOffset] = floatToNormalized<uchar>(color.r);
2115}
2116
2117
2118static inline void writeA8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2119{
2120    buffer[bufferOffset] = floatToNormalized<uchar>(color.a);
2121}
2122
2123
2124static inline float4 readL8A8_UNORM(uint bufferOffset, constant uchar *buffer)
2125{
2126    float4 color;
2127    color.rgb = float3(normalizedToFloat<uchar>(buffer[bufferOffset]));
2128    color.a = normalizedToFloat<uchar>(buffer[bufferOffset + 1]);
2129    return color;
2130}
2131static inline void writeL8A8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2132{
2133    buffer[bufferOffset] = floatToNormalized<uchar>(color.r);
2134    buffer[bufferOffset + 1] = floatToNormalized<uchar>(color.a);
2135}
2136
2137
2138static inline float4 readR8_UNORM(uint bufferOffset, constant uchar *buffer)
2139{
2140    float4 color;
2141    color.r = normalizedToFloat<uchar>(buffer[bufferOffset]);
2142    color.g = color.b = 0.0;
2143    color.a = 1.0;
2144    return color;
2145}
2146static inline void writeR8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2147{
2148    buffer[bufferOffset] = floatToNormalized<uchar>(color.r);
2149}
2150
2151static inline float4 readR8_SNORM(uint bufferOffset, constant uchar *buffer)
2152{
2153    float4 color;
2154    color.r = normalizedToFloat<7, char>(buffer[bufferOffset]);
2155    color.g = color.b = 0.0;
2156    color.a = 1.0;
2157    return color;
2158}
2159static inline void writeR8_SNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2160{
2161    buffer[bufferOffset] = as_type<uchar>(floatToNormalized<char>(color.r));
2162}
2163
2164
2165static inline int4 readR8_SINT(uint bufferOffset, constant uchar *buffer)
2166{
2167    int4 color;
2168    color.r = as_type<char>(buffer[bufferOffset]);
2169    color.g = color.b = 0;
2170    color.a = 1;
2171    return color;
2172}
2173static inline void writeR8_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer)
2174{
2175    buffer[bufferOffset] = static_cast<uchar>(color.r);
2176}
2177
2178
2179static inline uint4 readR8_UINT(uint bufferOffset, constant uchar *buffer)
2180{
2181    uint4 color;
2182    color.r = as_type<uchar>(buffer[bufferOffset]);
2183    color.g = color.b = 0;
2184    color.a = 1;
2185    return color;
2186}
2187static inline void writeR8_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer)
2188{
2189    buffer[bufferOffset] = static_cast<uchar>(color.r);
2190}
2191
2192
2193static inline float4 readR8G8_UNORM(uint bufferOffset, constant uchar *buffer)
2194{
2195    float4 color;
2196    color.r = normalizedToFloat<uchar>(buffer[bufferOffset]);
2197    color.g = normalizedToFloat<uchar>(buffer[bufferOffset + 1]);
2198    color.b = 0.0;
2199    color.a = 1.0;
2200    return color;
2201}
2202static inline void writeR8G8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2203{
2204    buffer[bufferOffset] = floatToNormalized<uchar>(color.r);
2205    buffer[bufferOffset + 1] = floatToNormalized<uchar>(color.g);
2206}
2207
2208static inline float4 readR8G8_SNORM(uint bufferOffset, constant uchar *buffer)
2209{
2210    float4 color;
2211    color.r = normalizedToFloat<7, char>(buffer[bufferOffset]);
2212    color.g = normalizedToFloat<7, char>(buffer[bufferOffset + 1]);
2213    color.b = 0.0;
2214    color.a = 1.0;
2215    return color;
2216}
2217static inline void writeR8G8_SNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2218{
2219    buffer[bufferOffset] = as_type<uchar>(floatToNormalized<char>(color.r));
2220    buffer[bufferOffset + 1] = as_type<uchar>(floatToNormalized<char>(color.g));
2221}
2222
2223
2224static inline int4 readR8G8_SINT(uint bufferOffset, constant uchar *buffer)
2225{
2226    int4 color;
2227    color.r = as_type<char>(buffer[bufferOffset]);
2228    color.g = as_type<char>(buffer[bufferOffset + 1]);
2229    color.b = 0;
2230    color.a = 1;
2231    return color;
2232}
2233static inline void writeR8G8_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer)
2234{
2235    buffer[bufferOffset] = static_cast<uchar>(color.r);
2236    buffer[bufferOffset + 1] = static_cast<uchar>(color.g);
2237}
2238
2239
2240static inline uint4 readR8G8_UINT(uint bufferOffset, constant uchar *buffer)
2241{
2242    uint4 color;
2243    color.r = as_type<uchar>(buffer[bufferOffset]);
2244    color.g = as_type<uchar>(buffer[bufferOffset + 1]);
2245    color.b = 0;
2246    color.a = 1;
2247    return color;
2248}
2249static inline void writeR8G8_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer)
2250{
2251    buffer[bufferOffset] = static_cast<uchar>(color.r);
2252    buffer[bufferOffset + 1] = static_cast<uchar>(color.g);
2253}
2254
2255
2256static inline int4 readR8G8B8_SINT(uint bufferOffset, constant uchar *buffer)
2257{
2258    int4 color;
2259    color.r = as_type<char>(buffer[bufferOffset]);
2260    color.g = as_type<char>(buffer[bufferOffset + 1]);
2261    color.b = as_type<char>(buffer[bufferOffset + 2]);
2262    color.a = 1;
2263    return color;
2264}
2265
2266
2267static inline uint4 readR8G8B8_UINT(uint bufferOffset, constant uchar *buffer)
2268{
2269    uint4 color;
2270    color.r = as_type<uchar>(buffer[bufferOffset]);
2271    color.g = as_type<uchar>(buffer[bufferOffset + 1]);
2272    color.b = as_type<uchar>(buffer[bufferOffset + 2]);
2273    color.a = 1;
2274    return color;
2275}
2276
2277
2278static inline int4 readR8G8B8A8_SINT(uint bufferOffset, constant uchar *buffer)
2279{
2280    int4 color;
2281    color.r = as_type<char>(buffer[bufferOffset]);
2282    color.g = as_type<char>(buffer[bufferOffset + 1]);
2283    color.b = as_type<char>(buffer[bufferOffset + 2]);
2284    color.a = as_type<char>(buffer[bufferOffset + 3]);
2285    return color;
2286}
2287static inline void writeR8G8B8A8_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer)
2288{
2289    buffer[bufferOffset] = static_cast<uchar>(color.r);
2290    buffer[bufferOffset + 1] = static_cast<uchar>(color.g);
2291    buffer[bufferOffset + 2] = static_cast<uchar>(color.b);
2292    buffer[bufferOffset + 3] = static_cast<uchar>(color.a);
2293}
2294
2295
2296static inline uint4 readR8G8B8A8_UINT(uint bufferOffset, constant uchar *buffer)
2297{
2298    uint4 color;
2299    color.r = as_type<uchar>(buffer[bufferOffset]);
2300    color.g = as_type<uchar>(buffer[bufferOffset + 1]);
2301    color.b = as_type<uchar>(buffer[bufferOffset + 2]);
2302    color.a = as_type<uchar>(buffer[bufferOffset + 3]);
2303    return color;
2304}
2305static inline void writeR8G8B8A8_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer)
2306{
2307    buffer[bufferOffset] = static_cast<uchar>(color.r);
2308    buffer[bufferOffset + 1] = static_cast<uchar>(color.g);
2309    buffer[bufferOffset + 2] = static_cast<uchar>(color.b);
2310    buffer[bufferOffset + 3] = static_cast<uchar>(color.a);
2311}
2312
2313
2314static inline float4 readR16_FLOAT(uint bufferOffset, constant uchar *buffer)
2315{
2316    float4 color;
2317    color.r = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset));
2318    color.g = color.b = 0.0;
2319    color.a = 1.0;
2320    return color;
2321}
2322static inline void writeR16_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2323{
2324    shortToBytes(as_type<ushort>(static_cast<half>(color.r)), bufferOffset, buffer);
2325}
2326
2327template <typename ShortType>
2328static inline float4 readR16_NORM(uint bufferOffset, constant uchar *buffer)
2329{
2330    float4 color;
2331    color.r = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset));
2332    color.g = color.b = 0.0;
2333    color.a = 1.0;
2334    return color;
2335}
2336
2337
2338
2339template<typename ShortType>
2340static inline void writeR16_NORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2341{
2342    shortToBytes(floatToNormalized<ShortType>(color.r), bufferOffset, buffer);
2343}
2344
2345
2346
2347
2348static inline int4 readR16_SINT(uint bufferOffset, constant uchar *buffer)
2349{
2350    int4 color;
2351    color.r = bytesToShort<short>(buffer, bufferOffset);
2352    color.g = color.b = 0;
2353    color.a = 1;
2354    return color;
2355}
2356static inline void writeR16_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer)
2357{
2358    shortToBytes(static_cast<short>(color.r), bufferOffset, buffer);
2359}
2360
2361
2362static inline uint4 readR16_UINT(uint bufferOffset, constant uchar *buffer)
2363{
2364    uint4 color;
2365    color.r = bytesToShort<ushort>(buffer, bufferOffset);
2366    color.g = color.b = 0;
2367    color.a = 1;
2368    return color;
2369}
2370static inline void writeR16_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer)
2371{
2372    shortToBytes(static_cast<ushort>(color.r), bufferOffset, buffer);
2373}
2374
2375
2376static inline float4 readA16_FLOAT(uint bufferOffset, constant uchar *buffer)
2377{
2378    float4 color;
2379    color.a = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset));
2380    color.rgb = 0.0;
2381    return color;
2382}
2383static inline void writeA16_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2384{
2385    shortToBytes(as_type<ushort>(static_cast<half>(color.a)), bufferOffset, buffer);
2386}
2387
2388
2389static inline float4 readL16_FLOAT(uint bufferOffset, constant uchar *buffer)
2390{
2391    float4 color;
2392    color.rgb = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset));
2393    color.a = 1.0;
2394    return color;
2395}
2396static inline void writeL16_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2397{
2398    shortToBytes(as_type<ushort>(static_cast<half>(color.r)), bufferOffset, buffer);
2399}
2400
2401
2402static inline float4 readL16A16_FLOAT(uint bufferOffset, constant uchar *buffer)
2403{
2404    float4 color;
2405    color.rgb = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset));
2406    color.a = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 2));
2407    return color;
2408}
2409static inline void writeL16A16_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2410{
2411    shortToBytes(as_type<ushort>(static_cast<half>(color.r)), bufferOffset, buffer);
2412    shortToBytes(as_type<ushort>(static_cast<half>(color.a)), bufferOffset + 2, buffer);
2413}
2414
2415
2416static inline float4 readR16G16_FLOAT(uint bufferOffset, constant uchar *buffer)
2417{
2418    float4 color;
2419    color.r = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset));
2420    color.g = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 2));
2421    color.b = 0.0;
2422    color.a = 1.0;
2423    return color;
2424}
2425static inline void writeR16G16_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2426{
2427    shortToBytes(as_type<ushort>(static_cast<half>(color.r)), bufferOffset, buffer);
2428    shortToBytes(as_type<ushort>(static_cast<half>(color.g)), bufferOffset + 2, buffer);
2429}
2430
2431
2432template <typename ShortType>
2433static inline float4 readR16G16_NORM(uint bufferOffset, constant uchar *buffer)
2434{
2435    float4 color;
2436    color.r = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset));
2437    color.g = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 2));
2438    color.b = 0.0;
2439    color.a = 1.0;
2440    return color;
2441}
2442
2443
2444
2445template<typename ShortType>
2446static inline void writeR16G16_NORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2447{
2448    shortToBytes(floatToNormalized<ShortType>(color.r), bufferOffset, buffer);
2449    shortToBytes(floatToNormalized<ShortType>(color.g), bufferOffset + 2, buffer);
2450}
2451
2452
2453
2454
2455static inline int4 readR16G16_SINT(uint bufferOffset, constant uchar *buffer)
2456{
2457    int4 color;
2458    color.r = bytesToShort<short>(buffer, bufferOffset);
2459    color.g = bytesToShort<short>(buffer, bufferOffset + 2);
2460    color.b = 0;
2461    color.a = 1;
2462    return color;
2463}
2464static inline void writeR16G16_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer)
2465{
2466    shortToBytes(static_cast<short>(color.r), bufferOffset, buffer);
2467    shortToBytes(static_cast<short>(color.g), bufferOffset + 2, buffer);
2468}
2469
2470
2471static inline uint4 readR16G16_UINT(uint bufferOffset, constant uchar *buffer)
2472{
2473    uint4 color;
2474    color.r = bytesToShort<ushort>(buffer, bufferOffset);
2475    color.g = bytesToShort<ushort>(buffer, bufferOffset + 2);
2476    color.b = 0;
2477    color.a = 1;
2478    return color;
2479}
2480static inline void writeR16G16_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer)
2481{
2482    shortToBytes(static_cast<ushort>(color.r), bufferOffset, buffer);
2483    shortToBytes(static_cast<ushort>(color.g), bufferOffset + 2, buffer);
2484}
2485
2486
2487static inline float4 readR16G16B16_FLOAT(uint bufferOffset, constant uchar *buffer)
2488{
2489    float4 color;
2490    color.r = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset));
2491    color.g = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 2));
2492    color.b = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 4));
2493    color.a = 1.0;
2494    return color;
2495}
2496
2497
2498template <typename ShortType>
2499static inline float4 readR16G16B16_NORM(uint bufferOffset, constant uchar *buffer)
2500{
2501    float4 color;
2502    color.r = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset));
2503    color.g = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 2));
2504    color.b = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 4));
2505    color.a = 1.0;
2506    return color;
2507}
2508
2509
2510
2511static inline int4 readR16G16B16_SINT(uint bufferOffset, constant uchar *buffer)
2512{
2513    int4 color;
2514    color.r = bytesToShort<short>(buffer, bufferOffset);
2515    color.g = bytesToShort<short>(buffer, bufferOffset + 2);
2516    color.b = bytesToShort<short>(buffer, bufferOffset + 4);
2517    color.a = 1;
2518    return color;
2519}
2520
2521
2522static inline uint4 readR16G16B16_UINT(uint bufferOffset, constant uchar *buffer)
2523{
2524    uint4 color;
2525    color.r = bytesToShort<ushort>(buffer, bufferOffset);
2526    color.g = bytesToShort<ushort>(buffer, bufferOffset + 2);
2527    color.b = bytesToShort<ushort>(buffer, bufferOffset + 4);
2528    color.a = 1;
2529    return color;
2530}
2531
2532
2533static inline float4 readR16G16B16A16_FLOAT(uint bufferOffset, constant uchar *buffer)
2534{
2535    float4 color;
2536    color.r = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset));
2537    color.g = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 2));
2538    color.b = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 4));
2539    color.a = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 6));
2540    return color;
2541}
2542static inline void writeR16G16B16A16_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2543{
2544    shortToBytes(as_type<ushort>(static_cast<half>(color.r)), bufferOffset, buffer);
2545    shortToBytes(as_type<ushort>(static_cast<half>(color.g)), bufferOffset + 2, buffer);
2546    shortToBytes(as_type<ushort>(static_cast<half>(color.b)), bufferOffset + 4, buffer);
2547    shortToBytes(as_type<ushort>(static_cast<half>(color.a)), bufferOffset + 6, buffer);
2548}
2549
2550
2551template <typename ShortType>
2552static inline float4 readR16G16B16A16_NORM(uint bufferOffset, constant uchar *buffer)
2553{
2554    float4 color;
2555    color.r = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset));
2556    color.g = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 2));
2557    color.b = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 4));
2558    color.a = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 6));
2559    return color;
2560}
2561
2562
2563
2564template<typename ShortType>
2565static inline void writeR16G16B16A16_NORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2566{
2567    shortToBytes(floatToNormalized<ShortType>(color.r), bufferOffset, buffer);
2568    shortToBytes(floatToNormalized<ShortType>(color.g), bufferOffset + 2, buffer);
2569    shortToBytes(floatToNormalized<ShortType>(color.b), bufferOffset + 4, buffer);
2570    shortToBytes(floatToNormalized<ShortType>(color.a), bufferOffset + 6, buffer);
2571}
2572
2573
2574
2575
2576static inline int4 readR16G16B16A16_SINT(uint bufferOffset, constant uchar *buffer)
2577{
2578    int4 color;
2579    color.r = bytesToShort<short>(buffer, bufferOffset);
2580    color.g = bytesToShort<short>(buffer, bufferOffset + 2);
2581    color.b = bytesToShort<short>(buffer, bufferOffset + 4);
2582    color.a = bytesToShort<short>(buffer, bufferOffset + 6);
2583    return color;
2584}
2585static inline void writeR16G16B16A16_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer)
2586{
2587    shortToBytes(static_cast<short>(color.r), bufferOffset, buffer);
2588    shortToBytes(static_cast<short>(color.g), bufferOffset + 2, buffer);
2589    shortToBytes(static_cast<short>(color.b), bufferOffset + 4, buffer);
2590    shortToBytes(static_cast<short>(color.a), bufferOffset + 6, buffer);
2591}
2592
2593
2594static inline uint4 readR16G16B16A16_UINT(uint bufferOffset, constant uchar *buffer)
2595{
2596    uint4 color;
2597    color.r = bytesToShort<ushort>(buffer, bufferOffset);
2598    color.g = bytesToShort<ushort>(buffer, bufferOffset + 2);
2599    color.b = bytesToShort<ushort>(buffer, bufferOffset + 4);
2600    color.a = bytesToShort<ushort>(buffer, bufferOffset + 6);
2601    return color;
2602}
2603static inline void writeR16G16B16A16_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer)
2604{
2605    shortToBytes(static_cast<ushort>(color.r), bufferOffset, buffer);
2606    shortToBytes(static_cast<ushort>(color.g), bufferOffset + 2, buffer);
2607    shortToBytes(static_cast<ushort>(color.b), bufferOffset + 4, buffer);
2608    shortToBytes(static_cast<ushort>(color.a), bufferOffset + 6, buffer);
2609}
2610
2611
2612static inline float4 readR32_FLOAT(uint bufferOffset, constant uchar *buffer)
2613{
2614    float4 color;
2615    color.r = as_type<float>(bytesToInt<uint>(buffer, bufferOffset));
2616    color.g = color.b = 0.0;
2617    color.a = 1.0;
2618    return color;
2619}
2620static inline void writeR32_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2621{
2622    intToBytes(as_type<uint>(color.r), bufferOffset, buffer);
2623}
2624
2625
2626template <typename IntType>
2627static inline float4 readR32_NORM(uint bufferOffset, constant uchar *buffer)
2628{
2629    float4 color;
2630    color.r = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset));
2631    color.g = color.b = 0.0;
2632    color.a = 1.0;
2633    return color;
2634}
2635
2636
2637
2638
2639static inline float4 readA32_FLOAT(uint bufferOffset, constant uchar *buffer)
2640{
2641    float4 color;
2642    color.a = as_type<float>(bytesToInt<uint>(buffer, bufferOffset));
2643    color.rgb = 0.0;
2644    return color;
2645}
2646static inline void writeA32_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2647{
2648    intToBytes(as_type<uint>(color.a), bufferOffset, buffer);
2649}
2650
2651
2652static inline float4 readL32_FLOAT(uint bufferOffset, constant uchar *buffer)
2653{
2654    float4 color;
2655    color.rgb = as_type<float>(bytesToInt<uint>(buffer, bufferOffset));
2656    color.a = 1.0;
2657    return color;
2658}
2659static inline void writeL32_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2660{
2661    intToBytes(as_type<uint>(color.r), bufferOffset, buffer);
2662}
2663
2664
2665static inline int4 readR32_SINT(uint bufferOffset, constant uchar *buffer)
2666{
2667    int4 color;
2668    color.r = bytesToInt<int>(buffer, bufferOffset);
2669    color.g = color.b = 0;
2670    color.a = 1;
2671    return color;
2672}
2673static inline void writeR32_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer)
2674{
2675    intToBytes(color.r, bufferOffset, buffer);
2676}
2677
2678
2679static inline float4 readR32_FIXED(uint bufferOffset, constant uchar *buffer)
2680{
2681    float4 color;
2682    constexpr float kDivisor = 1.0f / (1 << 16);
2683    color.r = bytesToInt<int>(buffer, bufferOffset) * kDivisor;
2684    color.g = color.b = 0.0;
2685    color.a = 1.0;
2686    return color;
2687}
2688
2689
2690static inline uint4 readR32_UINT(uint bufferOffset, constant uchar *buffer)
2691{
2692    uint4 color;
2693    color.r = bytesToInt<uint>(buffer, bufferOffset);
2694    color.g = color.b = 0;
2695    color.a = 1;
2696    return color;
2697}
2698static inline void writeR32_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer)
2699{
2700    intToBytes(color.r, bufferOffset, buffer);
2701}
2702
2703
2704static inline float4 readL32A32_FLOAT(uint bufferOffset, constant uchar *buffer)
2705{
2706    float4 color;
2707    color.rgb = as_type<float>(bytesToInt<uint>(buffer, bufferOffset));
2708    color.a = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 4));
2709    return color;
2710}
2711static inline void writeL32A32_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2712{
2713    intToBytes(as_type<uint>(color.r), bufferOffset, buffer);
2714    intToBytes(as_type<uint>(color.a), bufferOffset + 4, buffer);
2715}
2716
2717
2718static inline float4 readR32G32_FLOAT(uint bufferOffset, constant uchar *buffer)
2719{
2720    float4 color;
2721    color.r = as_type<float>(bytesToInt<uint>(buffer, bufferOffset));
2722    color.g = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 4));
2723    color.b = 0.0;
2724    color.a = 1.0;
2725    return color;
2726}
2727static inline void writeR32G32_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2728{
2729    intToBytes(as_type<uint>(color.r), bufferOffset, buffer);
2730    intToBytes(as_type<uint>(color.g), bufferOffset + 4, buffer);
2731}
2732
2733
2734template <typename IntType>
2735static inline float4 readR32G32_NORM(uint bufferOffset, constant uchar *buffer)
2736{
2737    float4 color;
2738    color.r = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset));
2739    color.g = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset + 4));
2740    color.b = 0.0;
2741    color.a = 1.0;
2742    return color;
2743}
2744
2745
2746
2747
2748static inline int4 readR32G32_SINT(uint bufferOffset, constant uchar *buffer)
2749{
2750    int4 color;
2751    color.r = bytesToInt<int>(buffer, bufferOffset);
2752    color.g = bytesToInt<int>(buffer, bufferOffset + 4);
2753    color.b = 0;
2754    color.a = 1;
2755    return color;
2756}
2757static inline void writeR32G32_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer)
2758{
2759    intToBytes(color.r, bufferOffset, buffer);
2760    intToBytes(color.g, bufferOffset + 4, buffer);
2761}
2762
2763
2764static inline float4 readR32G32_FIXED(uint bufferOffset, constant uchar *buffer)
2765{
2766    float4 color;
2767    constexpr float kDivisor = 1.0f / (1 << 16);
2768    color.r = bytesToInt<int>(buffer, bufferOffset) * kDivisor;
2769    color.g = bytesToInt<int>(buffer, bufferOffset + 4) * kDivisor;
2770    color.b = 0.0;
2771    color.a = 1.0;
2772    return color;
2773}
2774
2775
2776static inline uint4 readR32G32_UINT(uint bufferOffset, constant uchar *buffer)
2777{
2778    uint4 color;
2779    color.r = bytesToInt<uint>(buffer, bufferOffset);
2780    color.g = bytesToInt<uint>(buffer, bufferOffset + 4);
2781    color.b = 0;
2782    color.a = 1;
2783    return color;
2784}
2785static inline void writeR32G32_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer)
2786{
2787    intToBytes(color.r, bufferOffset, buffer);
2788    intToBytes(color.g, bufferOffset + 4, buffer);
2789}
2790
2791
2792static inline float4 readR32G32B32_FLOAT(uint bufferOffset, constant uchar *buffer)
2793{
2794    float4 color;
2795    color.r = as_type<float>(bytesToInt<uint>(buffer, bufferOffset));
2796    color.g = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 4));
2797    color.b = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 8));
2798    color.a = 1.0;
2799    return color;
2800}
2801
2802
2803template <typename IntType>
2804static inline float4 readR32G32B32_NORM(uint bufferOffset, constant uchar *buffer)
2805{
2806    float4 color;
2807    color.r = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset));
2808    color.g = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset + 4));
2809    color.b = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset + 8));
2810    color.a = 1.0;
2811    return color;
2812}
2813
2814
2815
2816
2817static inline int4 readR32G32B32_SINT(uint bufferOffset, constant uchar *buffer)
2818{
2819    int4 color;
2820    color.r = bytesToInt<int>(buffer, bufferOffset);
2821    color.g = bytesToInt<int>(buffer, bufferOffset + 4);
2822    color.b = bytesToInt<int>(buffer, bufferOffset + 8);
2823    color.a = 1;
2824    return color;
2825}
2826
2827
2828static inline float4 readR32G32B32_FIXED(uint bufferOffset, constant uchar *buffer)
2829{
2830    float4 color;
2831    constexpr float kDivisor = 1.0f / (1 << 16);
2832    color.r = bytesToInt<int>(buffer, bufferOffset) * kDivisor;
2833    color.g = bytesToInt<int>(buffer, bufferOffset + 4) * kDivisor;
2834    color.b = bytesToInt<int>(buffer, bufferOffset + 8) * kDivisor;
2835    color.a = 1.0;
2836    return color;
2837}
2838
2839
2840static inline uint4 readR32G32B32_UINT(uint bufferOffset, constant uchar *buffer)
2841{
2842    uint4 color;
2843    color.r = bytesToInt<uint>(buffer, bufferOffset);
2844    color.g = bytesToInt<uint>(buffer, bufferOffset + 4);
2845    color.b = bytesToInt<uint>(buffer, bufferOffset + 8);
2846    color.a = 1;
2847    return color;
2848}
2849
2850
2851static inline float4 readR32G32B32A32_FLOAT(uint bufferOffset, constant uchar *buffer)
2852{
2853    float4 color;
2854    color.r = as_type<float>(bytesToInt<uint>(buffer, bufferOffset));
2855    color.g = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 4));
2856    color.b = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 8));
2857    color.a = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 12));
2858    return color;
2859}
2860static inline void writeR32G32B32A32_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
2861{
2862    intToBytes(as_type<uint>(color.r), bufferOffset, buffer);
2863    intToBytes(as_type<uint>(color.g), bufferOffset + 4, buffer);
2864    intToBytes(as_type<uint>(color.b), bufferOffset + 8, buffer);
2865    intToBytes(as_type<uint>(color.a), bufferOffset + 12, buffer);
2866}
2867
2868
2869template <typename IntType>
2870static inline float4 readR32G32B32A32_NORM(uint bufferOffset, constant uchar *buffer)
2871{
2872    float4 color;
2873    color.r = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset));
2874    color.g = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset + 4));
2875    color.b = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset + 8));
2876    color.a = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset + 12));
2877    return color;
2878}
2879
2880
2881
2882
2883static inline int4 readR32G32B32A32_SINT(uint bufferOffset, constant uchar *buffer)
2884{
2885    int4 color;
2886    color.r = bytesToInt<int>(buffer, bufferOffset);
2887    color.g = bytesToInt<int>(buffer, bufferOffset + 4);
2888    color.b = bytesToInt<int>(buffer, bufferOffset + 8);
2889    color.a = bytesToInt<int>(buffer, bufferOffset + 12);
2890    return color;
2891}
2892static inline void writeR32G32B32A32_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer)
2893{
2894    intToBytes(color.r, bufferOffset, buffer);
2895    intToBytes(color.g, bufferOffset + 4, buffer);
2896    intToBytes(color.b, bufferOffset + 8, buffer);
2897    intToBytes(color.a, bufferOffset + 12, buffer);
2898}
2899
2900static inline float4 readR32G32B32A32_FIXED(uint bufferOffset, constant uchar *buffer)
2901{
2902    float4 color;
2903    constexpr float kDivisor = 1.0f / (1 << 16);
2904    color.r = bytesToInt<int>(buffer, bufferOffset) * kDivisor;
2905    color.g = bytesToInt<int>(buffer, bufferOffset + 4) * kDivisor;
2906    color.b = bytesToInt<int>(buffer, bufferOffset + 8) * kDivisor;
2907    color.a = bytesToInt<int>(buffer, bufferOffset + 12) * kDivisor;
2908    return color;
2909}
2910
2911
2912static inline uint4 readR32G32B32A32_UINT(uint bufferOffset, constant uchar *buffer)
2913{
2914    uint4 color;
2915    color.r = bytesToInt<uint>(buffer, bufferOffset);
2916    color.g = bytesToInt<uint>(buffer, bufferOffset + 4);
2917    color.b = bytesToInt<uint>(buffer, bufferOffset + 8);
2918    color.a = bytesToInt<uint>(buffer, bufferOffset + 12);
2919    return color;
2920}
2921static inline void writeR32G32B32A32_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer)
2922{
2923    intToBytes(color.r, bufferOffset, buffer);
2924    intToBytes(color.g, bufferOffset + 4, buffer);
2925    intToBytes(color.b, bufferOffset + 8, buffer);
2926    intToBytes(color.a, bufferOffset + 12, buffer);
2927}
2928# 1320 ".\\copy_buffer.metal"
2929static inline int4 readR8_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR8_SINT(bufferOffset, buffer); } static inline uint4 readR8_USCALED(uint bufferOffset, constant uchar *buffer) { return readR8_UINT(bufferOffset, buffer); } static inline int4 readR8G8_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR8G8_SINT(bufferOffset, buffer); } static inline uint4 readR8G8_USCALED(uint bufferOffset, constant uchar *buffer) { return readR8G8_UINT(bufferOffset, buffer); } static inline int4 readR8G8B8_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR8G8B8_SINT(bufferOffset, buffer); } static inline uint4 readR8G8B8_USCALED(uint bufferOffset, constant uchar *buffer) { return readR8G8B8_UINT(bufferOffset, buffer); } static inline int4 readR8G8B8A8_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR8G8B8A8_SINT(bufferOffset, buffer); } static inline uint4 readR8G8B8A8_USCALED(uint bufferOffset, constant uchar *buffer) { return readR8G8B8A8_UINT(bufferOffset, buffer); }
2930static inline int4 readR16_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR16_SINT(bufferOffset, buffer); } static inline uint4 readR16_USCALED(uint bufferOffset, constant uchar *buffer) { return readR16_UINT(bufferOffset, buffer); } static inline int4 readR16G16_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR16G16_SINT(bufferOffset, buffer); } static inline uint4 readR16G16_USCALED(uint bufferOffset, constant uchar *buffer) { return readR16G16_UINT(bufferOffset, buffer); } static inline int4 readR16G16B16_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR16G16B16_SINT(bufferOffset, buffer); } static inline uint4 readR16G16B16_USCALED(uint bufferOffset, constant uchar *buffer) { return readR16G16B16_UINT(bufferOffset, buffer); } static inline int4 readR16G16B16A16_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR16G16B16A16_SINT(bufferOffset, buffer); } static inline uint4 readR16G16B16A16_USCALED(uint bufferOffset, constant uchar *buffer) { return readR16G16B16A16_UINT(bufferOffset, buffer); }
2931static inline int4 readR32_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR32_SINT(bufferOffset, buffer); } static inline uint4 readR32_USCALED(uint bufferOffset, constant uchar *buffer) { return readR32_UINT(bufferOffset, buffer); } static inline int4 readR32G32_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR32G32_SINT(bufferOffset, buffer); } static inline uint4 readR32G32_USCALED(uint bufferOffset, constant uchar *buffer) { return readR32G32_UINT(bufferOffset, buffer); } static inline int4 readR32G32B32_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR32G32B32_SINT(bufferOffset, buffer); } static inline uint4 readR32G32B32_USCALED(uint bufferOffset, constant uchar *buffer) { return readR32G32B32_UINT(bufferOffset, buffer); } static inline int4 readR32G32B32A32_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR32G32B32A32_SINT(bufferOffset, buffer); } static inline uint4 readR32G32B32A32_USCALED(uint bufferOffset, constant uchar *buffer) { return readR32G32B32A32_UINT(bufferOffset, buffer); }
2932
2933static inline int4 readR10G10B10A2_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR10G10B10A2_SINT(bufferOffset, buffer); } static inline uint4 readR10G10B10A2_USCALED(uint bufferOffset, constant uchar *buffer) { return readR10G10B10A2_UINT(bufferOffset, buffer); }
2934
2935
2936kernel void readFromBufferToFloatTexture(ushort3 gIndices [[thread_position_in_grid]], constant CopyPixelParams &options[[buffer(0)]], constant uchar *buffer [[buffer(1)]], texture2d<float, access::write> dstTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<float, access::write> dstTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<float, access::write> dstTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<float, access::write> dstTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]])
2937{
2938    if (gIndices.x >= options.copySize.x || gIndices.y >= options.copySize.y || gIndices.z >= options.copySize.z) { return; }
2939# 1372 ".\\copy_buffer.metal"
2940    uint bufferOffset = options.bufferStartOffset + (gIndices.z * options.bufferDepthPitch + gIndices.y * options.bufferRowPitch + gIndices.x * options.pixelSize);
2941
2942    switch (kCopyFormatType)
2943    {
2944        case FormatID::R5G6B5_UNORM: { auto color = readR5G6B5_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8A8_UNORM: { auto color = readR8G8B8A8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8A8_UNORM_SRGB: { auto color = readR8G8B8A8_UNORM_SRGB(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8A8_SNORM: { auto color = readR8G8B8A8_SNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::B8G8R8A8_UNORM: { auto color = readB8G8R8A8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::B8G8R8A8_UNORM_SRGB: { auto color = readB8G8R8A8_UNORM_SRGB(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8_UNORM: { auto color = readR8G8B8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8_UNORM_SRGB: { auto color = readR8G8B8_UNORM_SRGB(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8_SNORM: { auto color = readR8G8B8_SNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::L8_UNORM: { auto color = readL8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::L8A8_UNORM: { auto color = readL8A8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R5G5B5A1_UNORM: { auto color = readR5G5B5A1_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R4G4B4A4_UNORM: { auto color = readR4G4B4A4_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8_UNORM: { auto color = readR8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8_SNORM: { auto color = readR8_SNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8_UNORM: { auto color = readR8G8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8_SNORM: { auto color = readR8G8_SNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16_FLOAT: { auto color = readR16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16_SNORM: { auto color = readR16_NORM<short>(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16_UNORM: { auto color = readR16_NORM<ushort>(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::A16_FLOAT: { auto color = readA16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::L16_FLOAT: { auto color = readL16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::L16A16_FLOAT: { auto color = readL16A16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16_FLOAT: { auto color = readR16G16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16_SNORM: { auto color = readR16G16_NORM<short>(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16_UNORM: { auto color = readR16G16_NORM<ushort>(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16_FLOAT: { auto color = readR16G16B16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16_SNORM: { auto color = readR16G16B16_NORM<short>(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16_UNORM: { auto color = readR16G16B16_NORM<ushort>(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16A16_FLOAT: { auto color = readR16G16B16A16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16A16_SNORM: { auto color = readR16G16B16A16_NORM<short>(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16A16_UNORM: { auto color = readR16G16B16A16_NORM<ushort>(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32_FLOAT: { auto color = readR32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::A32_FLOAT: { auto color = readA32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::L32_FLOAT: { auto color = readL32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::L32A32_FLOAT: { auto color = readL32A32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32_FLOAT: { auto color = readR32G32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32B32_FLOAT: { auto color = readR32G32B32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32B32A32_FLOAT: { auto color = readR32G32B32A32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break;
2945    }
2946
2947
2948}
2949
2950kernel void readFromBufferToIntTexture(ushort3 gIndices [[thread_position_in_grid]], constant CopyPixelParams &options[[buffer(0)]], constant uchar *buffer [[buffer(1)]], texture2d<int, access::write> dstTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<int, access::write> dstTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<int, access::write> dstTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<int, access::write> dstTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]])
2951{
2952    if (gIndices.x >= options.copySize.x || gIndices.y >= options.copySize.y || gIndices.z >= options.copySize.z) { return; }
2953# 1400 ".\\copy_buffer.metal"
2954    uint bufferOffset = options.bufferStartOffset + (gIndices.z * options.bufferDepthPitch + gIndices.y * options.bufferRowPitch + gIndices.x * options.pixelSize);
2955
2956    switch (kCopyFormatType)
2957    {
2958        case FormatID::R8_SINT: { auto color = readR8_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8_SINT: { auto color = readR8G8_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8_SINT: { auto color = readR8G8B8_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8A8_SINT: { auto color = readR8G8B8A8_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16_SINT: { auto color = readR16_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16_SINT: { auto color = readR16G16_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16_SINT: { auto color = readR16G16B16_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16A16_SINT: { auto color = readR16G16B16A16_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32_SINT: { auto color = readR32_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32_SINT: { auto color = readR32G32_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32B32_SINT: { auto color = readR32G32B32_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32B32A32_SINT: { auto color = readR32G32B32A32_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break;
2959    }
2960
2961
2962}
2963
2964kernel void readFromBufferToUIntTexture(ushort3 gIndices [[thread_position_in_grid]], constant CopyPixelParams &options[[buffer(0)]], constant uchar *buffer [[buffer(1)]], texture2d<uint, access::write> dstTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<uint, access::write> dstTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<uint, access::write> dstTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<uint, access::write> dstTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]])
2965{
2966    if (gIndices.x >= options.copySize.x || gIndices.y >= options.copySize.y || gIndices.z >= options.copySize.z) { return; }
2967# 1428 ".\\copy_buffer.metal"
2968    uint bufferOffset = options.bufferStartOffset + (gIndices.z * options.bufferDepthPitch + gIndices.y * options.bufferRowPitch + gIndices.x * options.pixelSize);
2969
2970    switch (kCopyFormatType)
2971    {
2972        case FormatID::R8_UINT: { auto color = readR8_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8_UINT: { auto color = readR8G8_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8_UINT: { auto color = readR8G8B8_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8A8_UINT: { auto color = readR8G8B8A8_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16_UINT: { auto color = readR16_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16_UINT: { auto color = readR16G16_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16_UINT: { auto color = readR16G16B16_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16A16_UINT: { auto color = readR16G16B16A16_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32_UINT: { auto color = readR32_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32_UINT: { auto color = readR32G32_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32B32_UINT: { auto color = readR32G32B32_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32B32A32_UINT: { auto color = readR32G32B32A32_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break;
2973    }
2974
2975
2976}
2977
2978
2979kernel void writeFromFloatTextureToBuffer(ushort2 gIndices [[thread_position_in_grid]], constant WritePixelParams &options[[buffer(0)]], texture2d<float, access::read> srcTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<float, access::read> srcTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<float, access::read> srcTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<float, access::read> srcTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]], texture2d_ms<float, access::read> srcTexture2dMS [[texture(0), function_constant(kCopyTextureType2DMS)]], device uchar *buffer [[buffer(1)]])
2980{
2981    if (gIndices.x >= options.copySize.x || gIndices.y >= options.copySize.y) { return; }
2982# 1481 ".\\copy_buffer.metal"
2983    uint bufferOffset = options.bufferStartOffset + (gIndices.y * options.bufferRowPitch + gIndices.x * options.pixelSize);
2984
2985    switch (kCopyFormatType)
2986    {
2987        case FormatID::R5G6B5_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR5G6B5_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8A8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8A8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8A8_UNORM_SRGB: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8A8_UNORM_SRGB(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8A8_SNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8A8_SNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::B8G8R8A8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeB8G8R8A8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::B8G8R8A8_UNORM_SRGB: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeB8G8R8A8_UNORM_SRGB(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8_UNORM_SRGB: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8_UNORM_SRGB(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8_SNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8_SNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::L8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeL8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::A8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeA8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::L8A8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeL8A8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R5G5B5A1_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR5G5B5A1_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R4G4B4A4_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR4G4B4A4_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8_SNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8_SNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8_SNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8_SNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16_SNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16_NORM<short>(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16_NORM<ushort>(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::A16_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeA16_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::L16_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeL16_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::L16A16_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeL16A16_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16_SNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16_NORM<short>(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16_NORM<ushort>(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16B16A16_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16B16A16_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16B16A16_SNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16B16A16_NORM<short>(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16B16A16_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16B16A16_NORM<ushort>(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::A32_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeA32_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::L32_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeL32_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::L32A32_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeL32A32_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32G32_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32G32_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32G32B32A32_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32G32B32A32_FLOAT(gIndices, options, bufferOffset, color, buffer); } break;
2988    }
2989
2990
2991}
2992
2993kernel void writeFromIntTextureToBuffer(ushort2 gIndices [[thread_position_in_grid]], constant WritePixelParams &options[[buffer(0)]], texture2d<int, access::read> srcTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<int, access::read> srcTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<int, access::read> srcTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<int, access::read> srcTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]], texture2d_ms<int, access::read> srcTexture2dMS [[texture(0), function_constant(kCopyTextureType2DMS)]], device uchar *buffer [[buffer(1)]])
2994{
2995    if (gIndices.x >= options.copySize.x || gIndices.y >= options.copySize.y) { return; }
2996# 1506 ".\\copy_buffer.metal"
2997    uint bufferOffset = options.bufferStartOffset + (gIndices.y * options.bufferRowPitch + gIndices.x * options.pixelSize);
2998
2999    switch (kCopyFormatType)
3000    {
3001        case FormatID::R8_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8A8_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8A8_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16B16A16_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16B16A16_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32G32_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32G32_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32G32B32A32_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32G32B32A32_SINT(gIndices, options, bufferOffset, color, buffer); } break;
3002    }
3003
3004
3005}
3006
3007kernel void writeFromUIntTextureToBuffer(ushort2 gIndices [[thread_position_in_grid]], constant WritePixelParams &options[[buffer(0)]], texture2d<uint, access::read> srcTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<uint, access::read> srcTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<uint, access::read> srcTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<uint, access::read> srcTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]], texture2d_ms<uint, access::read> srcTexture2dMS [[texture(0), function_constant(kCopyTextureType2DMS)]], device uchar *buffer [[buffer(1)]])
3008{
3009    if (gIndices.x >= options.copySize.x || gIndices.y >= options.copySize.y) { return; }
3010# 1531 ".\\copy_buffer.metal"
3011    uint bufferOffset = options.bufferStartOffset + (gIndices.y * options.bufferRowPitch + gIndices.x * options.pixelSize);
3012
3013    switch (kCopyFormatType)
3014    {
3015        case FormatID::R8_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8A8_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8A8_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16B16A16_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16B16A16_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32G32_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32G32_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32G32B32A32_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32G32B32A32_UINT(gIndices, options, bufferOffset, color, buffer); } break;
3016    }
3017
3018
3019}
3020
3021
3022struct CopyVertexParams
3023{
3024    uint srcBufferStartOffset;
3025    uint srcStride;
3026    uint srcComponentBytes;
3027    uint srcComponents;
3028
3029
3030
3031    uchar4 srcDefaultAlphaData;
3032
3033    uint dstBufferStartOffset;
3034    uint dstStride;
3035    uint dstComponents;
3036
3037    uint vertexCount;
3038};
3039# 1581 ".\\copy_buffer.metal"
3040template <typename IntType>
3041static inline void writeFloatVertex(constant CopyVertexParams &options,
3042                                    uint idx,
3043                                    vec<IntType, 4> data,
3044                                    device uchar *dst)
3045{
3046    uint dstOffset = idx * options.dstStride + options.dstBufferStartOffset;
3047
3048    for (uint component = 0; component < options.dstComponents; ++component, dstOffset += 4)
3049    {
3050        floatToBytes(static_cast<float>(data[component]), dstOffset, dst);
3051    }
3052}
3053
3054template <>
3055inline void writeFloatVertex(constant CopyVertexParams &options,
3056                             uint idx,
3057                             vec<float, 4> data,
3058                             device uchar *dst)
3059{
3060    uint dstOffset = idx * options.dstStride + options.dstBufferStartOffset;
3061
3062    for (uint component = 0; component < options.dstComponents; ++component, dstOffset += 4)
3063    {
3064        floatToBytes(data[component], dstOffset, dst);
3065    }
3066}
3067
3068
3069static inline void convertToFloatVertexFormat(uint index,
3070                                              constant CopyVertexParams &options,
3071                                              constant uchar *srcBuffer,
3072                                              device uchar *dstBuffer)
3073{
3074# 1627 ".\\copy_buffer.metal"
3075    uint bufferOffset = options.srcBufferStartOffset + options.srcStride * index;
3076# 1636 ".\\copy_buffer.metal"
3077    switch (kCopyFormatType)
3078    {
3079        case FormatID::R8_UNORM: { auto data = readR8_UNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8_SNORM: { auto data = readR8_SNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8_UINT: { auto data = readR8_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8_SINT: { auto data = readR8_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8_USCALED: { auto data = readR8_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8_SSCALED: { auto data = readR8_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_UNORM: { auto data = readR8G8_UNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_SNORM: { auto data = readR8G8_SNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_UINT: { auto data = readR8G8_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_SINT: { auto data = readR8G8_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_USCALED: { auto data = readR8G8_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_SSCALED: { auto data = readR8G8_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_UNORM: { auto data = readR8G8B8_UNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_SNORM: { auto data = readR8G8B8_SNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_UINT: { auto data = readR8G8B8_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_SINT: { auto data = readR8G8B8_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_USCALED: { auto data = readR8G8B8_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_SSCALED: { auto data = readR8G8B8_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_UNORM: { auto data = readR8G8B8A8_UNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_SNORM: { auto data = readR8G8B8A8_SNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_UINT: { auto data = readR8G8B8A8_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_SINT: { auto data = readR8G8B8A8_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_USCALED: { auto data = readR8G8B8A8_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_SSCALED: { auto data = readR8G8B8A8_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_UNORM: { auto data = readR16_NORM<ushort>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_SNORM: { auto data = readR16_NORM<short>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_UINT: { auto data = readR16_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_SINT: { auto data = readR16_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_USCALED: { auto data = readR16_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_SSCALED: { auto data = readR16_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_UNORM: { auto data = readR16G16_NORM<ushort>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_SNORM: { auto data = readR16G16_NORM<short>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_UINT: { auto data = readR16G16_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_SINT: { auto data = readR16G16_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_USCALED: { auto data = readR16G16_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_SSCALED: { auto data = readR16G16_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_UNORM: { auto data = readR16G16B16_NORM<ushort>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_SNORM: { auto data = readR16G16B16_NORM<short>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_UINT: { auto data = readR16G16B16_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_SINT: { auto data = readR16G16B16_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_USCALED: { auto data = readR16G16B16_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_SSCALED: { auto data = readR16G16B16_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_UNORM: { auto data = readR16G16B16A16_NORM<ushort>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_SNORM: { auto data = readR16G16B16A16_NORM<short>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_UINT: { auto data = readR16G16B16A16_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_SINT: { auto data = readR16G16B16A16_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_USCALED: { auto data = readR16G16B16A16_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_SSCALED: { auto data = readR16G16B16A16_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_UNORM: { auto data = readR32_NORM<uint>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_SNORM: { auto data = readR32_NORM<int>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_UINT: { auto data = readR32_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_SINT: { auto data = readR32_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_USCALED: { auto data = readR32_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_SSCALED: { auto data = readR32_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_UNORM: { auto data = readR32G32_NORM<uint>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_SNORM: { auto data = readR32G32_NORM<int>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_UINT: { auto data = readR32G32_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_SINT: { auto data = readR32G32_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_USCALED: { auto data = readR32G32_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_SSCALED: { auto data = readR32G32_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_UNORM: { auto data = readR32G32B32_NORM<uint>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_SNORM: { auto data = readR32G32B32_NORM<int>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_UINT: { auto data = readR32G32B32_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_SINT: { auto data = readR32G32B32_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_USCALED: { auto data = readR32G32B32_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_SSCALED: { auto data = readR32G32B32_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_UNORM: { auto data = readR32G32B32A32_NORM<uint>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_SNORM: { auto data = readR32G32B32A32_NORM<int>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_UINT: { auto data = readR32G32B32A32_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_SINT: { auto data = readR32G32B32A32_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_USCALED: { auto data = readR32G32B32A32_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_SSCALED: { auto data = readR32G32B32A32_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_FLOAT: { auto data = readR16_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_FLOAT: { auto data = readR16G16_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_FLOAT: { auto data = readR16G16B16_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_FLOAT: { auto data = readR16G16B16A16_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_FLOAT: { auto data = readR32_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_FLOAT: { auto data = readR32G32_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_FLOAT: { auto data = readR32G32B32_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_FLOAT: { auto data = readR32G32B32A32_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_FIXED: { auto data = readR32_FIXED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_FIXED: { auto data = readR32G32_FIXED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_FIXED: { auto data = readR32G32B32_FIXED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_FIXED: { auto data = readR32G32B32A32_FIXED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R10G10B10A2_SINT: { auto data = readR10G10B10A2_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R10G10B10A2_UINT: { auto data = readR10G10B10A2_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R10G10B10A2_SSCALED: { auto data = readR10G10B10A2_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R10G10B10A2_USCALED: { auto data = readR10G10B10A2_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break;
3080    }
3081
3082
3083}
3084
3085
3086kernel void convertToFloatVertexFormatCS(uint index [[thread_position_in_grid]],
3087                                         constant CopyVertexParams &options [[buffer(0)]],
3088                                         constant uchar *srcBuffer [[buffer(1)]],
3089                                         device uchar *dstBuffer [[buffer(2)]])
3090{
3091    if (index >= options.vertexCount) { return; };
3092    convertToFloatVertexFormat(index, options, srcBuffer, dstBuffer);
3093}
3094
3095
3096vertex void convertToFloatVertexFormatVS(uint index [[vertex_id]],
3097                                         constant CopyVertexParams &options [[buffer(0)]],
3098                                         constant uchar *srcBuffer [[buffer(1)]],
3099                                         device uchar *dstBuffer [[buffer(2)]])
3100{
3101    convertToFloatVertexFormat(index, options, srcBuffer, dstBuffer);
3102}
3103
3104
3105static inline void expandVertexFormatComponents(uint index,
3106                                                constant CopyVertexParams &options,
3107                                                constant uchar *srcBuffer,
3108                                                device uchar *dstBuffer)
3109{
3110    uint srcOffset = options.srcBufferStartOffset + options.srcStride * index;
3111    uint dstOffset = options.dstBufferStartOffset + options.dstStride * index;
3112
3113    uint dstComponentsBeforeAlpha = min(options.dstComponents, 3u);
3114    uint component;
3115    for (component = 0; component < options.srcComponents; ++component,
3116        srcOffset += options.srcComponentBytes, dstOffset += options.srcComponentBytes)
3117    {
3118        for (uint byte = 0; byte < options.srcComponentBytes; ++byte)
3119        {
3120            dstBuffer[dstOffset + byte] = srcBuffer[srcOffset + byte];
3121        }
3122    }
3123
3124    for (; component < dstComponentsBeforeAlpha;
3125         ++component, dstOffset += options.srcComponentBytes)
3126    {
3127        for (uint byte = 0; byte < options.srcComponentBytes; ++byte)
3128        {
3129            dstBuffer[dstOffset + byte] = 0;
3130        }
3131    }
3132
3133    if (component < options.dstComponents)
3134    {
3135
3136        for (uint byte = 0; byte < options.srcComponentBytes; ++byte)
3137        {
3138            dstBuffer[dstOffset + byte] = options.srcDefaultAlphaData[byte];
3139        }
3140    }
3141}
3142
3143
3144kernel void expandVertexFormatComponentsCS(uint index [[thread_position_in_grid]],
3145                                           constant CopyVertexParams &options [[buffer(0)]],
3146                                           constant uchar *srcBuffer [[buffer(1)]],
3147                                           device uchar *dstBuffer [[buffer(2)]])
3148{
3149    if (index >= options.vertexCount) { return; };
3150
3151    expandVertexFormatComponents(index, options, srcBuffer, dstBuffer);
3152}
3153
3154
3155vertex void expandVertexFormatComponentsVS(uint index [[vertex_id]],
3156                                           constant CopyVertexParams &options [[buffer(0)]],
3157                                           constant uchar *srcBuffer [[buffer(1)]],
3158                                           device uchar *dstBuffer [[buffer(2)]])
3159{
3160    expandVertexFormatComponents(index, options, srcBuffer, dstBuffer);
3161}
3162
3163
3164kernel void linearizeBlocks(ushort2 position [[thread_position_in_grid]],
3165                            constant uint2 *dimensions [[buffer(0)]],
3166                            constant uint2 *srcBuffer [[buffer(1)]],
3167                            device uint2 *dstBuffer [[buffer(2)]])
3168{
3169    if (any(uint2(position) >= *dimensions))
3170    {
3171        return;
3172    }
3173    uint2 t = uint2(position);
3174    t = (t | (t << 8)) & 0x00FF00FF;
3175    t = (t | (t << 4)) & 0x0F0F0F0F;
3176    t = (t | (t << 2)) & 0x33333333;
3177    t = (t | (t << 1)) & 0x55555555;
3178    dstBuffer[position.y * (*dimensions).x + position.x] = srcBuffer[(t.x << 1) | t.y];
3179}
3180
3181
3182kernel void saturateDepth(uint2 position [[thread_position_in_grid]],
3183                          constant uint3 *dimensions [[buffer(0)]],
3184                          device float *srcBuffer [[buffer(1)]],
3185                          device float *dstBuffer [[buffer(2)]])
3186{
3187    if (any(position >= (*dimensions).xy))
3188    {
3189        return;
3190    }
3191    const uint srcOffset = position.y * (*dimensions).z + position.x;
3192    const uint dstOffset = position.y * (*dimensions).x + position.x;
3193    dstBuffer[dstOffset] = saturate(srcBuffer[srcOffset]);
3194}
3195# 6 "temp_master_source.metal" 2
3196# 1 ".\\visibility.metal" 1
3197
3198
3199
3200
3201
3202
3203
3204
3205constant bool kCombineWithExistingResult [[function_constant(1000)]];
3206
3207
3208
3209struct CombineVisibilityResultOptions
3210{
3211
3212    uint startOffset;
3213
3214    uint numOffsets;
3215};
3216
3217kernel void combineVisibilityResult(uint idx [[thread_position_in_grid]],
3218                                    constant CombineVisibilityResultOptions &options [[buffer(0)]],
3219                                    constant ushort4 *renderpassVisibilityResult [[buffer(1)]],
3220                                    device ushort4 *finalResults [[buffer(2)]])
3221{
3222    if (idx > 0)
3223    {
3224
3225
3226
3227        return;
3228    }
3229    ushort4 finalResult16x4;
3230
3231    if (kCombineWithExistingResult)
3232    {
3233        finalResult16x4 = finalResults[0];
3234    }
3235    else
3236    {
3237        finalResult16x4 = ushort4(0, 0, 0, 0);
3238    }
3239
3240    for (uint i = 0; i < options.numOffsets; ++i)
3241    {
3242        uint offset = options.startOffset + i;
3243        ushort4 renderpassResult = renderpassVisibilityResult[offset];
3244
3245
3246        finalResult16x4 = finalResult16x4 | renderpassResult;
3247    }
3248    finalResults[0] = finalResult16x4;
3249}
3250# 7 "temp_master_source.metal" 2
3251# 1 ".\\rewrite_indices.metal" 1
3252# 11 ".\\rewrite_indices.metal"
3253# 1 ".\\rewrite_indices_shared.h" 1
3254# 12 ".\\rewrite_indices.metal" 2
3255using namespace metal;
3256
3257constant uint fixIndexBufferKey [[ function_constant(2000) ]];
3258constant bool indexBufferIsUint16 = (((fixIndexBufferKey >> 0U) & 0x03U) == 2U);
3259constant bool indexBufferIsUint32 = (((fixIndexBufferKey >> 0U) & 0x03U) == 3U);
3260constant bool outIndexBufferIsUint16 = (((fixIndexBufferKey >> 2U) & 0x03U) == 2U);
3261constant bool outIndexBufferIsUint32 = (((fixIndexBufferKey >> 2U) & 0x03U) == 3U);
3262constant bool doPrimRestart = (fixIndexBufferKey & 0x00100U);
3263constant uint fixIndexBufferMode = (fixIndexBufferKey >> 4U) & 0x0FU;
3264
3265
3266static inline uint readIdx(
3267                           const device ushort *indexBufferUint16,
3268                           const device uint *indexBufferUint32,
3269                           const uint restartIndex,
3270                           const uint indexCount,
3271                           uint idx,
3272                           thread bool &foundRestart,
3273                           thread uint &indexThatRestartedFirst
3274                           )
3275{
3276    uint inIndex = idx;
3277    if(inIndex < indexCount)
3278    {
3279        if(indexBufferIsUint16)
3280        {
3281            inIndex = indexBufferUint16[inIndex];
3282        }
3283        else if(indexBufferIsUint32)
3284        {
3285            inIndex = indexBufferUint32[inIndex];
3286        }
3287    }
3288    else
3289    {
3290        foundRestart = true;
3291        indexThatRestartedFirst = idx;
3292    }
3293    if(doPrimRestart && !foundRestart && inIndex == restartIndex)
3294    {
3295        foundRestart = true;
3296        indexThatRestartedFirst = idx;
3297    }
3298    return inIndex;
3299}
3300
3301static inline void outputPrimitive(
3302                                   const device ushort *indexBufferUint16,
3303                                   const device uint *indexBufferUint32,
3304                                   device ushort *outIndexBufferUint16,
3305                                   device uint *outIndexBufferUint32,
3306                                   const uint restartIndex,
3307                                   const uint indexCount,
3308                                   thread uint &baseIndex,
3309                                   uint onIndex,
3310                                   thread uint &onOutIndex
3311                                   )
3312{
3313    if(baseIndex > onIndex) return;
3314    bool foundRestart = false;
3315    uint indexThatRestartedFirst = 0;
3316# 86 ".\\rewrite_indices.metal"
3317    switch(fixIndexBufferMode)
3318    {
3319        case 0x00U:
3320        {
3321            auto tmpIndex = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex, foundRestart, indexThatRestartedFirst);
3322            if(foundRestart)
3323            {
3324                baseIndex = indexThatRestartedFirst + 1;
3325                return;
3326            }
3327
3328            ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex; } onOutIndex++; });
3329        }
3330        break;
3331        case 0x01U:
3332        {
3333            auto tmpIndex0 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 0, foundRestart, indexThatRestartedFirst);
3334            auto tmpIndex1 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 1, foundRestart, indexThatRestartedFirst);
3335            if(foundRestart)
3336            {
3337                baseIndex = indexThatRestartedFirst + 1;
3338                return;
3339            }
3340            if((onIndex - baseIndex) & 1) return;
3341
3342            if(fixIndexBufferKey & 0x00200U)
3343            {
3344                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; });
3345                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; });
3346            }
3347            else
3348            {
3349                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; });
3350                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; });
3351            }
3352        }
3353        break;
3354        case 0x03U:
3355        {
3356            auto tmpIndex0 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 0, foundRestart, indexThatRestartedFirst);
3357            auto tmpIndex1 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 1, foundRestart, indexThatRestartedFirst);
3358            if(foundRestart)
3359            {
3360                baseIndex = indexThatRestartedFirst + 1;
3361                return;
3362            }
3363
3364            if(fixIndexBufferKey & 0x00200U)
3365            {
3366                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; });
3367                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; });
3368            }
3369            else
3370            {
3371                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; });
3372                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; });
3373            }
3374        }
3375        break;
3376        case 0x04U:
3377        {
3378            auto tmpIndex0 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 0, foundRestart, indexThatRestartedFirst);
3379            auto tmpIndex1 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 1, foundRestart, indexThatRestartedFirst);
3380            auto tmpIndex2 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 2, foundRestart, indexThatRestartedFirst);
3381            if(foundRestart)
3382            {
3383                baseIndex = indexThatRestartedFirst + 1;
3384                return;
3385            }
3386            if(((onIndex - baseIndex) % 3) != 0) return;
3387
3388            if(fixIndexBufferKey & 0x00200U)
3389            {
3390                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2; } onOutIndex++; });
3391                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; });
3392                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; });
3393            }
3394            else
3395            {
3396                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; });
3397                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; });
3398                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2; } onOutIndex++; });
3399            }
3400        }
3401        break;
3402        case 0x05U:
3403        {
3404            uint isOdd = ((onIndex - baseIndex) & 1);
3405            auto tmpIndex0 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 0 + isOdd, foundRestart, indexThatRestartedFirst);
3406            auto tmpIndex1 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 1 - isOdd, foundRestart, indexThatRestartedFirst);
3407            auto tmpIndex2 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 2, foundRestart, indexThatRestartedFirst);
3408            if(foundRestart)
3409            {
3410                baseIndex = indexThatRestartedFirst + 1;
3411                return;
3412            }
3413
3414            if(fixIndexBufferKey & 0x00200U)
3415            {
3416                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2; } onOutIndex++; });
3417                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; });
3418                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; });
3419            }
3420            else
3421            {
3422
3423                if(isOdd)
3424                {
3425                    ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; });
3426                    ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2; } onOutIndex++; });
3427                    ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; });
3428                }
3429                else
3430                {
3431                    ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; });
3432                    ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; });
3433                    ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2; } onOutIndex++; });
3434                }
3435            }
3436
3437            assert(onOutIndex <= (onIndex + 1) * 3);
3438            assert(onOutIndex <= (indexCount - 2) * 3);
3439        }
3440        break;
3441
3442    }
3443
3444
3445}
3446
3447kernel void fixIndexBuffer(
3448                           const device ushort *indexBufferUint16 [[ buffer(0), function_constant(indexBufferIsUint16) ]],
3449                           const device uint *indexBufferUint32 [[ buffer(0), function_constant(indexBufferIsUint32) ]],
3450                           device ushort *outIndexBufferUint16 [[ buffer(1), function_constant(outIndexBufferIsUint16) ]],
3451                           device uint *outIndexBufferUint32 [[ buffer(1), function_constant(outIndexBufferIsUint32) ]],
3452                           constant uint &indexCount [[ buffer(2) ]],
3453                           constant uint &primCount [[ buffer(3) ]],
3454                           uint prim [[thread_position_in_grid]])
3455{
3456    constexpr uint restartIndex = 0xFFFFFFFF;
3457    uint baseIndex = 0;
3458    uint onIndex = onIndex;
3459    uint onOutIndex = onOutIndex;
3460    if(prim < primCount)
3461    {
3462        switch(fixIndexBufferMode)
3463        {
3464            case 0x00U:
3465                onIndex = prim;
3466                onOutIndex = prim;
3467                break;
3468            case 0x01U:
3469                onIndex = prim * 2;
3470                onOutIndex = prim * 2;
3471                break;
3472            case 0x03U:
3473                onIndex = prim;
3474                onOutIndex = prim * 2;
3475                break;
3476            case 0x04U:
3477                onIndex = prim * 3;
3478                onOutIndex = prim * 3;
3479                break;
3480            case 0x05U:
3481                onIndex = prim;
3482                onOutIndex = prim * 3;
3483                break;
3484        }
3485        outputPrimitive(indexBufferUint16, indexBufferUint32, outIndexBufferUint16, outIndexBufferUint32, restartIndex, indexCount, baseIndex, onIndex, onOutIndex);
3486    }
3487}
3488
3489
3490
3491static inline void generatePrimitive(
3492                                   device ushort *outIndexBufferUint16,
3493                                   device uint *outIndexBufferUint32,
3494                                   const uint firstVertex,
3495                                   const uint indexCount,
3496                                   thread uint &baseIndex,
3497                                   uint onIndex,
3498                                   uint primCount,
3499                                   thread uint &onOutIndex
3500                                   )
3501{
3502    if(baseIndex > onIndex) return;
3503# 284 ".\\rewrite_indices.metal"
3504    switch(fixIndexBufferMode)
3505    {
3506        case 0x00U:
3507        {
3508            ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = onIndex + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = onIndex + firstVertex; } onOutIndex++; });
3509        }
3510        break;
3511        case 0x01U:
3512        {
3513            auto tmpIndex0 = onIndex + 0;
3514            auto tmpIndex1 = onIndex + 1;
3515            if(fixIndexBufferKey & 0x00200U)
3516            {
3517                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; });
3518                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; });
3519            }
3520            else
3521            {
3522                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; });
3523                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; });
3524            }
3525        }
3526        break;
3527        case 0x02U:
3528        {
3529            auto tmpIndex0 = onIndex + 0;
3530            auto tmpIndex1 = (onIndex + 1) % primCount;
3531            if(fixIndexBufferKey & 0x00200U)
3532            {
3533                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; });
3534                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; });
3535            }
3536            else
3537            {
3538                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; });
3539                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; });
3540            }
3541        }
3542        break;
3543        case 0x03U:
3544        {
3545            auto tmpIndex0 = onIndex + 0;
3546            auto tmpIndex1 = onIndex + 1;
3547            if(fixIndexBufferKey & 0x00200U)
3548            {
3549                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; });
3550                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; });
3551            }
3552            else
3553            {
3554                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; });
3555                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; });
3556            }
3557        }
3558        break;
3559        case 0x04U:
3560        {
3561            auto tmpIndex0 = onIndex + 0;
3562            auto tmpIndex1 = onIndex + 1;
3563            auto tmpIndex2 = onIndex + 2;
3564            if(fixIndexBufferKey & 0x00200U)
3565            {
3566                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2 + firstVertex; } onOutIndex++; });
3567                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; });
3568                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; });
3569            }
3570            else
3571            {
3572                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; });
3573                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; });
3574                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2 + firstVertex; } onOutIndex++; });
3575            }
3576        }
3577        break;
3578        case 0x05U:
3579        {
3580            uint isOdd = ((onIndex - baseIndex) & 1);
3581            auto tmpIndex0 = onIndex + 0 + isOdd;
3582            auto tmpIndex1 = onIndex + 1 - isOdd;
3583            auto tmpIndex2 = onIndex + 2;
3584            if(fixIndexBufferKey & 0x00200U)
3585            {
3586                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2 + firstVertex; } onOutIndex++; });
3587                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; });
3588                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; });
3589            }
3590            else
3591            {
3592                if(isOdd)
3593                {
3594                    ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; });
3595                    ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2 + firstVertex; } onOutIndex++; });
3596                    ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; });
3597                }
3598                else
3599                {
3600                    ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; });
3601                    ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; });
3602                    ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2 + firstVertex; } onOutIndex++; });
3603                }
3604            }
3605
3606            assert(onOutIndex <= (onIndex + 1) * 3);
3607            assert(onOutIndex <= (indexCount - 2) * 3);
3608            break;
3609        }
3610        case 0x06U:
3611        {
3612            auto tmpIndex0 = 0;
3613            auto tmpIndex1 = onIndex + 1;
3614            auto tmpIndex2 = onIndex + 2;
3615
3616            if(fixIndexBufferKey & 0x00200U)
3617            {
3618                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2 + firstVertex; } onOutIndex++; });
3619                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; });
3620                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; });
3621            }
3622            else
3623            {
3624                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1 + firstVertex; } onOutIndex++; });
3625                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2 + firstVertex; } onOutIndex++; });
3626                ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0 + firstVertex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0 + firstVertex; } onOutIndex++; });
3627            }
3628        }
3629        break;
3630
3631    }
3632
3633}
3634
3635
3636
3637kernel void genIndexBuffer(
3638                           device ushort *outIndexBufferUint16 [[ buffer(1), function_constant(outIndexBufferIsUint16) ]],
3639                           device uint *outIndexBufferUint32 [[ buffer(1), function_constant(outIndexBufferIsUint32) ]],
3640                           constant uint &indexCount [[ buffer(2) ]],
3641                           constant uint &primCount [[ buffer(3) ]],
3642                           constant uint &firstVertex [[ buffer(4) ]],
3643                           uint prim [[thread_position_in_grid]])
3644{
3645    uint baseIndex = 0;
3646    uint onIndex = onIndex;
3647    uint onOutIndex = onOutIndex;
3648    if(prim < primCount)
3649    {
3650        switch(fixIndexBufferMode)
3651        {
3652            case 0x00U:
3653                onIndex = prim;
3654                onOutIndex = prim;
3655                break;
3656            case 0x01U:
3657                onIndex = prim * 2;
3658                onOutIndex = prim * 2;
3659                break;
3660            case 0x03U:
3661                onIndex = prim;
3662                onOutIndex = prim * 2;
3663                break;
3664            case 0x02U:
3665                onIndex = prim;
3666                onOutIndex = prim * 2;
3667                break;
3668            case 0x04U:
3669                onIndex = prim * 3;
3670                onOutIndex = prim * 3;
3671                break;
3672            case 0x05U:
3673                onIndex = prim;
3674                onOutIndex = prim * 3;
3675                break;
3676            case 0x06U:
3677                onIndex = prim;
3678                onOutIndex = prim * 3;
3679                break;
3680        }
3681        generatePrimitive(outIndexBufferUint16, outIndexBufferUint32, firstVertex, indexCount, baseIndex, onIndex, primCount, onOutIndex);
3682    }
3683}
3684# 8 "temp_master_source.metal" 2
3685
3686
3687