xref: /aosp_15_r20/external/angle/src/libANGLE/renderer/metal/shaders/copy_buffer.metal (revision 8975f5c5ed3d1c378011245431ada316dfb6f244)
1//
2// Copyright 2020 The ANGLE Project. All rights reserved.
3// Use of this source code is governed by a BSD-style license that can be
4// found in the LICENSE file.
5//
// copy_buffer.metal: implements compute shaders that copy formatted data from buffer to texture,
// from texture to buffer, and from buffer to buffer.
// NOTE(hqle): This file is a bit hard to read, but it contains a lot of repetitive code that
// would be a pain to implement without the use of macros.
10//
11
12@@#include <metal_pack>
13
14#include "common.h"
15#include "format_autogen.h"
16
17using namespace rx::mtl_shader;
18
// Function constant 10.
// NOTE(review): not referenced in the visible part of this file; presumably selects the
// FormatID handled by the copy kernels - confirm against the kernel definitions.
constant int kCopyFormatType [[function_constant(10)]];

/* -------- copy pixel data between buffer and texture ---------*/
// Function constant 20: the texture type the copy functions operate on. It is compared against
// the kTextureType* values below and switched on in textureWrite()/textureRead().
constant int kCopyTextureType [[function_constant(20)]];
// One flag per texture type, derived from kCopyTextureType. The texture parameter macros use
// them in function_constant(...) attributes to gate the matching texture argument.
constant bool kCopyTextureType2D      = kCopyTextureType == kTextureType2D;
constant bool kCopyTextureType2DArray = kCopyTextureType == kTextureType2DArray;
constant bool kCopyTextureType2DMS    = kCopyTextureType == kTextureType2DMultisample;
constant bool kCopyTextureTypeCube    = kCopyTextureType == kTextureTypeCube;
constant bool kCopyTextureType3D      = kCopyTextureType == kTextureType3D;
28
// Options for the buffer -> texture path; bound at buffer(0) by COMMON_READ_KERNEL_PARAMS.
// NOTE(review): field order/types presumably mirror a host-side struct - do not reorder.
struct CopyPixelParams
{
    // Extent of the copy; READ_KERNEL_GUARD returns early for threads outside it.
    uint3 copySize;
    // Added to the thread position in textureWrite() to get the destination texel.
    uint3 textureOffset;

    // Base term of CALC_BUFFER_READ_OFFSET.
    uint bufferStartOffset;
    // NOTE(review): presumably the size of one pixel in the buffer; not read in the visible code.
    uint pixelSize;
    // Multiplied by the thread's y index in CALC_BUFFER_READ_OFFSET.
    uint bufferRowPitch;
    // Multiplied by the thread's z index in CALC_BUFFER_READ_OFFSET.
    uint bufferDepthPitch;
};
39
// Options for the texture -> buffer path; bound at buffer(0) by COMMON_WRITE_KERNEL_PARAMS.
// NOTE(review): field order/types presumably mirror a host-side struct - do not reorder.
struct WritePixelParams
{
    // Extent of the copy; WRITE_KERNEL_GUARD returns early for threads outside it.
    uint2 copySize;
    // Added to the (possibly row-flipped) thread position in textureRead().
    uint2 textureOffset;

    // Base term of CALC_BUFFER_WRITE_OFFSET.
    uint bufferStartOffset;

    // NOTE(review): presumably the size of one pixel in the buffer; not read in the visible code.
    uint pixelSize;
    // Multiplied by the thread's y index in CALC_BUFFER_WRITE_OFFSET.
    uint bufferRowPitch;

    // Level argument passed to the texture read() calls in textureRead().
    uint textureLevel;
    // Array layer / cube face argument, or the z coordinate for 3D textures, in textureRead().
    uint textureLayer;

    // When true, textureRead() mirrors the row index within copySize.y.
    bool reverseTextureRowOrder;
};
55
56// clang-format off
// Declares one texture argument per supported texture type. Every argument uses
// texture slot 0 and is gated by the function constant for its type.
// The argument names are <PREFIX>Texture2d, <PREFIX>Texture2dArray, <PREFIX>Texture3d
// and <PREFIX>TextureCube.
#define TEXTURE_PARAMS(PIXEL_TYPE, ACCESS_MODE, PREFIX)                 \
    texture2d<PIXEL_TYPE, ACCESS_MODE> PREFIX##Texture2d                \
        [[texture(0), function_constant(kCopyTextureType2D)]],          \
    texture2d_array<PIXEL_TYPE, ACCESS_MODE> PREFIX##Texture2dArray     \
        [[texture(0), function_constant(kCopyTextureType2DArray)]],     \
    texture3d<PIXEL_TYPE, ACCESS_MODE> PREFIX##Texture3d                \
        [[texture(0), function_constant(kCopyTextureType3D)]],          \
    texturecube<PIXEL_TYPE, ACCESS_MODE> PREFIX##TextureCube            \
        [[texture(0), function_constant(kCopyTextureTypeCube)]]

// Passes the arguments declared by TEXTURE_PARAMS on to another function, in the same order.
#define FORWARD_TEXTURE_PARAMS(PREFIX) \
    PREFIX##Texture2d, PREFIX##Texture2dArray, PREFIX##Texture3d, PREFIX##TextureCube
72
73// Params for reading from buffer to texture
// Write-only destination textures, named dstTexture*.
#define DEST_TEXTURE_PARAMS(PIXEL_TYPE) TEXTURE_PARAMS(PIXEL_TYPE, access::write, dst)
#define FORWARD_DEST_TEXTURE_PARAMS FORWARD_TEXTURE_PARAMS(dst)

// Kernel signature for buffer -> texture copies: thread position, options at buffer(0),
// source bytes at buffer(1), then the destination textures.
#define COMMON_READ_KERNEL_PARAMS(PIXEL_TYPE)        \
    ushort3 gIndices [[thread_position_in_grid]],    \
    constant CopyPixelParams &options [[buffer(0)]], \
    constant uchar *buffer [[buffer(1)]],            \
    DEST_TEXTURE_PARAMS(PIXEL_TYPE)

// Signature shared by the per-format read<Format>() helpers.
#define COMMON_READ_FUNC_PARAMS uint bufferOffset, constant uchar *buffer

#define FORWARD_COMMON_READ_FUNC_PARAMS bufferOffset, buffer
88
// Params for writing to buffer by copying from texture.
// (NOTE: it has an additional multisample source texture parameter)
// Read-only source textures, named srcTexture*, plus the multisample source texture.
#define SRC_TEXTURE_PARAMS(PIXEL_TYPE)                           \
    TEXTURE_PARAMS(PIXEL_TYPE, access::read, src),               \
    texture2d_ms<PIXEL_TYPE, access::read> srcTexture2dMS        \
        [[texture(0), function_constant(kCopyTextureType2DMS)]]

#define FORWARD_SRC_TEXTURE_PARAMS FORWARD_TEXTURE_PARAMS(src), srcTexture2dMS

// Kernel signature for texture -> buffer copies: thread position, options at buffer(0),
// the source textures, then the destination bytes at buffer(1).
#define COMMON_WRITE_KERNEL_PARAMS(PIXEL_TYPE)        \
    ushort2 gIndices [[thread_position_in_grid]],     \
    constant WritePixelParams &options [[buffer(0)]], \
    SRC_TEXTURE_PARAMS(PIXEL_TYPE),                   \
    device uchar *buffer [[buffer(1)]]

// Signature shared by the per-format write<Format>() helpers; `color` has four
// components of COMPONENT_TYPE.
#define COMMON_WRITE_FUNC_PARAMS(COMPONENT_TYPE) \
    ushort2 gIndices,                            \
    constant WritePixelParams &options,          \
    uint bufferOffset,                           \
    vec<COMPONENT_TYPE, 4> color,                \
    device uchar *buffer

#define COMMON_WRITE_FLOAT_FUNC_PARAMS COMMON_WRITE_FUNC_PARAMS(float)
#define COMMON_WRITE_SINT_FUNC_PARAMS COMMON_WRITE_FUNC_PARAMS(int)
#define COMMON_WRITE_UINT_FUNC_PARAMS COMMON_WRITE_FUNC_PARAMS(uint)

#define FORWARD_COMMON_WRITE_FUNC_PARAMS gIndices, options, bufferOffset, color, buffer
116
117// clang-format on
118
119// Write to texture code based on texture type:
// Stores `color` into the destination texture selected by kCopyTextureType.
// The texel written is options.textureOffset + gIndices; for 2D-array and cube
// textures the z component is passed as the third argument of write().
// Nothing is written when kCopyTextureType matches none of the handled types.
template <typename T>
static inline void textureWrite(ushort3 gIndices,
                                constant CopyPixelParams &options,
                                vec<T, 4> color,
                                DEST_TEXTURE_PARAMS(T))
{
    const uint3 texel = uint3(gIndices) + options.textureOffset;

    if (kCopyTextureType == kTextureType2D)
    {
        dstTexture2d.write(color, texel.xy);
    }
    else if (kCopyTextureType == kTextureType2DArray)
    {
        dstTexture2dArray.write(color, texel.xy, texel.z);
    }
    else if (kCopyTextureType == kTextureType3D)
    {
        dstTexture3d.write(color, texel);
    }
    else if (kCopyTextureType == kTextureTypeCube)
    {
        dstTextureCube.write(color, texel.xy, texel.z);
    }
}
143
144// Read from texture code based on texture type:
// Fetches one texel from the source texture selected by kCopyTextureType.
//  - gIndices: thread position inside the copy region.
//  - options:  supplies the texture offset, mip level, layer and the row-flip flag.
// Returns the texel color; for the multisample type the value comes from
// resolveTextureMS(). If kCopyTextureType matches no handled type, zero is returned.
template <typename T>
static inline vec<T, 4> textureRead(ushort2 gIndices,
                                    constant WritePixelParams &options,
                                    SRC_TEXTURE_PARAMS(T))
{
    // Start from a defined value so that an unhandled kCopyTextureType cannot make
    // this function return an indeterminate color.
    vec<T, 4> color = vec<T, 4>(0);
    uint2 coords    = uint2(gIndices);
    if (options.reverseTextureRowOrder)
    {
        // Mirror the row index within the copied region before applying the offset.
        coords.y = options.copySize.y - 1 - gIndices.y;
    }
    coords += options.textureOffset;
    switch (kCopyTextureType)
    {
        case kTextureType2D:
            color = srcTexture2d.read(coords.xy, options.textureLevel);
            break;
        case kTextureType2DArray:
            color = srcTexture2dArray.read(coords.xy, options.textureLayer, options.textureLevel);
            break;
        case kTextureType2DMultisample:
            color = resolveTextureMS(srcTexture2dMS, coords.xy);
            break;
        case kTextureType3D:
            // For 3D textures textureLayer is used as the z coordinate.
            color = srcTexture3d.read(uint3(coords, options.textureLayer), options.textureLevel);
            break;
        case kTextureTypeCube:
            color = srcTextureCube.read(coords.xy, options.textureLayer, options.textureLevel);
            break;
    }
    return color;
}
177
178// Calculate offset into buffer:
// Byte offset of the pixel handled by this thread in the source buffer. Expects
// `options` and a three-component `gIndices` to be in scope at the expansion site.
// The argument and the whole expansion are parenthesized so the macro stays
// correct with expression arguments and inside larger expressions.
#define CALC_BUFFER_READ_OFFSET(pixelSize)                                \
    (options.bufferStartOffset + (gIndices.z * options.bufferDepthPitch + \
                                  gIndices.y * options.bufferRowPitch +   \
                                  gIndices.x * (pixelSize)))

// Byte offset of the pixel handled by this thread in the destination buffer. Expects
// `options` and a two-component `gIndices` to be in scope at the expansion site.
#define CALC_BUFFER_WRITE_OFFSET(pixelSize)                            \
    (options.bufferStartOffset + (gIndices.y * options.bufferRowPitch + \
                                  gIndices.x * (pixelSize)))
185
186// Per format handling code:
// Expands to a complete `case ...: { ... } break;` that decodes one pixel with
// read<FORMAT_NAME>() and stores it with textureWrite(). Expects gIndices, options,
// bufferOffset, buffer and the dst* textures to be in scope.
#define READ_FORMAT_SWITCH_CASE(FORMAT_NAME)                                 \
    case FormatID::FORMAT_NAME:                                              \
    {                                                                        \
        auto color = read##FORMAT_NAME(FORWARD_COMMON_READ_FUNC_PARAMS);     \
        textureWrite(gIndices, options, color, FORWARD_DEST_TEXTURE_PARAMS); \
    }                                                                        \
    break;

// Expands to a complete `case ...: { ... } break;` that fetches one texel with
// textureRead() and encodes it with write<FORMAT_NAME>(). The local must be named
// `color` because FORWARD_COMMON_WRITE_FUNC_PARAMS refers to it by name.
#define WRITE_FORMAT_SWITCH_CASE(FORMAT_NAME)                                    \
    case FormatID::FORMAT_NAME:                                                  \
    {                                                                            \
        auto color = textureRead(gIndices, options, FORWARD_SRC_TEXTURE_PARAMS); \
        write##FORMAT_NAME(FORWARD_COMMON_WRITE_FUNC_PARAMS);                    \
    }                                                                            \
    break;
200
// Returns from the enclosing function unless the thread position lies inside
// options.copySize on all three axes.
#define READ_KERNEL_GUARD                                                       \
    if (!(gIndices.x < options.copySize.x && gIndices.y < options.copySize.y && \
          gIndices.z < options.copySize.z))                                     \
    {                                                                           \
        return;                                                                 \
    }

// Two-axis variant of the guard, used with a two-component gIndices.
#define WRITE_KERNEL_GUARD                                                       \
    if (!(gIndices.x < options.copySize.x && gIndices.y < options.copySize.y))   \
    {                                                                            \
        return;                                                                  \
    }
213
214// R5G6B5
// Decodes one R5G6B5 pixel (16 bits read at bufferOffset) into a float4; alpha is 1.
static inline float4 readR5G6B5_UNORM(COMMON_READ_FUNC_PARAMS)
{
    ushort packed = bytesToShort<ushort>(buffer, bufferOffset);

    const float red   = normalizedToFloat<5>(getShiftedData<5, 11>(packed));
    const float green = normalizedToFloat<6>(getShiftedData<6, 5>(packed));
    const float blue  = normalizedToFloat<5>(getShiftedData<5, 0>(packed));
    return float4(red, green, blue, 1.0f);
}
// Encodes color.rgb as one R5G6B5 pixel and stores its 16 bits at bufferOffset.
static inline void writeR5G6B5_UNORM(COMMON_WRITE_FLOAT_FUNC_PARAMS)
{
    const ushort redBits   = shiftData<5, 11>(floatToNormalized<5, ushort>(color.r));
    const ushort greenBits = shiftData<6, 5>(floatToNormalized<6, ushort>(color.g));
    const ushort blueBits  = shiftData<5, 0>(floatToNormalized<5, ushort>(color.b));

    ushort packed = redBits | greenBits | blueBits;
    shortToBytes(packed, bufferOffset, buffer);
}
234
235// R4G4B4A4
// Decodes one R4G4B4A4 pixel (16 bits read at bufferOffset) into a float4.
static inline float4 readR4G4B4A4_UNORM(COMMON_READ_FUNC_PARAMS)
{
    ushort packed = bytesToShort<ushort>(buffer, bufferOffset);

    const float red   = normalizedToFloat<4>(getShiftedData<4, 12>(packed));
    const float green = normalizedToFloat<4>(getShiftedData<4, 8>(packed));
    const float blue  = normalizedToFloat<4>(getShiftedData<4, 4>(packed));
    const float alpha = normalizedToFloat<4>(getShiftedData<4, 0>(packed));
    return float4(red, green, blue, alpha);
}
// Encodes color as one R4G4B4A4 pixel and stores its 16 bits at bufferOffset.
static inline void writeR4G4B4A4_UNORM(COMMON_WRITE_FLOAT_FUNC_PARAMS)
{
    const ushort redBits   = shiftData<4, 12>(floatToNormalized<4, ushort>(color.r));
    const ushort greenBits = shiftData<4, 8>(floatToNormalized<4, ushort>(color.g));
    const ushort blueBits  = shiftData<4, 4>(floatToNormalized<4, ushort>(color.b));
    const ushort alphaBits = shiftData<4, 0>(floatToNormalized<4, ushort>(color.a));

    ushort packed = redBits | greenBits | blueBits | alphaBits;
    shortToBytes(packed, bufferOffset, buffer);
}
257
258// R5G5B5A1
// Decodes one R5G5B5A1 pixel (16 bits read at bufferOffset) into a float4.
static inline float4 readR5G5B5A1_UNORM(COMMON_READ_FUNC_PARAMS)
{
    ushort packed = bytesToShort<ushort>(buffer, bufferOffset);

    const float red   = normalizedToFloat<5>(getShiftedData<5, 11>(packed));
    const float green = normalizedToFloat<5>(getShiftedData<5, 6>(packed));
    const float blue  = normalizedToFloat<5>(getShiftedData<5, 1>(packed));
    const float alpha = normalizedToFloat<1>(getShiftedData<1, 0>(packed));
    return float4(red, green, blue, alpha);
}
// Encodes color as one R5G5B5A1 pixel and stores its 16 bits at bufferOffset.
static inline void writeR5G5B5A1_UNORM(COMMON_WRITE_FLOAT_FUNC_PARAMS)
{
    const ushort redBits   = shiftData<5, 11>(floatToNormalized<5, ushort>(color.r));
    const ushort greenBits = shiftData<5, 6>(floatToNormalized<5, ushort>(color.g));
    const ushort blueBits  = shiftData<5, 1>(floatToNormalized<5, ushort>(color.b));
    const ushort alphaBits = shiftData<1, 0>(floatToNormalized<1, ushort>(color.a));

    ushort packed = redBits | greenBits | blueBits | alphaBits;
    shortToBytes(packed, bufferOffset, buffer);
}
280
281// R10G10B10A2_SINT
// Decodes one packed R10G10B10A2 signed-integer pixel (32 bits read at bufferOffset).
// The three 10-bit color fields and the 2-bit alpha field are sign-extended to 32 bits.
static inline int4 readR10G10B10A2_SINT(COMMON_READ_FUNC_PARAMS)
{
    int4 color;
    int src = bytesToInt<int>(buffer, bufferOffset);

    constexpr int3 rgbSignMask(0x200);         // Bit 9: sign bit of a 10-bit field
    constexpr int3 negativeMask(0xfffffc00);   // Bits 10..31: extension of a 10-bit field
    constexpr int alphaSignMask = 0x2;         // Bit 1: sign bit of the 2-bit alpha field
    constexpr int alphaNegMask  = 0xfffffffc;  // Bits 2..31: extension of the alpha field

    color.r = getShiftedData<10, 0>(src);
    color.g = getShiftedData<10, 10>(src);
    color.b = getShiftedData<10, 20>(src);

    // isRgbNegative is 1 where the sign bit is set, so the multiply selects the mask.
    int3 isRgbNegative = (color.rgb & rgbSignMask) >> 9;
    color.rgb          = (isRgbNegative * negativeMask) | color.rgb;

    color.a = getShiftedData<2, 30>(src);
    // The parentheses matter: `>>` binds tighter than `&`, so without them the test
    // would read bit 0 of alpha instead of its sign bit (bit 1).
    int isAlphaNegative = (color.a & alphaSignMask) >> 1;
    color.a             = (isAlphaNegative * alphaNegMask) | color.a;
    return color;
}
304// R10G10B10A2_UINT
// Decodes one packed R10G10B10A2 unsigned-integer pixel (32 bits read at bufferOffset).
static inline uint4 readR10G10B10A2_UINT(COMMON_READ_FUNC_PARAMS)
{
    uint packed = bytesToInt<uint>(buffer, bufferOffset);

    const uint red   = getShiftedData<10, 0>(packed);
    const uint green = getShiftedData<10, 10>(packed);
    const uint blue  = getShiftedData<10, 20>(packed);
    const uint alpha = getShiftedData<2, 30>(packed);
    return uint4(red, green, blue, alpha);
}
316
317// R8G8B8A8 generic
// Decodes four 8-bit unorm channels from the 32 bits read at bufferOffset.
// With isSRGB set, the sRGB variant of the unpack function is used.
static inline float4 readR8G8B8A8(COMMON_READ_FUNC_PARAMS, bool isSRGB)
{
    uint packed = bytesToInt<uint>(buffer, bufferOffset);

    if (isSRGB)
    {
        return unpack_unorm4x8_srgb_to_float(packed);
    }
    return unpack_unorm4x8_to_float(packed);
}
// Packs color into four 8-bit unorm channels and stores the 32 bits at bufferOffset.
// With isSRGB set, the sRGB variant of the pack function is used.
static inline void writeR8G8B8A8(COMMON_WRITE_FLOAT_FUNC_PARAMS, bool isSRGB)
{
    uint packed = isSRGB ? pack_float_to_srgb_unorm4x8(color) : pack_float_to_unorm4x8(color);

    intToBytes(packed, bufferOffset, buffer);
}
348
// Decodes three consecutive 8-bit unorm channels starting at bufferOffset; alpha is 1.
// With isSRGB set, the result is passed through sRGBtoLinear().
static inline float4 readR8G8B8(COMMON_READ_FUNC_PARAMS, bool isSRGB)
{
    const float red   = normalizedToFloat<uchar>(buffer[bufferOffset]);
    const float green = normalizedToFloat<uchar>(buffer[bufferOffset + 1]);
    const float blue  = normalizedToFloat<uchar>(buffer[bufferOffset + 2]);

    float4 color = float4(red, green, blue, 1.0f);
    if (isSRGB)
    {
        return sRGBtoLinear(color);
    }
    return color;
}
// Packs color (with alpha forced to 1) like the 4x8 formats and stores it through
// int24bitToBytes(). With isSRGB set, the sRGB pack function is used.
static inline void writeR8G8B8(COMMON_WRITE_FLOAT_FUNC_PARAMS, bool isSRGB)
{
    const float4 opaque = float4(color.rgb, 1.0f);

    uint packed = isSRGB ? pack_float_to_srgb_unorm4x8(opaque) : pack_float_to_unorm4x8(opaque);
    int24bitToBytes(packed, bufferOffset, buffer);
}
378
379// RGBA8_SNORM
// Decodes four 8-bit snorm channels from the 32 bits read at bufferOffset.
static inline float4 readR8G8B8A8_SNORM(COMMON_READ_FUNC_PARAMS)
{
    uint packed = bytesToInt<uint>(buffer, bufferOffset);
    return unpack_snorm4x8_to_float(packed);
}
// Packs color into four 8-bit snorm channels and stores the 32 bits at bufferOffset.
static inline void writeR8G8B8A8_SNORM(COMMON_WRITE_FLOAT_FUNC_PARAMS)
{
    uint packed = pack_float_to_snorm4x8(color);
    intToBytes(packed, bufferOffset, buffer);
}
395
396// RGB8_SNORM
// Decodes three consecutive 8-bit snorm channels starting at bufferOffset; alpha is 1.
static inline float4 readR8G8B8_SNORM(COMMON_READ_FUNC_PARAMS)
{
    const float red   = normalizedToFloat<7, char>(buffer[bufferOffset]);
    const float green = normalizedToFloat<7, char>(buffer[bufferOffset + 1]);
    const float blue  = normalizedToFloat<7, char>(buffer[bufferOffset + 2]);

    return float4(red, green, blue, 1.0f);
}
// Packs color as 4x8 snorm and stores it through int24bitToBytes().
static inline void writeR8G8B8_SNORM(COMMON_WRITE_FLOAT_FUNC_PARAMS)
{
    uint packed = pack_float_to_snorm4x8(color);
    int24bitToBytes(packed, bufferOffset, buffer);
}
413
414// RGBA8
// RGBA8 unorm read: readR8G8B8A8() without sRGB decoding.
static inline float4 readR8G8B8A8_UNORM(COMMON_READ_FUNC_PARAMS)
{
    constexpr bool isSRGB = false;
    return readR8G8B8A8(FORWARD_COMMON_READ_FUNC_PARAMS, isSRGB);
}
// RGBA8 unorm write: writeR8G8B8A8() without sRGB encoding.
static inline void writeR8G8B8A8_UNORM(COMMON_WRITE_FLOAT_FUNC_PARAMS)
{
    constexpr bool isSRGB = false;
    writeR8G8B8A8(FORWARD_COMMON_WRITE_FUNC_PARAMS, isSRGB);
}
423
// RGBA8 sRGB read: readR8G8B8A8() with sRGB decoding.
static inline float4 readR8G8B8A8_UNORM_SRGB(COMMON_READ_FUNC_PARAMS)
{
    constexpr bool isSRGB = true;
    return readR8G8B8A8(FORWARD_COMMON_READ_FUNC_PARAMS, isSRGB);
}
// RGBA8 sRGB write: writeR8G8B8A8() with sRGB encoding.
static inline void writeR8G8B8A8_UNORM_SRGB(COMMON_WRITE_FLOAT_FUNC_PARAMS)
{
    constexpr bool isSRGB = true;
    writeR8G8B8A8(FORWARD_COMMON_WRITE_FUNC_PARAMS, isSRGB);
}
432
433// BGRA8
// BGRA8 unorm read: decodes as RGBA8, then swaps the red and blue channels.
static inline float4 readB8G8R8A8_UNORM(COMMON_READ_FUNC_PARAMS)
{
    const float4 stored = readR8G8B8A8(FORWARD_COMMON_READ_FUNC_PARAMS, false);
    return stored.bgra;
}
// BGRA8 unorm write: swaps the red and blue channels, then encodes as RGBA8.
static inline void writeB8G8R8A8_UNORM(COMMON_WRITE_FLOAT_FUNC_PARAMS)
{
    // The forwarding macro passes `color` by name, so swizzle it in place first.
    color = color.bgra;
    writeR8G8B8A8(FORWARD_COMMON_WRITE_FUNC_PARAMS, false);
}
443
// BGRA8 sRGB read: decodes as sRGB RGBA8, then swaps the red and blue channels.
static inline float4 readB8G8R8A8_UNORM_SRGB(COMMON_READ_FUNC_PARAMS)
{
    const float4 stored = readR8G8B8A8(FORWARD_COMMON_READ_FUNC_PARAMS, true);
    return stored.bgra;
}
// BGRA8 sRGB write: swaps the red and blue channels, then encodes as sRGB RGBA8.
static inline void writeB8G8R8A8_UNORM_SRGB(COMMON_WRITE_FLOAT_FUNC_PARAMS)
{
    // The forwarding macro passes `color` by name, so swizzle it in place first.
    color = color.bgra;
    writeR8G8B8A8(FORWARD_COMMON_WRITE_FUNC_PARAMS, true);
}
453
454// RGB8
// RGB8 unorm read: readR8G8B8() without sRGB decoding.
static inline float4 readR8G8B8_UNORM(COMMON_READ_FUNC_PARAMS)
{
    constexpr bool isSRGB = false;
    return readR8G8B8(FORWARD_COMMON_READ_FUNC_PARAMS, isSRGB);
}
// RGB8 unorm write: writeR8G8B8() without sRGB encoding.
static inline void writeR8G8B8_UNORM(COMMON_WRITE_FLOAT_FUNC_PARAMS)
{
    constexpr bool isSRGB = false;
    writeR8G8B8(FORWARD_COMMON_WRITE_FUNC_PARAMS, isSRGB);
}
463
// RGB8 sRGB read: readR8G8B8() with sRGB decoding.
static inline float4 readR8G8B8_UNORM_SRGB(COMMON_READ_FUNC_PARAMS)
{
    constexpr bool isSRGB = true;
    return readR8G8B8(FORWARD_COMMON_READ_FUNC_PARAMS, isSRGB);
}
// RGB8 sRGB write: writeR8G8B8() with sRGB encoding.
static inline void writeR8G8B8_UNORM_SRGB(COMMON_WRITE_FLOAT_FUNC_PARAMS)
{
    constexpr bool isSRGB = true;
    writeR8G8B8(FORWARD_COMMON_WRITE_FUNC_PARAMS, isSRGB);
}
472
473// L8
// Decodes one 8-bit luminance value; it is replicated to r, g and b, and alpha is 1.
static inline float4 readL8_UNORM(COMMON_READ_FUNC_PARAMS)
{
    const float luminance = normalizedToFloat<uchar>(buffer[bufferOffset]);
    return float4(luminance, luminance, luminance, 1.0f);
}
// Stores the red channel as one 8-bit unorm luminance byte at bufferOffset.
static inline void writeL8_UNORM(COMMON_WRITE_FLOAT_FUNC_PARAMS)
{
    device uchar *dst = buffer + bufferOffset;
    dst[0]            = floatToNormalized<uchar>(color.r);
}
485
486// A8
// Stores the alpha channel as one 8-bit unorm byte at bufferOffset.
static inline void writeA8_UNORM(COMMON_WRITE_FLOAT_FUNC_PARAMS)
{
    device uchar *dst = buffer + bufferOffset;
    dst[0]            = floatToNormalized<uchar>(color.a);
}
491
492// L8A8
// Decodes an 8-bit luminance byte followed by an 8-bit alpha byte.
// Luminance is replicated to r, g and b.
static inline float4 readL8A8_UNORM(COMMON_READ_FUNC_PARAMS)
{
    const float luminance = normalizedToFloat<uchar>(buffer[bufferOffset]);
    const float alpha     = normalizedToFloat<uchar>(buffer[bufferOffset + 1]);
    return float4(luminance, luminance, luminance, alpha);
}
// Stores red (as luminance) and alpha as two consecutive 8-bit unorm bytes.
static inline void writeL8A8_UNORM(COMMON_WRITE_FLOAT_FUNC_PARAMS)
{
    device uchar *dst = buffer + bufferOffset;
    dst[0]            = floatToNormalized<uchar>(color.r);
    dst[1]            = floatToNormalized<uchar>(color.a);
}
505
506// R8
// Decodes one 8-bit unorm red value; green and blue are 0, alpha is 1.
static inline float4 readR8_UNORM(COMMON_READ_FUNC_PARAMS)
{
    const float red = normalizedToFloat<uchar>(buffer[bufferOffset]);
    return float4(red, 0.0f, 0.0f, 1.0f);
}
// Stores the red channel as one 8-bit unorm byte at bufferOffset.
static inline void writeR8_UNORM(COMMON_WRITE_FLOAT_FUNC_PARAMS)
{
    device uchar *dst = buffer + bufferOffset;
    dst[0]            = floatToNormalized<uchar>(color.r);
}
519
// Decodes one 8-bit snorm red value; green and blue are 0, alpha is 1.
static inline float4 readR8_SNORM(COMMON_READ_FUNC_PARAMS)
{
    const float red = normalizedToFloat<7, char>(buffer[bufferOffset]);
    return float4(red, 0.0f, 0.0f, 1.0f);
}
// Stores the red channel as one 8-bit snorm byte at bufferOffset.
static inline void writeR8_SNORM(COMMON_WRITE_FLOAT_FUNC_PARAMS)
{
    device uchar *dst = buffer + bufferOffset;
    // Reinterpret the bits of the signed value; the buffer is addressed as uchar.
    dst[0] = as_type<uchar>(floatToNormalized<char>(color.r));
}
532
533// R8_SINT
// Reads one signed 8-bit red value; green and blue are 0, alpha is 1.
static inline int4 readR8_SINT(COMMON_READ_FUNC_PARAMS)
{
    // The byte is reinterpreted as signed char before widening to int.
    const int red = as_type<char>(buffer[bufferOffset]);
    return int4(red, 0, 0, 1);
}
// Stores the red channel, converted to uchar, as one byte at bufferOffset.
static inline void writeR8_SINT(COMMON_WRITE_SINT_FUNC_PARAMS)
{
    device uchar *dst = buffer + bufferOffset;
    dst[0]            = static_cast<uchar>(color.r);
}
546
547// R8_UINT
// Reads one unsigned 8-bit red value; green and blue are 0, alpha is 1.
static inline uint4 readR8_UINT(COMMON_READ_FUNC_PARAMS)
{
    const uint red = as_type<uchar>(buffer[bufferOffset]);
    return uint4(red, 0, 0, 1);
}
// Stores the red channel, converted to uchar, as one byte at bufferOffset.
static inline void writeR8_UINT(COMMON_WRITE_UINT_FUNC_PARAMS)
{
    device uchar *dst = buffer + bufferOffset;
    dst[0]            = static_cast<uchar>(color.r);
}
560
561// R8G8
// Decodes two consecutive 8-bit unorm channels (red, green); blue is 0, alpha is 1.
static inline float4 readR8G8_UNORM(COMMON_READ_FUNC_PARAMS)
{
    const float red   = normalizedToFloat<uchar>(buffer[bufferOffset]);
    const float green = normalizedToFloat<uchar>(buffer[bufferOffset + 1]);
    return float4(red, green, 0.0f, 1.0f);
}
// Stores red and green as two consecutive 8-bit unorm bytes.
static inline void writeR8G8_UNORM(COMMON_WRITE_FLOAT_FUNC_PARAMS)
{
    device uchar *dst = buffer + bufferOffset;
    dst[0]            = floatToNormalized<uchar>(color.r);
    dst[1]            = floatToNormalized<uchar>(color.g);
}
576
// Decodes two consecutive 8-bit snorm channels (red, green); blue is 0, alpha is 1.
static inline float4 readR8G8_SNORM(COMMON_READ_FUNC_PARAMS)
{
    const float red   = normalizedToFloat<7, char>(buffer[bufferOffset]);
    const float green = normalizedToFloat<7, char>(buffer[bufferOffset + 1]);
    return float4(red, green, 0.0f, 1.0f);
}
// Stores red and green as two consecutive 8-bit snorm bytes.
static inline void writeR8G8_SNORM(COMMON_WRITE_FLOAT_FUNC_PARAMS)
{
    device uchar *dst = buffer + bufferOffset;
    // Reinterpret the bits of each signed value; the buffer is addressed as uchar.
    dst[0] = as_type<uchar>(floatToNormalized<char>(color.r));
    dst[1] = as_type<uchar>(floatToNormalized<char>(color.g));
}
591
592// RG8_SINT
// Reads two consecutive signed 8-bit channels (red, green); blue is 0, alpha is 1.
static inline int4 readR8G8_SINT(COMMON_READ_FUNC_PARAMS)
{
    const int red   = as_type<char>(buffer[bufferOffset]);
    const int green = as_type<char>(buffer[bufferOffset + 1]);
    return int4(red, green, 0, 1);
}
// Stores red and green, each converted to uchar, as two consecutive bytes.
static inline void writeR8G8_SINT(COMMON_WRITE_SINT_FUNC_PARAMS)
{
    device uchar *dst = buffer + bufferOffset;
    dst[0]            = static_cast<uchar>(color.r);
    dst[1]            = static_cast<uchar>(color.g);
}
607
608// RG8_UINT
// Reads two consecutive unsigned 8-bit channels (red, green); blue is 0, alpha is 1.
static inline uint4 readR8G8_UINT(COMMON_READ_FUNC_PARAMS)
{
    const uint red   = as_type<uchar>(buffer[bufferOffset]);
    const uint green = as_type<uchar>(buffer[bufferOffset + 1]);
    return uint4(red, green, 0, 1);
}
// Stores red and green, each converted to uchar, as two consecutive bytes.
static inline void writeR8G8_UINT(COMMON_WRITE_UINT_FUNC_PARAMS)
{
    device uchar *dst = buffer + bufferOffset;
    dst[0]            = static_cast<uchar>(color.r);
    dst[1]            = static_cast<uchar>(color.g);
}
623
624// R8G8B8_SINT
// Reads three consecutive signed 8-bit channels (red, green, blue); alpha is 1.
static inline int4 readR8G8B8_SINT(COMMON_READ_FUNC_PARAMS)
{
    const int red   = as_type<char>(buffer[bufferOffset]);
    const int green = as_type<char>(buffer[bufferOffset + 1]);
    const int blue  = as_type<char>(buffer[bufferOffset + 2]);
    return int4(red, green, blue, 1);
}
634
635// R8G8B8_UINT
// Reads three consecutive unsigned 8-bit channels (red, green, blue); alpha is 1.
static inline uint4 readR8G8B8_UINT(COMMON_READ_FUNC_PARAMS)
{
    const uint red   = as_type<uchar>(buffer[bufferOffset]);
    const uint green = as_type<uchar>(buffer[bufferOffset + 1]);
    const uint blue  = as_type<uchar>(buffer[bufferOffset + 2]);
    return uint4(red, green, blue, 1);
}
645
// R8G8B8A8_SINT
// Reads four consecutive signed 8-bit channels (red, green, blue, alpha).
static inline int4 readR8G8B8A8_SINT(COMMON_READ_FUNC_PARAMS)
{
    const int red   = as_type<char>(buffer[bufferOffset]);
    const int green = as_type<char>(buffer[bufferOffset + 1]);
    const int blue  = as_type<char>(buffer[bufferOffset + 2]);
    const int alpha = as_type<char>(buffer[bufferOffset + 3]);
    return int4(red, green, blue, alpha);
}
// Stores the four channels, each converted to uchar, as four consecutive bytes.
static inline void writeR8G8B8A8_SINT(COMMON_WRITE_SINT_FUNC_PARAMS)
{
    device uchar *dst = buffer + bufferOffset;
    dst[0]            = static_cast<uchar>(color.r);
    dst[1]            = static_cast<uchar>(color.g);
    dst[2]            = static_cast<uchar>(color.b);
    dst[3]            = static_cast<uchar>(color.a);
}
663
// R8G8B8A8_UINT
// Reads four consecutive unsigned 8-bit channels (red, green, blue, alpha).
static inline uint4 readR8G8B8A8_UINT(COMMON_READ_FUNC_PARAMS)
{
    const uint red   = as_type<uchar>(buffer[bufferOffset]);
    const uint green = as_type<uchar>(buffer[bufferOffset + 1]);
    const uint blue  = as_type<uchar>(buffer[bufferOffset + 2]);
    const uint alpha = as_type<uchar>(buffer[bufferOffset + 3]);
    return uint4(red, green, blue, alpha);
}
// Stores the four channels, each converted to uchar, as four consecutive bytes.
static inline void writeR8G8B8A8_UINT(COMMON_WRITE_UINT_FUNC_PARAMS)
{
    device uchar *dst = buffer + bufferOffset;
    dst[0]            = static_cast<uchar>(color.r);
    dst[1]            = static_cast<uchar>(color.g);
    dst[2]            = static_cast<uchar>(color.b);
    dst[3]            = static_cast<uchar>(color.a);
}
681
682// R16_FLOAT
// Reads one half-float red value (16 bits at bufferOffset); green and blue are 0, alpha is 1.
static inline float4 readR16_FLOAT(COMMON_READ_FUNC_PARAMS)
{
    // The 16 raw bits are reinterpreted as a half, then widened to float.
    const float red = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset));
    return float4(red, 0.0f, 0.0f, 1.0f);
}
// Converts red to half and stores its 16 bits at bufferOffset.
static inline void writeR16_FLOAT(COMMON_WRITE_FLOAT_FUNC_PARAMS)
{
    const half red = static_cast<half>(color.r);
    shortToBytes(as_type<ushort>(red), bufferOffset, buffer);
}
695// R16_NORM
// Decodes one 16-bit normalized red value; ShortType selects signed (short) or
// unsigned (ushort) interpretation. Green and blue are 0, alpha is 1.
template <typename ShortType>
static inline float4 readR16_NORM(COMMON_READ_FUNC_PARAMS)
{
    const float red = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset));
    return float4(red, 0.0f, 0.0f, 1.0f);
}
// Names used by the per-format switch-case macros.
#define readR16_SNORM readR16_NORM<short>
#define readR16_UNORM readR16_NORM<ushort>
707
// Encodes red as one 16-bit normalized value; ShortType selects signed (short) or
// unsigned (ushort) encoding.
template <typename ShortType>
static inline void writeR16_NORM(COMMON_WRITE_FLOAT_FUNC_PARAMS)
{
    shortToBytes(floatToNormalized<ShortType>(color.r), bufferOffset, buffer);
}
// Names used by the per-format switch-case macros.
#define writeR16_SNORM writeR16_NORM<short>
#define writeR16_UNORM writeR16_NORM<ushort>
715
716// R16_SINT
// Reads one signed 16-bit red value; green and blue are 0, alpha is 1.
static inline int4 readR16_SINT(COMMON_READ_FUNC_PARAMS)
{
    const int red = bytesToShort<short>(buffer, bufferOffset);
    return int4(red, 0, 0, 1);
}
// Stores red, converted to short, as 16 bits at bufferOffset.
static inline void writeR16_SINT(COMMON_WRITE_SINT_FUNC_PARAMS)
{
    short red = static_cast<short>(color.r);
    shortToBytes(red, bufferOffset, buffer);
}
729
730// R16_UINT
// Reads one unsigned 16-bit red value; green and blue are 0, alpha is 1.
static inline uint4 readR16_UINT(COMMON_READ_FUNC_PARAMS)
{
    const uint red = bytesToShort<ushort>(buffer, bufferOffset);
    return uint4(red, 0, 0, 1);
}
// Stores red, converted to ushort, as 16 bits at bufferOffset.
static inline void writeR16_UINT(COMMON_WRITE_UINT_FUNC_PARAMS)
{
    ushort red = static_cast<ushort>(color.r);
    shortToBytes(red, bufferOffset, buffer);
}
743
744// A16_FLOAT
// Reads one half-float alpha value (16 bits at bufferOffset); r, g and b are 0.
static inline float4 readA16_FLOAT(COMMON_READ_FUNC_PARAMS)
{
    const float alpha = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset));
    return float4(0.0f, 0.0f, 0.0f, alpha);
}
// Converts alpha to half and stores its 16 bits at bufferOffset.
static inline void writeA16_FLOAT(COMMON_WRITE_FLOAT_FUNC_PARAMS)
{
    const half alpha = static_cast<half>(color.a);
    shortToBytes(as_type<ushort>(alpha), bufferOffset, buffer);
}
756
757// L16_FLOAT
// Reads one half-float luminance value; it is replicated to r, g and b, and alpha is 1.
static inline float4 readL16_FLOAT(COMMON_READ_FUNC_PARAMS)
{
    const float luminance = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset));
    return float4(luminance, luminance, luminance, 1.0f);
}
// Converts red (used as luminance) to half and stores its 16 bits at bufferOffset.
static inline void writeL16_FLOAT(COMMON_WRITE_FLOAT_FUNC_PARAMS)
{
    const half luminance = static_cast<half>(color.r);
    shortToBytes(as_type<ushort>(luminance), bufferOffset, buffer);
}
769
770// L16A16_FLOAT
// Reads a half-float luminance followed by a half-float alpha (2 bytes further on).
// Luminance is replicated to r, g and b.
static inline float4 readL16A16_FLOAT(COMMON_READ_FUNC_PARAMS)
{
    const float luminance = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset));
    const float alpha     = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 2));
    return float4(luminance, luminance, luminance, alpha);
}
// Stores red (as luminance) and alpha as two consecutive half-floats.
static inline void writeL16A16_FLOAT(COMMON_WRITE_FLOAT_FUNC_PARAMS)
{
    const half luminance = static_cast<half>(color.r);
    const half alpha     = static_cast<half>(color.a);

    shortToBytes(as_type<ushort>(luminance), bufferOffset, buffer);
    shortToBytes(as_type<ushort>(alpha), bufferOffset + 2, buffer);
}
783
784// R16G16_FLOAT
// Reads two consecutive half-float channels (red, green); blue is 0, alpha is 1.
static inline float4 readR16G16_FLOAT(COMMON_READ_FUNC_PARAMS)
{
    const float red   = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset));
    const float green = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 2));
    return float4(red, green, 0.0f, 1.0f);
}
// Stores red and green as two consecutive half-floats.
static inline void writeR16G16_FLOAT(COMMON_WRITE_FLOAT_FUNC_PARAMS)
{
    const half red   = static_cast<half>(color.r);
    const half green = static_cast<half>(color.g);

    shortToBytes(as_type<ushort>(red), bufferOffset, buffer);
    shortToBytes(as_type<ushort>(green), bufferOffset + 2, buffer);
}
799
800// R16G16_NORM
// Decodes two consecutive 16-bit normalized channels (red, green); ShortType selects
// signed (short) or unsigned (ushort) interpretation. Blue is 0, alpha is 1.
template <typename ShortType>
static inline float4 readR16G16_NORM(COMMON_READ_FUNC_PARAMS)
{
    const float red =
        normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset));
    const float green =
        normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 2));
    return float4(red, green, 0.0f, 1.0f);
}
// Names used by the per-format switch-case macros.
#define readR16G16_SNORM readR16G16_NORM<short>
#define readR16G16_UNORM readR16G16_NORM<ushort>
813
// Encodes red and green as two consecutive 16-bit normalized values; ShortType
// selects signed (short) or unsigned (ushort) encoding.
template <typename ShortType>
static inline void writeR16G16_NORM(COMMON_WRITE_FLOAT_FUNC_PARAMS)
{
    const uint redOffset   = bufferOffset;
    const uint greenOffset = bufferOffset + 2;

    shortToBytes(floatToNormalized<ShortType>(color.r), redOffset, buffer);
    shortToBytes(floatToNormalized<ShortType>(color.g), greenOffset, buffer);
}
// Names used by the per-format switch-case macros.
#define writeR16G16_SNORM writeR16G16_NORM<short>
#define writeR16G16_UNORM writeR16G16_NORM<ushort>
822
823// R16G16_SINT
// Reads two consecutive signed 16-bit channels (red, green); blue is 0, alpha is 1.
static inline int4 readR16G16_SINT(COMMON_READ_FUNC_PARAMS)
{
    const int red   = bytesToShort<short>(buffer, bufferOffset);
    const int green = bytesToShort<short>(buffer, bufferOffset + 2);
    return int4(red, green, 0, 1);
}
// Stores red and green, each converted to short, as two consecutive 16-bit values.
static inline void writeR16G16_SINT(COMMON_WRITE_SINT_FUNC_PARAMS)
{
    short red   = static_cast<short>(color.r);
    short green = static_cast<short>(color.g);

    shortToBytes(red, bufferOffset, buffer);
    shortToBytes(green, bufferOffset + 2, buffer);
}
838
839// R16G16_UINT
// Reads two consecutive unsigned 16-bit channels (red, green); blue is 0, alpha is 1.
static inline uint4 readR16G16_UINT(COMMON_READ_FUNC_PARAMS)
{
    const uint red   = bytesToShort<ushort>(buffer, bufferOffset);
    const uint green = bytesToShort<ushort>(buffer, bufferOffset + 2);
    return uint4(red, green, 0, 1);
}
// Stores red and green, each converted to ushort, as two consecutive 16-bit values.
static inline void writeR16G16_UINT(COMMON_WRITE_UINT_FUNC_PARAMS)
{
    ushort red   = static_cast<ushort>(color.r);
    ushort green = static_cast<ushort>(color.g);

    shortToBytes(red, bufferOffset, buffer);
    shortToBytes(green, bufferOffset + 2, buffer);
}
854
855// R16G16B16_FLOAT
// Reads three consecutive half-float channels (red, green, blue); alpha is 1.
static inline float4 readR16G16B16_FLOAT(COMMON_READ_FUNC_PARAMS)
{
    const float red   = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset));
    const float green = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 2));
    const float blue  = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 4));
    return float4(red, green, blue, 1.0f);
}
865
866// R16G16B16_NORM
// Decodes three consecutive 16-bit normalized channels (red, green, blue); ShortType
// selects signed (short) or unsigned (ushort) interpretation. Alpha is 1.
template <typename ShortType>
static inline float4 readR16G16B16_NORM(COMMON_READ_FUNC_PARAMS)
{
    const float red =
        normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset));
    const float green =
        normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 2));
    const float blue =
        normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 4));
    return float4(red, green, blue, 1.0f);
}
// Names used by the per-format switch-case macros.
#define readR16G16B16_SNORM readR16G16B16_NORM<short>
#define readR16G16B16_UNORM readR16G16B16_NORM<ushort>
879// R16G16B16_SINT
// Reads three consecutive signed 16-bit channels (red, green, blue); alpha is 1.
static inline int4 readR16G16B16_SINT(COMMON_READ_FUNC_PARAMS)
{
    const int red   = bytesToShort<short>(buffer, bufferOffset);
    const int green = bytesToShort<short>(buffer, bufferOffset + 2);
    const int blue  = bytesToShort<short>(buffer, bufferOffset + 4);
    return int4(red, green, blue, 1);
}
889
890// R16G16B16_UINT
// Reads three consecutive unsigned 16-bit channels (red, green, blue); alpha is 1.
static inline uint4 readR16G16B16_UINT(COMMON_READ_FUNC_PARAMS)
{
    const uint red   = bytesToShort<ushort>(buffer, bufferOffset);
    const uint green = bytesToShort<ushort>(buffer, bufferOffset + 2);
    const uint blue  = bytesToShort<ushort>(buffer, bufferOffset + 4);
    return uint4(red, green, blue, 1);
}
900
901// R16G16B16A16_FLOAT
// Reads four consecutive half-float channels (red, green, blue, alpha).
static inline float4 readR16G16B16A16_FLOAT(COMMON_READ_FUNC_PARAMS)
{
    const float red   = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset));
    const float green = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 2));
    const float blue  = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 4));
    const float alpha = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 6));
    return float4(red, green, blue, alpha);
}
911static inline void writeR16G16B16A16_FLOAT(COMMON_WRITE_FLOAT_FUNC_PARAMS)
912{
913    shortToBytes(as_type<ushort>(static_cast<half>(color.r)), bufferOffset, buffer);
914    shortToBytes(as_type<ushort>(static_cast<half>(color.g)), bufferOffset + 2, buffer);
915    shortToBytes(as_type<ushort>(static_cast<half>(color.b)), bufferOffset + 4, buffer);
916    shortToBytes(as_type<ushort>(static_cast<half>(color.a)), bufferOffset + 6, buffer);
917}
918
// R16G16B16A16_NORM
// ShortType selects how each 16-bit channel is interpreted (short for SNORM, ushort for UNORM).
template <typename ShortType>
static inline float4 readR16G16B16A16_NORM(COMMON_READ_FUNC_PARAMS)
{
    const auto rawRed   = bytesToShort<ShortType>(buffer, bufferOffset);
    const auto rawGreen = bytesToShort<ShortType>(buffer, bufferOffset + 2);
    const auto rawBlue  = bytesToShort<ShortType>(buffer, bufferOffset + 4);
    const auto rawAlpha = bytesToShort<ShortType>(buffer, bufferOffset + 6);

    const float red   = normalizedToFloat<ShortType>(rawRed);
    const float green = normalizedToFloat<ShortType>(rawGreen);
    const float blue  = normalizedToFloat<ShortType>(rawBlue);
    const float alpha = normalizedToFloat<ShortType>(rawAlpha);
    return float4(red, green, blue, alpha);
}
#define readR16G16B16A16_SNORM readR16G16B16A16_NORM<short>
#define readR16G16B16A16_UNORM readR16G16B16A16_NORM<ushort>

// Converts every channel of color with floatToNormalized<ShortType> and stores it,
// 2 bytes per channel.
template<typename ShortType>
static inline void writeR16G16B16A16_NORM(COMMON_WRITE_FLOAT_FUNC_PARAMS)
{
    for (uint channel = 0; channel < 4; ++channel)
    {
        shortToBytes(floatToNormalized<ShortType>(color[channel]), bufferOffset + 2 * channel,
                     buffer);
    }
}
#define writeR16G16B16A16_SNORM writeR16G16B16A16_NORM<short>
#define writeR16G16B16A16_UNORM writeR16G16B16A16_NORM<ushort>
943
// R16G16B16A16_SINT
// Reads four signed 16-bit channels and widens them to int.
static inline int4 readR16G16B16A16_SINT(COMMON_READ_FUNC_PARAMS)
{
    const int red   = bytesToShort<short>(buffer, bufferOffset);
    const int green = bytesToShort<short>(buffer, bufferOffset + 2);
    const int blue  = bytesToShort<short>(buffer, bufferOffset + 4);
    const int alpha = bytesToShort<short>(buffer, bufferOffset + 6);
    return int4(red, green, blue, alpha);
}
// Truncates every channel of color to short and stores it, 2 bytes per channel.
static inline void writeR16G16B16A16_SINT(COMMON_WRITE_SINT_FUNC_PARAMS)
{
    for (uint channel = 0; channel < 4; ++channel)
    {
        shortToBytes(static_cast<short>(color[channel]), bufferOffset + 2 * channel, buffer);
    }
}
961
// R16G16B16A16_UINT
// Reads four unsigned 16-bit channels and widens them to uint.
static inline uint4 readR16G16B16A16_UINT(COMMON_READ_FUNC_PARAMS)
{
    const uint red   = bytesToShort<ushort>(buffer, bufferOffset);
    const uint green = bytesToShort<ushort>(buffer, bufferOffset + 2);
    const uint blue  = bytesToShort<ushort>(buffer, bufferOffset + 4);
    const uint alpha = bytesToShort<ushort>(buffer, bufferOffset + 6);
    return uint4(red, green, blue, alpha);
}
// Truncates every channel of color to ushort and stores it, 2 bytes per channel.
static inline void writeR16G16B16A16_UINT(COMMON_WRITE_UINT_FUNC_PARAMS)
{
    for (uint channel = 0; channel < 4; ++channel)
    {
        shortToBytes(static_cast<ushort>(color[channel]), bufferOffset + 2 * channel, buffer);
    }
}
979
// R32_FLOAT
// Reads one 32-bit value and reinterprets it as float for red; green/blue are 0, alpha is 1.
static inline float4 readR32_FLOAT(COMMON_READ_FUNC_PARAMS)
{
    const float red = as_type<float>(bytesToInt<uint>(buffer, bufferOffset));
    return float4(red, 0.0f, 0.0f, 1.0f);
}
// Stores the raw 32-bit pattern of the red channel; the other channels are not written.
static inline void writeR32_FLOAT(COMMON_WRITE_FLOAT_FUNC_PARAMS)
{
    const uint redBits = as_type<uint>(color.r);
    intToBytes(redBits, bufferOffset, buffer);
}
993
// R32_NORM
// IntType selects how the 32-bit channel is interpreted (int for SNORM, uint for UNORM).
// Green/blue are 0, alpha is 1.
template <typename IntType>
static inline float4 readR32_NORM(COMMON_READ_FUNC_PARAMS)
{
    const auto rawRed = bytesToInt<IntType>(buffer, bufferOffset);
    const float red   = normalizedToFloat<IntType>(rawRed);
    return float4(red, 0.0f, 0.0f, 1.0f);
}
#define readR32_SNORM readR32_NORM<int>
#define readR32_UNORM readR32_NORM<uint>
1006
// A32_FLOAT
// Reads one 32-bit value and reinterprets it as float for alpha; RGB are 0.
static inline float4 readA32_FLOAT(COMMON_READ_FUNC_PARAMS)
{
    const float alpha = as_type<float>(bytesToInt<uint>(buffer, bufferOffset));
    return float4(0.0f, 0.0f, 0.0f, alpha);
}
// Stores the raw 32-bit pattern of the alpha channel; the other channels are not written.
static inline void writeA32_FLOAT(COMMON_WRITE_FLOAT_FUNC_PARAMS)
{
    const uint alphaBits = as_type<uint>(color.a);
    intToBytes(alphaBits, bufferOffset, buffer);
}
1019
// L32_FLOAT
// Reads one 32-bit float and replicates it into R, G and B; alpha is 1.
static inline float4 readL32_FLOAT(COMMON_READ_FUNC_PARAMS)
{
    const float luminance = as_type<float>(bytesToInt<uint>(buffer, bufferOffset));
    return float4(luminance, luminance, luminance, 1.0f);
}
// Stores the raw 32-bit pattern of the red channel as the luminance value.
static inline void writeL32_FLOAT(COMMON_WRITE_FLOAT_FUNC_PARAMS)
{
    const uint luminanceBits = as_type<uint>(color.r);
    intToBytes(luminanceBits, bufferOffset, buffer);
}
1032
// R32_SINT
// Reads one signed 32-bit value for red; green/blue are 0, alpha is 1.
static inline int4 readR32_SINT(COMMON_READ_FUNC_PARAMS)
{
    const int red = bytesToInt<int>(buffer, bufferOffset);
    return int4(red, 0, 0, 1);
}
// Stores the red channel only.
static inline void writeR32_SINT(COMMON_WRITE_SINT_FUNC_PARAMS)
{
    const int red = color.r;
    intToBytes(red, bufferOffset, buffer);
}
1046
// R32_FIXED
// Reads one signed 32-bit value and scales it by 1/65536 (i.e. treats it as 16.16 fixed point).
// Green/blue are 0, alpha is 1.
static inline float4 readR32_FIXED(COMMON_READ_FUNC_PARAMS)
{
    constexpr float kFixedToFloat = 1.0f / (1 << 16);
    const float red               = bytesToInt<int>(buffer, bufferOffset) * kFixedToFloat;
    return float4(red, 0.0f, 0.0f, 1.0f);
}
1057
// R32_UINT
// Reads one unsigned 32-bit value for red; green/blue are 0, alpha is 1.
static inline uint4 readR32_UINT(COMMON_READ_FUNC_PARAMS)
{
    const uint red = bytesToInt<uint>(buffer, bufferOffset);
    return uint4(red, 0u, 0u, 1u);
}
// Stores the red channel only.
static inline void writeR32_UINT(COMMON_WRITE_UINT_FUNC_PARAMS)
{
    const uint red = color.r;
    intToBytes(red, bufferOffset, buffer);
}
1071
// L32A32_FLOAT
// Reads two 32-bit floats: the first is replicated into R, G and B, the second becomes alpha.
static inline float4 readL32A32_FLOAT(COMMON_READ_FUNC_PARAMS)
{
    const float luminance = as_type<float>(bytesToInt<uint>(buffer, bufferOffset));
    const float alpha     = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 4));
    return float4(luminance, luminance, luminance, alpha);
}
// Stores the red channel as luminance, followed by the alpha channel 4 bytes later.
static inline void writeL32A32_FLOAT(COMMON_WRITE_FLOAT_FUNC_PARAMS)
{
    const uint luminanceBits = as_type<uint>(color.r);
    const uint alphaBits     = as_type<uint>(color.a);
    intToBytes(luminanceBits, bufferOffset, buffer);
    intToBytes(alphaBits, bufferOffset + 4, buffer);
}
1085
// R32G32_FLOAT
// Reads two 32-bit floats for red and green; blue is 0, alpha is 1.
static inline float4 readR32G32_FLOAT(COMMON_READ_FUNC_PARAMS)
{
    const float red   = as_type<float>(bytesToInt<uint>(buffer, bufferOffset));
    const float green = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 4));
    return float4(red, green, 0.0f, 1.0f);
}
// Stores the raw 32-bit patterns of red and green, 4 bytes per channel.
static inline void writeR32G32_FLOAT(COMMON_WRITE_FLOAT_FUNC_PARAMS)
{
    for (uint channel = 0; channel < 2; ++channel)
    {
        intToBytes(as_type<uint>(color[channel]), bufferOffset + 4 * channel, buffer);
    }
}
1101
// R32G32_NORM
// IntType selects how each 32-bit channel is interpreted (int for SNORM, uint for UNORM).
// Blue is 0, alpha is 1.
template <typename IntType>
static inline float4 readR32G32_NORM(COMMON_READ_FUNC_PARAMS)
{
    const auto rawRed   = bytesToInt<IntType>(buffer, bufferOffset);
    const auto rawGreen = bytesToInt<IntType>(buffer, bufferOffset + 4);

    const float red   = normalizedToFloat<IntType>(rawRed);
    const float green = normalizedToFloat<IntType>(rawGreen);
    return float4(red, green, 0.0f, 1.0f);
}
#define readR32G32_SNORM readR32G32_NORM<int>
#define readR32G32_UNORM readR32G32_NORM<uint>
1115
// R32G32_SINT
// Reads two signed 32-bit values for red and green; blue is 0, alpha is 1.
static inline int4 readR32G32_SINT(COMMON_READ_FUNC_PARAMS)
{
    const int red   = bytesToInt<int>(buffer, bufferOffset);
    const int green = bytesToInt<int>(buffer, bufferOffset + 4);
    return int4(red, green, 0, 1);
}
// Stores red and green, 4 bytes per channel.
static inline void writeR32G32_SINT(COMMON_WRITE_SINT_FUNC_PARAMS)
{
    for (uint channel = 0; channel < 2; ++channel)
    {
        intToBytes(color[channel], bufferOffset + 4 * channel, buffer);
    }
}
1131
// R32G32_FIXED
// Reads two signed 32-bit values and scales each by 1/65536 (i.e. treats them as 16.16 fixed
// point). Blue is 0, alpha is 1.
static inline float4 readR32G32_FIXED(COMMON_READ_FUNC_PARAMS)
{
    constexpr float kFixedToFloat = 1.0f / (1 << 16);
    const float red               = bytesToInt<int>(buffer, bufferOffset) * kFixedToFloat;
    const float green             = bytesToInt<int>(buffer, bufferOffset + 4) * kFixedToFloat;
    return float4(red, green, 0.0f, 1.0f);
}
1143
// R32G32_UINT
// Reads two unsigned 32-bit values for red and green; blue is 0, alpha is 1.
static inline uint4 readR32G32_UINT(COMMON_READ_FUNC_PARAMS)
{
    const uint red   = bytesToInt<uint>(buffer, bufferOffset);
    const uint green = bytesToInt<uint>(buffer, bufferOffset + 4);
    return uint4(red, green, 0u, 1u);
}
// Stores red and green, 4 bytes per channel.
static inline void writeR32G32_UINT(COMMON_WRITE_UINT_FUNC_PARAMS)
{
    for (uint channel = 0; channel < 2; ++channel)
    {
        intToBytes(color[channel], bufferOffset + 4 * channel, buffer);
    }
}
1159
// R32G32B32_FLOAT
// Reads three 32-bit floats for R, G and B; alpha is 1.
static inline float4 readR32G32B32_FLOAT(COMMON_READ_FUNC_PARAMS)
{
    float4 texel;
    texel.a = 1.0f;
    // Channels are stored 4 bytes apart.
    for (uint channel = 0; channel < 3; ++channel)
    {
        texel[channel] = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 4 * channel));
    }
    return texel;
}
1170
// R32G32B32_NORM
// IntType selects how each 32-bit channel is interpreted (int for SNORM, uint for UNORM).
// Alpha is 1.
template <typename IntType>
static inline float4 readR32G32B32_NORM(COMMON_READ_FUNC_PARAMS)
{
    const auto rawRed   = bytesToInt<IntType>(buffer, bufferOffset);
    const auto rawGreen = bytesToInt<IntType>(buffer, bufferOffset + 4);
    const auto rawBlue  = bytesToInt<IntType>(buffer, bufferOffset + 8);

    const float red   = normalizedToFloat<IntType>(rawRed);
    const float green = normalizedToFloat<IntType>(rawGreen);
    const float blue  = normalizedToFloat<IntType>(rawBlue);
    return float4(red, green, blue, 1.0f);
}
#define readR32G32B32_SNORM readR32G32B32_NORM<int>
#define readR32G32B32_UNORM readR32G32B32_NORM<uint>
1184
// R32G32B32_SINT
// Reads three signed 32-bit values for R, G and B; alpha is 1.
static inline int4 readR32G32B32_SINT(COMMON_READ_FUNC_PARAMS)
{
    const int red   = bytesToInt<int>(buffer, bufferOffset);
    const int green = bytesToInt<int>(buffer, bufferOffset + 4);
    const int blue  = bytesToInt<int>(buffer, bufferOffset + 8);
    return int4(red, green, blue, 1);
}
1195
// R32G32B32_FIXED
// Reads three signed 32-bit values and scales each by 1/65536 (i.e. treats them as 16.16 fixed
// point). Alpha is 1.
static inline float4 readR32G32B32_FIXED(COMMON_READ_FUNC_PARAMS)
{
    constexpr float kFixedToFloat = 1.0f / (1 << 16);
    const float red               = bytesToInt<int>(buffer, bufferOffset) * kFixedToFloat;
    const float green             = bytesToInt<int>(buffer, bufferOffset + 4) * kFixedToFloat;
    const float blue              = bytesToInt<int>(buffer, bufferOffset + 8) * kFixedToFloat;
    return float4(red, green, blue, 1.0f);
}
1207
// R32G32B32_UINT
// Reads three unsigned 32-bit values for R, G and B; alpha is 1.
static inline uint4 readR32G32B32_UINT(COMMON_READ_FUNC_PARAMS)
{
    const uint red   = bytesToInt<uint>(buffer, bufferOffset);
    const uint green = bytesToInt<uint>(buffer, bufferOffset + 4);
    const uint blue  = bytesToInt<uint>(buffer, bufferOffset + 8);
    return uint4(red, green, blue, 1u);
}
1218
// R32G32B32A32_FLOAT
// Reads four consecutive 32-bit floats.
static inline float4 readR32G32B32A32_FLOAT(COMMON_READ_FUNC_PARAMS)
{
    float4 texel;
    for (uint channel = 0; channel < 4; ++channel)
    {
        texel[channel] = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 4 * channel));
    }
    return texel;
}
// Stores the raw 32-bit pattern of every channel, 4 bytes per channel.
static inline void writeR32G32B32A32_FLOAT(COMMON_WRITE_FLOAT_FUNC_PARAMS)
{
    for (uint channel = 0; channel < 4; ++channel)
    {
        intToBytes(as_type<uint>(color[channel]), bufferOffset + 4 * channel, buffer);
    }
}
1236
// R32G32B32A32_NORM
// IntType selects how each 32-bit channel is interpreted (int for SNORM, uint for UNORM).
template <typename IntType>
static inline float4 readR32G32B32A32_NORM(COMMON_READ_FUNC_PARAMS)
{
    const auto rawRed   = bytesToInt<IntType>(buffer, bufferOffset);
    const auto rawGreen = bytesToInt<IntType>(buffer, bufferOffset + 4);
    const auto rawBlue  = bytesToInt<IntType>(buffer, bufferOffset + 8);
    const auto rawAlpha = bytesToInt<IntType>(buffer, bufferOffset + 12);

    const float red   = normalizedToFloat<IntType>(rawRed);
    const float green = normalizedToFloat<IntType>(rawGreen);
    const float blue  = normalizedToFloat<IntType>(rawBlue);
    const float alpha = normalizedToFloat<IntType>(rawAlpha);
    return float4(red, green, blue, alpha);
}
#define readR32G32B32A32_SNORM readR32G32B32A32_NORM<int>
#define readR32G32B32A32_UNORM readR32G32B32A32_NORM<uint>
1250
// R32G32B32A32_SINT
// Reads four signed 32-bit channels.
static inline int4 readR32G32B32A32_SINT(COMMON_READ_FUNC_PARAMS)
{
    const int red   = bytesToInt<int>(buffer, bufferOffset);
    const int green = bytesToInt<int>(buffer, bufferOffset + 4);
    const int blue  = bytesToInt<int>(buffer, bufferOffset + 8);
    const int alpha = bytesToInt<int>(buffer, bufferOffset + 12);
    return int4(red, green, blue, alpha);
}
// Stores every channel, 4 bytes per channel.
static inline void writeR32G32B32A32_SINT(COMMON_WRITE_SINT_FUNC_PARAMS)
{
    for (uint channel = 0; channel < 4; ++channel)
    {
        intToBytes(color[channel], bufferOffset + 4 * channel, buffer);
    }
}
// R32G32B32A32_FIXED
// Reads four signed 32-bit values and scales each by 1/65536 (i.e. treats them as 16.16 fixed
// point).
static inline float4 readR32G32B32A32_FIXED(COMMON_READ_FUNC_PARAMS)
{
    constexpr float kFixedToFloat = 1.0f / (1 << 16);
    const float red               = bytesToInt<int>(buffer, bufferOffset) * kFixedToFloat;
    const float green             = bytesToInt<int>(buffer, bufferOffset + 4) * kFixedToFloat;
    const float blue              = bytesToInt<int>(buffer, bufferOffset + 8) * kFixedToFloat;
    const float alpha             = bytesToInt<int>(buffer, bufferOffset + 12) * kFixedToFloat;
    return float4(red, green, blue, alpha);
}
1279
// R32G32B32A32_UINT
// Reads four unsigned 32-bit channels.
static inline uint4 readR32G32B32A32_UINT(COMMON_READ_FUNC_PARAMS)
{
    const uint red   = bytesToInt<uint>(buffer, bufferOffset);
    const uint green = bytesToInt<uint>(buffer, bufferOffset + 4);
    const uint blue  = bytesToInt<uint>(buffer, bufferOffset + 8);
    const uint alpha = bytesToInt<uint>(buffer, bufferOffset + 12);
    return uint4(red, green, blue, alpha);
}
// Stores every channel, 4 bytes per channel.
static inline void writeR32G32B32A32_UINT(COMMON_WRITE_UINT_FUNC_PARAMS)
{
    for (uint channel = 0; channel < 4; ++channel)
    {
        intToBytes(color[channel], bufferOffset + 4 * channel, buffer);
    }
}
1297
// The *_SSCALED and *_USCALED readers simply forward to the matching *_SINT / *_UINT reader.

// Defines read<FMT>_SSCALED as a forwarder to read<FMT>_SINT.
#define ALIAS_READ_SINT_FUNC(FMT)                                   \
    static inline int4 read##FMT##_SSCALED(COMMON_READ_FUNC_PARAMS) \
    {                                                               \
        return read##FMT##_SINT(FORWARD_COMMON_READ_FUNC_PARAMS);   \
    }

// Defines read<FMT>_USCALED as a forwarder to read<FMT>_UINT.
#define ALIAS_READ_UINT_FUNC(FMT)                                    \
    static inline uint4 read##FMT##_USCALED(COMMON_READ_FUNC_PARAMS) \
    {                                                                \
        return read##FMT##_UINT(FORWARD_COMMON_READ_FUNC_PARAMS);    \
    }

// Defines both the signed and the unsigned forwarder for one format.
#define ALIAS_READ_INT_FUNC(FMT) \
    ALIAS_READ_SINT_FUNC(FMT)    \
    ALIAS_READ_UINT_FUNC(FMT)

// Defines the forwarders for the 1-, 2-, 3- and 4-channel formats of one channel width.
#define ALIAS_READ_INT_FUNCS(WIDTH)                   \
    ALIAS_READ_INT_FUNC(R##WIDTH)                     \
    ALIAS_READ_INT_FUNC(R##WIDTH##G##WIDTH)           \
    ALIAS_READ_INT_FUNC(R##WIDTH##G##WIDTH##B##WIDTH) \
    ALIAS_READ_INT_FUNC(R##WIDTH##G##WIDTH##B##WIDTH##A##WIDTH)

ALIAS_READ_INT_FUNCS(8)
ALIAS_READ_INT_FUNCS(16)
ALIAS_READ_INT_FUNCS(32)

ALIAS_READ_INT_FUNC(R10G10B10A2)
1325
// Copy pixels from buffer to texture
// Float-texture variant. The reader to use is selected by the kCopyFormatType function constant;
// formats not listed below fall through the switch without doing anything.
kernel void readFromBufferToFloatTexture(COMMON_READ_KERNEL_PARAMS(float))
{
    // Guard macro defined earlier in the file; it must stay the first statement.
    READ_KERNEL_GUARD

    uint bufferOffset = CALC_BUFFER_READ_OFFSET(options.pixelSize);

// One switch case is generated per listed format.
#define SUPPORTED_FORMATS(PROC) \
    PROC(R5G6B5_UNORM) \
    PROC(R8G8B8A8_UNORM) PROC(R8G8B8A8_UNORM_SRGB) PROC(R8G8B8A8_SNORM) \
    PROC(B8G8R8A8_UNORM) PROC(B8G8R8A8_UNORM_SRGB) \
    PROC(R8G8B8_UNORM) PROC(R8G8B8_UNORM_SRGB) PROC(R8G8B8_SNORM) \
    PROC(L8_UNORM) PROC(L8A8_UNORM) \
    PROC(R5G5B5A1_UNORM) PROC(R4G4B4A4_UNORM) \
    PROC(R8_UNORM) PROC(R8_SNORM) PROC(R8G8_UNORM) PROC(R8G8_SNORM) \
    PROC(R16_FLOAT) PROC(R16_SNORM) PROC(R16_UNORM) \
    PROC(A16_FLOAT) PROC(L16_FLOAT) PROC(L16A16_FLOAT) \
    PROC(R16G16_FLOAT) PROC(R16G16_SNORM) PROC(R16G16_UNORM) \
    PROC(R16G16B16_FLOAT) PROC(R16G16B16_SNORM) PROC(R16G16B16_UNORM) \
    PROC(R16G16B16A16_FLOAT) PROC(R16G16B16A16_SNORM) PROC(R16G16B16A16_UNORM) \
    PROC(R32_FLOAT) PROC(A32_FLOAT) PROC(L32_FLOAT) PROC(L32A32_FLOAT) \
    PROC(R32G32_FLOAT) PROC(R32G32B32_FLOAT) PROC(R32G32B32A32_FLOAT)

    switch (kCopyFormatType)
    {
        SUPPORTED_FORMATS(READ_FORMAT_SWITCH_CASE)
    }

#undef SUPPORTED_FORMATS
}
1381
// Signed-integer-texture variant of the buffer-to-texture copy. The reader to use is selected by
// the kCopyFormatType function constant; unlisted formats fall through the switch untouched.
kernel void readFromBufferToIntTexture(COMMON_READ_KERNEL_PARAMS(int))
{
    // Guard macro defined earlier in the file; it must stay the first statement.
    READ_KERNEL_GUARD

    uint bufferOffset = CALC_BUFFER_READ_OFFSET(options.pixelSize);

// One switch case is generated per listed format.
#define SUPPORTED_FORMATS(PROC) \
    PROC(R8_SINT) PROC(R8G8_SINT) PROC(R8G8B8_SINT) PROC(R8G8B8A8_SINT) \
    PROC(R16_SINT) PROC(R16G16_SINT) PROC(R16G16B16_SINT) PROC(R16G16B16A16_SINT) \
    PROC(R32_SINT) PROC(R32G32_SINT) PROC(R32G32B32_SINT) PROC(R32G32B32A32_SINT)

    switch (kCopyFormatType)
    {
        SUPPORTED_FORMATS(READ_FORMAT_SWITCH_CASE)
    }

#undef SUPPORTED_FORMATS
}
1409
// Unsigned-integer-texture variant of the buffer-to-texture copy. The reader to use is selected
// by the kCopyFormatType function constant; unlisted formats fall through the switch untouched.
kernel void readFromBufferToUIntTexture(COMMON_READ_KERNEL_PARAMS(uint))
{
    // Guard macro defined earlier in the file; it must stay the first statement.
    READ_KERNEL_GUARD

    uint bufferOffset = CALC_BUFFER_READ_OFFSET(options.pixelSize);

// One switch case is generated per listed format.
#define SUPPORTED_FORMATS(PROC) \
    PROC(R8_UINT) PROC(R8G8_UINT) PROC(R8G8B8_UINT) PROC(R8G8B8A8_UINT) \
    PROC(R16_UINT) PROC(R16G16_UINT) PROC(R16G16B16_UINT) PROC(R16G16B16A16_UINT) \
    PROC(R32_UINT) PROC(R32G32_UINT) PROC(R32G32B32_UINT) PROC(R32G32B32A32_UINT)

    switch (kCopyFormatType)
    {
        SUPPORTED_FORMATS(READ_FORMAT_SWITCH_CASE)
    }

#undef SUPPORTED_FORMATS
}
1437
// Copy pixels from texture to buffer
// Float-texture variant. The writer to use is selected by the kCopyFormatType function constant;
// formats not listed below fall through the switch without doing anything.
kernel void writeFromFloatTextureToBuffer(COMMON_WRITE_KERNEL_PARAMS(float))
{
    // Guard macro defined earlier in the file; it must stay the first statement.
    WRITE_KERNEL_GUARD

    uint bufferOffset = CALC_BUFFER_WRITE_OFFSET(options.pixelSize);

// One switch case is generated per listed format.
#define SUPPORTED_FORMATS(PROC) \
    PROC(R5G6B5_UNORM) \
    PROC(R8G8B8A8_UNORM) PROC(R8G8B8A8_UNORM_SRGB) PROC(R8G8B8A8_SNORM) \
    PROC(B8G8R8A8_UNORM) PROC(B8G8R8A8_UNORM_SRGB) \
    PROC(R8G8B8_UNORM) PROC(R8G8B8_UNORM_SRGB) PROC(R8G8B8_SNORM) \
    PROC(L8_UNORM) PROC(A8_UNORM) PROC(L8A8_UNORM) \
    PROC(R5G5B5A1_UNORM) PROC(R4G4B4A4_UNORM) \
    PROC(R8_UNORM) PROC(R8_SNORM) PROC(R8G8_UNORM) PROC(R8G8_SNORM) \
    PROC(R16_FLOAT) PROC(R16_SNORM) PROC(R16_UNORM) \
    PROC(A16_FLOAT) PROC(L16_FLOAT) PROC(L16A16_FLOAT) \
    PROC(R16G16_FLOAT) PROC(R16G16_SNORM) PROC(R16G16_UNORM) \
    PROC(R16G16B16A16_FLOAT) PROC(R16G16B16A16_SNORM) PROC(R16G16B16A16_UNORM) \
    PROC(R32_FLOAT) PROC(A32_FLOAT) PROC(L32_FLOAT) PROC(L32A32_FLOAT) \
    PROC(R32G32_FLOAT) PROC(R32G32B32A32_FLOAT)

    switch (kCopyFormatType)
    {
        SUPPORTED_FORMATS(WRITE_FORMAT_SWITCH_CASE)
    }

#undef SUPPORTED_FORMATS
}
1490
// Signed-integer-texture variant of the texture-to-buffer copy. The writer to use is selected by
// the kCopyFormatType function constant; unlisted formats fall through the switch untouched.
kernel void writeFromIntTextureToBuffer(COMMON_WRITE_KERNEL_PARAMS(int))
{
    // Guard macro defined earlier in the file; it must stay the first statement.
    WRITE_KERNEL_GUARD

    uint bufferOffset = CALC_BUFFER_WRITE_OFFSET(options.pixelSize);

// One switch case is generated per listed format.
#define SUPPORTED_FORMATS(PROC) \
    PROC(R8_SINT) PROC(R8G8_SINT) PROC(R8G8B8A8_SINT) \
    PROC(R16_SINT) PROC(R16G16_SINT) PROC(R16G16B16A16_SINT) \
    PROC(R32_SINT) PROC(R32G32_SINT) PROC(R32G32B32A32_SINT)

    switch (kCopyFormatType)
    {
        SUPPORTED_FORMATS(WRITE_FORMAT_SWITCH_CASE)
    }

#undef SUPPORTED_FORMATS
}
1515
// Unsigned-integer-texture variant of the texture-to-buffer copy. The writer to use is selected
// by the kCopyFormatType function constant; unlisted formats fall through the switch untouched.
kernel void writeFromUIntTextureToBuffer(COMMON_WRITE_KERNEL_PARAMS(uint))
{
    // Guard macro defined earlier in the file; it must stay the first statement.
    WRITE_KERNEL_GUARD

    uint bufferOffset = CALC_BUFFER_WRITE_OFFSET(options.pixelSize);

// One switch case is generated per listed format.
#define SUPPORTED_FORMATS(PROC) \
    PROC(R8_UINT) PROC(R8G8_UINT) PROC(R8G8B8A8_UINT) \
    PROC(R16_UINT) PROC(R16G16_UINT) PROC(R16G16B16A16_UINT) \
    PROC(R32_UINT) PROC(R32G32_UINT) PROC(R32G32B32A32_UINT)

    switch (kCopyFormatType)
    {
        SUPPORTED_FORMATS(WRITE_FORMAT_SWITCH_CASE)
    }

#undef SUPPORTED_FORMATS
}
1540
/** -----  vertex format conversion --------*/
// Parameters consumed by the vertex conversion / expansion functions below.
// NOTE(review): presumably mirrored by a host-side struct, so field order and types must not
// change - confirm against the host code.
struct CopyVertexParams
{
    // Byte offset of vertex 0 in the source buffer.
    uint srcBufferStartOffset;
    // Byte distance between consecutive source vertices.
    uint srcStride;
    uint srcComponentBytes;  // unused when converting to float
    uint srcComponents;      // unused when converting to float
    // Default source alpha used when expanding the number of components.
    // If the source has fewer than 32 bits per component, only the first srcComponentBytes
    // bytes of srcDefaultAlphaData are used.
    uchar4 srcDefaultAlphaData;  // unused when converting to float

    // Byte offset of vertex 0 in the destination buffer.
    uint dstBufferStartOffset;
    // Byte distance between consecutive destination vertices.
    uint dstStride;
    // Number of components written per destination vertex.
    uint dstComponents;

    // Number of vertices to process; the compute kernels guard their thread index against it.
    uint vertexCount;
};
1559
// Applies APPLY to every integer-backed variant of format FMT.
#define INT_FORMAT_PROC(FMT, APPLY) \
    APPLY(FMT##_UNORM)              \
    APPLY(FMT##_SNORM)              \
    APPLY(FMT##_UINT)               \
    APPLY(FMT##_SINT)               \
    APPLY(FMT##_USCALED)            \
    APPLY(FMT##_SSCALED)

// Applies APPLY to the pure-integer variants of format FMT only.
#define PURE_INT_FORMAT_PROC(FMT, APPLY) \
    APPLY(FMT##_UINT)                    \
    APPLY(FMT##_SINT)

// Applies APPLY to the float / fixed-point variant of format FMT.
#define FLOAT_FORMAT_PROC(FMT, APPLY) APPLY(FMT##_FLOAT)
#define FIXED_FORMAT_PROC(FMT, APPLY) APPLY(FMT##_FIXED)

// Invokes EXPAND(format, APPLY) for the 1-, 2-, 3- and 4-channel formats of channel width WIDTH.
#define FORMAT_BITS_PROC(WIDTH, EXPAND, APPLY)   \
    EXPAND(R##WIDTH, APPLY)                      \
    EXPAND(R##WIDTH##G##WIDTH, APPLY)            \
    EXPAND(R##WIDTH##G##WIDTH##B##WIDTH, APPLY)  \
    EXPAND(R##WIDTH##G##WIDTH##B##WIDTH##A##WIDTH, APPLY)
1580
// Writes the first options.dstComponents components of an integer vertex as 32-bit floats.
// Vertex idx starts at dstBufferStartOffset + idx * dstStride; components are 4 bytes apart.
template <typename IntType>
static inline void writeFloatVertex(constant CopyVertexParams &options,
                                    uint idx,
                                    vec<IntType, 4> data,
                                    device uchar *dst)
{
    const uint vertexBase = options.dstBufferStartOffset + idx * options.dstStride;

    for (uint component = 0; component < options.dstComponents; ++component)
    {
        const float converted = static_cast<float>(data[component]);
        floatToBytes(converted, vertexBase + 4 * component, dst);
    }
}
1594
// Specialization for data that is already float: the components are stored without conversion.
template <>
inline void writeFloatVertex(constant CopyVertexParams &options,
                             uint idx,
                             vec<float, 4> data,
                             device uchar *dst)
{
    uint cursor    = options.dstBufferStartOffset + idx * options.dstStride;
    uint component = 0;

    while (component < options.dstComponents)
    {
        floatToBytes(data[component], cursor, dst);
        cursor += 4;  // each float component occupies 4 bytes
        ++component;
    }
}
1608
// Function to convert from any vertex format to float vertex format.
// Reads vertex `index` from srcBuffer with the reader selected by the kCopyFormatType function
// constant and writes its components as floats into dstBuffer. Formats that are not listed in
// SUPPORTED_FORMATS fall through the switch and leave dstBuffer untouched.
static inline void convertToFloatVertexFormat(uint index,
                                              constant CopyVertexParams &options,
                                              constant uchar *srcBuffer,
                                              device uchar *dstBuffer)
{
#define SUPPORTED_FORMATS(PROC)                   \
    FORMAT_BITS_PROC(8, INT_FORMAT_PROC, PROC)    \
    FORMAT_BITS_PROC(16, INT_FORMAT_PROC, PROC)   \
    FORMAT_BITS_PROC(32, INT_FORMAT_PROC, PROC)   \
    FORMAT_BITS_PROC(16, FLOAT_FORMAT_PROC, PROC) \
    FORMAT_BITS_PROC(32, FLOAT_FORMAT_PROC, PROC) \
    FORMAT_BITS_PROC(32, FIXED_FORMAT_PROC, PROC) \
    PROC(R10G10B10A2_SINT)                        \
    PROC(R10G10B10A2_UINT)                        \
    PROC(R10G10B10A2_SSCALED)                     \
    PROC(R10G10B10A2_USCALED)

    // Byte offset of this vertex in the source buffer.
    uint bufferOffset = options.srcBufferStartOffset + options.srcStride * index;

// One case per format: decode with the matching reader, then store as floats. The overload of
// writeFloatVertex is chosen by the reader's return type.
#define CONVERT_FLOAT_VERTEX_SWITCH_CASE(FORMAT)           \
    case FormatID::FORMAT: {                               \
        auto data = read##FORMAT(bufferOffset, srcBuffer); \
        writeFloatVertex(options, index, data, dstBuffer); \
    }                                                      \
    break;

    switch (kCopyFormatType)
    {
        SUPPORTED_FORMATS(CONVERT_FLOAT_VERTEX_SWITCH_CASE)
    }

// Both helper macros are local to this function; do not let them leak into the rest of the file.
#undef CONVERT_FLOAT_VERTEX_SWITCH_CASE
#undef SUPPORTED_FORMATS
}
1643
// Kernel to convert from any vertex format to float vertex format.
// One thread handles one vertex; buffer(0) holds the parameters, buffer(1) the source vertices
// and buffer(2) the destination.
kernel void convertToFloatVertexFormatCS(uint index [[thread_position_in_grid]],
                                         constant CopyVertexParams &options [[buffer(0)]],
                                         constant uchar *srcBuffer [[buffer(1)]],
                                         device uchar *dstBuffer [[buffer(2)]])
{
    // Guard macro defined outside this section, checked against the vertex count.
    ANGLE_KERNEL_GUARD(index, options.vertexCount);
    convertToFloatVertexFormat(index, options, srcBuffer, dstBuffer);
}
1653
// Vertex shader to convert from any vertex format to float vertex format.
// Same work as the compute kernel, driven by vertex_id. Unlike the kernel, it does not check
// index against options.vertexCount.
vertex void convertToFloatVertexFormatVS(uint index [[vertex_id]],
                                         constant CopyVertexParams &options [[buffer(0)]],
                                         constant uchar *srcBuffer [[buffer(1)]],
                                         device uchar *dstBuffer [[buffer(2)]])
{
    convertToFloatVertexFormat(index, options, srcBuffer, dstBuffer);
}
1662
// Function to expand (or just simply copy) the components of the vertex.
// Runs in three phases for vertex `index`:
//   1. copy the srcComponents source components byte by byte,
//   2. zero-fill further components up to min(dstComponents, 3),
//   3. if a component is still missing, fill it with the default alpha bytes.
// Every destination component is srcComponentBytes wide.
static inline void expandVertexFormatComponents(uint index,
                                                constant CopyVertexParams &options,
                                                constant uchar *srcBuffer,
                                                device uchar *dstBuffer)
{
    const uint componentBytes = options.srcComponentBytes;
    uint srcCursor            = options.srcBufferStartOffset + options.srcStride * index;
    uint dstCursor            = options.dstBufferStartOffset + options.dstStride * index;
    uint component            = 0;

    // Phase 1: verbatim copy of the components present in the source.
    while (component < options.srcComponents)
    {
        for (uint i = 0; i < componentBytes; ++i)
        {
            dstBuffer[dstCursor + i] = srcBuffer[srcCursor + i];
        }
        srcCursor += componentBytes;
        dstCursor += componentBytes;
        ++component;
    }

    // Phase 2: missing non-alpha components become zero.
    const uint colorComponents = min(options.dstComponents, 3u);
    while (component < colorComponents)
    {
        for (uint i = 0; i < componentBytes; ++i)
        {
            dstBuffer[dstCursor + i] = 0;
        }
        dstCursor += componentBytes;
        ++component;
    }

    // Phase 3: last alpha component, taken from the default alpha data.
    if (component < options.dstComponents)
    {
        for (uint i = 0; i < componentBytes; ++i)
        {
            dstBuffer[dstCursor + i] = options.srcDefaultAlphaData[i];
        }
    }
}
1701
// Kernel to expand (or just simply copy) the components of the vertex.
// One thread handles one vertex; buffer(0) holds the parameters, buffer(1) the source vertices
// and buffer(2) the destination.
kernel void expandVertexFormatComponentsCS(uint index [[thread_position_in_grid]],
                                           constant CopyVertexParams &options [[buffer(0)]],
                                           constant uchar *srcBuffer [[buffer(1)]],
                                           device uchar *dstBuffer [[buffer(2)]])
{
    // Guard macro defined outside this section, checked against the vertex count.
    ANGLE_KERNEL_GUARD(index, options.vertexCount);

    expandVertexFormatComponents(index, options, srcBuffer, dstBuffer);
}
1712
// Vertex shader to expand (or just simply copy) the components of the vertex.
// Same work as the compute kernel, driven by vertex_id. Unlike the kernel, it does not check
// index against options.vertexCount.
vertex void expandVertexFormatComponentsVS(uint index [[vertex_id]],
                                           constant CopyVertexParams &options [[buffer(0)]],
                                           constant uchar *srcBuffer [[buffer(1)]],
                                           device uchar *dstBuffer [[buffer(2)]])
{
    expandVertexFormatComponents(index, options, srcBuffer, dstBuffer);
}
1721
// Kernel to linearize PVRTC1 texture blocks.
// Each in-range thread copies one uint2 block. The source index is formed by interleaving the
// bits of the thread position (x bits in odd positions, y bits in even positions); the
// destination index is row-major, with dimensions.x blocks per row.
kernel void linearizeBlocks(ushort2 position [[thread_position_in_grid]],
                            constant uint2 *dimensions [[buffer(0)]],
                            constant uint2 *srcBuffer [[buffer(1)]],
                            device uint2 *dstBuffer [[buffer(2)]])
{
    const uint2 blockCoord = uint2(position);
    const uint2 gridSize   = *dimensions;

    if (all(blockCoord < gridSize))
    {
        // Spread the low 16 bits of each coordinate so one zero bit separates adjacent bits.
        uint2 spread = blockCoord;
        spread       = (spread | (spread << 8)) & 0x00FF00FF;
        spread       = (spread | (spread << 4)) & 0x0F0F0F0F;
        spread       = (spread | (spread << 2)) & 0x33333333;
        spread       = (spread | (spread << 1)) & 0x55555555;

        const uint interleavedIndex = (spread.x << 1) | spread.y;
        const uint linearIndex      = blockCoord.y * gridSize.x + blockCoord.x;
        dstBuffer[linearIndex]      = srcBuffer[interleavedIndex];
    }
}
1739
// Kernel to saturate floating-point depth data.
// dimensions.xy is the extent processed by the grid, dimensions.x is also the destination row
// length, and dimensions.z is the source row length (both in floats). Threads outside the
// extent do nothing.
kernel void saturateDepth(uint2 position [[thread_position_in_grid]],
                          constant uint3 *dimensions [[buffer(0)]],
                          device float *srcBuffer [[buffer(1)]],
                          device float *dstBuffer [[buffer(2)]])
{
    const uint3 dims = *dimensions;
    if (position.x >= dims.x || position.y >= dims.y)
    {
        return;
    }

    const uint srcIndex = position.x + position.y * dims.z;
    const uint dstIndex = position.x + position.y * dims.x;
    dstBuffer[dstIndex] = saturate(srcBuffer[srcIndex]);
}
1754