xref: /aosp_15_r20/external/angle/src/image_util/loadimage.inc (revision 8975f5c5ed3d1c378011245431ada316dfb6f244)
1//
2// Copyright 2014 The ANGLE Project Authors. All rights reserved.
3// Use of this source code is governed by a BSD-style license that can be
4// found in the LICENSE file.
5//
6
7#include "common/mathutil.h"
8
9#include <string.h>
10
11namespace angle
12{
13
14namespace priv
15{
16
// Returns a typed pointer to the start of row |y| in layer |z| of the mutable buffer |data|.
// |rowPitch| and |depthPitch| are byte strides; the offset is applied before the cast to T.
template <typename T>
inline T *OffsetDataPointer(uint8_t *data, size_t y, size_t z, size_t rowPitch, size_t depthPitch)
{
    uint8_t *const rowStart = data + (y * rowPitch) + (z * depthPitch);
    return reinterpret_cast<T *>(rowStart);
}
22
// Read-only counterpart: returns a typed const pointer to the start of row |y| in layer |z| of
// |data|.  |rowPitch| and |depthPitch| are byte strides applied before the cast to const T.
template <typename T>
inline const T *OffsetDataPointer(const uint8_t *data, size_t y, size_t z, size_t rowPitch, size_t depthPitch)
{
    const uint8_t *const rowStart = data + (y * rowPitch) + (z * depthPitch);
    return reinterpret_cast<const T *>(rowStart);
}
28
29}  // namespace priv
30
31template <typename type, size_t componentCount>
32inline void LoadToNative(const ImageLoadContext &context, size_t width, size_t height, size_t depth,
33                         const uint8_t *input, size_t inputRowPitch, size_t inputDepthPitch,
34                         uint8_t *output, size_t outputRowPitch, size_t outputDepthPitch)
35{
36    const size_t rowSize = width * sizeof(type) * componentCount;
37    const size_t layerSize = rowSize * height;
38    const size_t imageSize = layerSize * depth;
39
40    if (layerSize == inputDepthPitch && layerSize == outputDepthPitch)
41    {
42        ASSERT(rowSize == inputRowPitch && rowSize == outputRowPitch);
43        memcpy(output, input, imageSize);
44    }
45    else if (rowSize == inputRowPitch && rowSize == outputRowPitch)
46    {
47        for (size_t z = 0; z < depth; z++)
48        {
49            const type *source = priv::OffsetDataPointer<type>(input, 0, z, inputRowPitch, inputDepthPitch);
50            type *dest = priv::OffsetDataPointer<type>(output, 0, z, outputRowPitch, outputDepthPitch);
51
52            memcpy(dest, source, layerSize);
53        }
54    }
55    else
56    {
57        for (size_t z = 0; z < depth; z++)
58        {
59            for (size_t y = 0; y < height; y++)
60            {
61                const type *source = priv::OffsetDataPointer<type>(input, y, z, inputRowPitch, inputDepthPitch);
62                type *dest = priv::OffsetDataPointer<type>(output, y, z, outputRowPitch, outputDepthPitch);
63                memcpy(dest, source, width * sizeof(type) * componentCount);
64            }
65        }
66    }
67}
68
69template <typename type>
70inline void LoadToNative3To4Impl(const ImageLoadContext &context,
71                                 const uint32_t fourthComponentBits,
72                                 size_t width,
73                                 size_t height,
74                                 size_t depth,
75                                 const uint8_t *input,
76                                 size_t inputRowPitch,
77                                 size_t inputDepthPitch,
78                                 uint8_t *output,
79                                 size_t outputRowPitch,
80                                 size_t outputDepthPitch)
81{
82    const type fourthValue = gl::bitCast<type>(fourthComponentBits);
83
84    for (size_t z = 0; z < depth; z++)
85    {
86        for (size_t y = 0; y < height; y++)
87        {
88            const type *source =
89                priv::OffsetDataPointer<type>(input, y, z, inputRowPitch, inputDepthPitch);
90            type *dest =
91                priv::OffsetDataPointer<type>(output, y, z, outputRowPitch, outputDepthPitch);
92            for (size_t x = 0; x < width; x++)
93            {
94                memcpy(&dest[x * 4], &source[x * 3], sizeof(type) * 3);
95                dest[x * 4 + 3] = fourthValue;
96            }
97        }
98    }
99}
100
101template <typename type, uint32_t fourthComponentBits>
102inline void LoadToNative3To4(const ImageLoadContext &context,
103                             size_t width,
104                             size_t height,
105                             size_t depth,
106                             const uint8_t *input,
107                             size_t inputRowPitch,
108                             size_t inputDepthPitch,
109                             uint8_t *output,
110                             size_t outputRowPitch,
111                             size_t outputDepthPitch)
112{
113    LoadToNative3To4Impl<type>(context, fourthComponentBits, width, height, depth, input,
114                               inputRowPitch, inputDepthPitch, output, outputRowPitch,
115                               outputDepthPitch);
116}
117
118inline void LoadToNativeByte3To4Impl(const ImageLoadContext &context,
119                                      const uint8_t fourthValue,
120                                      size_t width,
121                                      size_t height,
122                                      size_t depth,
123                                      const uint8_t *input,
124                                      size_t inputRowPitch,
125                                      size_t inputDepthPitch,
126                                      uint8_t *output,
127                                      size_t outputRowPitch,
128                                      size_t outputDepthPitch)
129{
130    // This function is used for both signed and unsigned byte copies.
131    ASSERT(IsLittleEndian());
132    uint32_t fourthValue32 = static_cast<uint32_t>(fourthValue) << 24;
133
134    // To prevent undefined behavior, if the output address is not aligned by 4, the copy would be
135    // done using the default function instead.
136    if (reinterpret_cast<uintptr_t>(output) % 4 != 0)
137    {
138        LoadToNative3To4Impl<uint8_t>(context, fourthValue, width, height, depth, input,
139                                      inputRowPitch, inputDepthPitch, output, outputRowPitch,
140                                      outputDepthPitch);
141        return;
142    }
143
144    for (size_t z = 0; z < depth; z++)
145    {
146        for (size_t y = 0; y < height; y++)
147        {
148            const uint8_t *source8 =
149                priv::OffsetDataPointer<uint8_t>(input, y, z, inputRowPitch, inputDepthPitch);
150            uint8_t *dest8 =
151                priv::OffsetDataPointer<uint8_t>(output, y, z, outputRowPitch, outputDepthPitch);
152
153            // If the uint8_t addresses are not aligned to 4 bytes, there may be undefined behavior
154            // if they are used to copy 32-bit data. In that case, pixels are copied to the output
155            // one at a time until 4-byte alignment has been achieved for the source.
156            size_t pixelIndex = 0;
157
158            uint32_t source4Mod = reinterpret_cast<uintptr_t>(source8) % 4;
159            while (source4Mod != 0 && pixelIndex < width)
160            {
161                dest8[0] = source8[0];
162                dest8[1] = source8[1];
163                dest8[2] = source8[2];
164                dest8[3] = fourthValue;
165
166                source8 += 3;
167                source4Mod = (source4Mod + 3) % 4;
168                dest8 += 4;
169                pixelIndex++;
170            }
171
172            if (pixelIndex == width)
173            {
174                continue;
175            }
176
177            // In the following loop, 4 RGB pixels will be read in each iteration. If the remaining
178            // pixels are not a multiple of 4, the rest at the end of the row will be copied one at
179            // a time.
180            const uint32_t *source32 = reinterpret_cast<const uint32_t *>(source8);
181            uint32_t *dest32         = reinterpret_cast<uint32_t *>(dest8);
182
183            size_t remainingWidth = width - pixelIndex;
184            if (remainingWidth >= 4)
185            {
186                size_t fourByteCopyThreshold = remainingWidth - 4;
187                for (; pixelIndex <= fourByteCopyThreshold; pixelIndex += 4)
188                {
189                    // Three 32-bit values from the input contain 4 RGB pixels in total. This
190                    // translates to four 32-bits on the output.
191                    // (RGBR GBRG BRGB -> RGBA RGBA RGBA RGBA)
192                    uint32_t newPixelData[3];
193                    uint32_t rgbaPixelData[4];
194                    memcpy(&newPixelData[0], &source32[0], sizeof(uint32_t) * 3);
195
196                    rgbaPixelData[0] = (newPixelData[0] & 0x00FFFFFF) | fourthValue32;
197                    rgbaPixelData[1] = (newPixelData[0] >> 24) |
198                                       ((newPixelData[1] & 0x0000FFFF) << 8) | fourthValue32;
199                    rgbaPixelData[2] = (newPixelData[1] >> 16) |
200                                       ((newPixelData[2] & 0x000000FF) << 16) | fourthValue32;
201                    rgbaPixelData[3] = (newPixelData[2] >> 8) | fourthValue32;
202
203                    memcpy(&dest32[0], &rgbaPixelData[0], sizeof(uint32_t) * 4);
204
205                    source32 += 3;
206                    dest32 += 4;
207                }
208            }
209
210            // We should copy the remaining pixels at the end one by one.
211            source8 = reinterpret_cast<const uint8_t *>(source32);
212            dest8   = reinterpret_cast<uint8_t *>(dest32);
213            for (; pixelIndex < width; pixelIndex++)
214            {
215                dest8[0] = source8[0];
216                dest8[1] = source8[1];
217                dest8[2] = source8[2];
218                dest8[3] = fourthValue;
219
220                source8 += 3;
221                dest8 += 4;
222            }
223        }
224    }
225}
226
227template <>
228inline void LoadToNative3To4<uint8_t, 0xFF>(const ImageLoadContext &context,
229                                            size_t width,
230                                            size_t height,
231                                            size_t depth,
232                                            const uint8_t *input,
233                                            size_t inputRowPitch,
234                                            size_t inputDepthPitch,
235                                            uint8_t *output,
236                                            size_t outputRowPitch,
237                                            size_t outputDepthPitch)
238{
239    LoadToNativeByte3To4Impl(context, 0xFF, width, height, depth, input, inputRowPitch,
240                              inputDepthPitch, output, outputRowPitch, outputDepthPitch);
241}
242
243template <>
244inline void LoadToNative3To4<uint8_t, 0x01>(const ImageLoadContext &context,
245                                            size_t width,
246                                            size_t height,
247                                            size_t depth,
248                                            const uint8_t *input,
249                                            size_t inputRowPitch,
250                                            size_t inputDepthPitch,
251                                            uint8_t *output,
252                                            size_t outputRowPitch,
253                                            size_t outputDepthPitch)
254{
255    LoadToNativeByte3To4Impl(context, 0x01, width, height, depth, input, inputRowPitch,
256                              inputDepthPitch, output, outputRowPitch, outputDepthPitch);
257}
258
259template <>
260inline void LoadToNative3To4<int8_t, 0x01>(const ImageLoadContext &context,
261                                            size_t width,
262                                            size_t height,
263                                            size_t depth,
264                                            const uint8_t *input,
265                                            size_t inputRowPitch,
266                                            size_t inputDepthPitch,
267                                            uint8_t *output,
268                                            size_t outputRowPitch,
269                                            size_t outputDepthPitch)
270{
271    LoadToNativeByte3To4Impl(context, 0x01, width, height, depth, input, inputRowPitch,
272                              inputDepthPitch, output, outputRowPitch, outputDepthPitch);
273}
274
275template <>
276inline void LoadToNative3To4<int8_t, 0x7F>(const ImageLoadContext &context,
277                                            size_t width,
278                                            size_t height,
279                                            size_t depth,
280                                            const uint8_t *input,
281                                            size_t inputRowPitch,
282                                            size_t inputDepthPitch,
283                                            uint8_t *output,
284                                            size_t outputRowPitch,
285                                            size_t outputDepthPitch)
286{
287    LoadToNativeByte3To4Impl(context, 0x7F, width, height, depth, input, inputRowPitch,
288                              inputDepthPitch, output, outputRowPitch, outputDepthPitch);
289}
290
291template <size_t componentCount>
292inline void Load32FTo16F(const ImageLoadContext &context, size_t width, size_t height, size_t depth,
293                         const uint8_t *input, size_t inputRowPitch, size_t inputDepthPitch,
294                         uint8_t *output, size_t outputRowPitch, size_t outputDepthPitch)
295{
296    const size_t elementWidth = componentCount * width;
297
298    for (size_t z = 0; z < depth; z++)
299    {
300        for (size_t y = 0; y < height; y++)
301        {
302            const float *source = priv::OffsetDataPointer<float>(input, y, z, inputRowPitch, inputDepthPitch);
303            uint16_t *dest = priv::OffsetDataPointer<uint16_t>(output, y, z, outputRowPitch, outputDepthPitch);
304
305            for (size_t x = 0; x < elementWidth; x++)
306            {
307                dest[x] = gl::float32ToFloat16(source[x]);
308            }
309        }
310    }
311}
312
313template <typename type,
314          size_t inputComponentCount,
315          size_t outputComponentCount,
316          bool normalized>
317inline void LoadToFloat(const ImageLoadContext &context, size_t width, size_t height, size_t depth,
318                         const uint8_t *input, size_t inputRowPitch, size_t inputDepthPitch,
319                         uint8_t *output, size_t outputRowPitch, size_t outputDepthPitch) {
320    typedef std::numeric_limits<type> NL;
321
322    for (size_t z = 0; z < depth; z++)
323    {
324        for (size_t y = 0; y < height; y++)
325        {
326            const type *source_line = priv::OffsetDataPointer<type>(input, y, z, inputRowPitch, inputDepthPitch);
327            float *dest_line = priv::OffsetDataPointer<float>(output, y, z, outputRowPitch, outputDepthPitch);
328
329            for (size_t x = 0; x < width; x++)
330            {
331                const type *source_pixel = source_line + x * inputComponentCount;
332                float *dest_pixel = dest_line + x * outputComponentCount;
333
334                for (size_t i = 0; i < inputComponentCount; i++)
335                {
336                    float result = 0;
337                    if (normalized)
338                    {
339                        if (NL::is_signed)
340                        {
341                            result = static_cast<float>(source_pixel[i]) / static_cast<float>(NL::max());
342                            result = result >= -1.0f ? result : -1.0f;
343                        }
344                        else
345                        {
346                            result = static_cast<float>(source_pixel[i]) / static_cast<float>(NL::max());
347                        }
348                    }
349                    else
350                    {
351                        result = static_cast<float>(source_pixel[i]);
352                    }
353                    dest_pixel[i] = result;
354                }
355
356                for (size_t j = inputComponentCount; j < outputComponentCount; j++)
357                {
358                    dest_pixel[j] = j == 3 ? 1.0f : 0.0f;
359                }
360            }
361        }
362    }
363}
364
365template <size_t blockWidth, size_t blockHeight, size_t blockDepth, size_t blockSize>
366inline void LoadCompressedToNative(const ImageLoadContext &context, size_t width, size_t height,
367                                   size_t depth, const uint8_t *input, size_t inputRowPitch,
368                                   size_t inputDepthPitch, uint8_t *output, size_t outputRowPitch,
369                                   size_t outputDepthPitch)
370{
371    const size_t columns = (width + (blockWidth - 1)) / blockWidth;
372    const size_t rows = (height + (blockHeight - 1)) / blockHeight;
373    const size_t layers = (depth + (blockDepth - 1)) / blockDepth;
374
375    const size_t inputLayerSize = inputRowPitch * rows;
376    const size_t inputImageSize = inputDepthPitch * layers;
377
378    const size_t outputLayerSize = outputRowPitch * rows;
379    const size_t outputImageSize = outputDepthPitch * layers;
380
381    if (inputImageSize == outputImageSize)
382    {
383        ASSERT(inputRowPitch == outputRowPitch);
384        ASSERT(inputLayerSize == outputLayerSize && inputLayerSize == inputDepthPitch && outputLayerSize == outputDepthPitch);
385        memcpy(output, input, inputImageSize);
386    }
387    else
388    {
389        // Note: this path should technically never be hit, but it is with the d3d backend.  Once
390        // the issue is fixed, this path should be removed.
391        // http://anglebug.com/42266773
392        for (size_t z = 0; z < layers; ++z)
393        {
394            for (size_t y = 0; y < rows; ++y)
395            {
396                const uint8_t *source = priv::OffsetDataPointer<uint8_t>(input, y, z, inputRowPitch, inputDepthPitch);
397                uint8_t *dest = priv::OffsetDataPointer<uint8_t>(output, y, z, outputRowPitch, outputDepthPitch);
398                memcpy(dest, source, columns * blockSize);
399            }
400        }
401    }
402}
403
404template <typename type, uint32_t firstBits, uint32_t secondBits, uint32_t thirdBits, uint32_t fourthBits>
405inline void Initialize4ComponentData(size_t width, size_t height, size_t depth,
406                                     uint8_t *output, size_t outputRowPitch, size_t outputDepthPitch)
407{
408    type writeValues[4] =
409    {
410        gl::bitCast<type>(firstBits),
411        gl::bitCast<type>(secondBits),
412        gl::bitCast<type>(thirdBits),
413        gl::bitCast<type>(fourthBits),
414    };
415
416    for (size_t z = 0; z < depth; z++)
417    {
418        for (size_t y = 0; y < height; y++)
419        {
420            type *destRow = priv::OffsetDataPointer<type>(output, y, z, outputRowPitch, outputDepthPitch);
421            for (size_t x = 0; x < width; x++)
422            {
423                type* destPixel = destRow + x * 4;
424
425                // This could potentially be optimized by generating an entire row of initialization
426                // data and copying row by row instead of pixel by pixel.
427                memcpy(destPixel, writeValues, sizeof(type) * 4);
428            }
429        }
430    }
431}
432
433template <size_t blockWidth, size_t blockHeight>
434inline void LoadASTCToRGBA8(const ImageLoadContext &context,
435                            size_t width,
436                            size_t height,
437                            size_t depth,
438                            const uint8_t *input,
439                            size_t inputRowPitch,
440                            size_t inputDepthPitch,
441                            uint8_t *output,
442                            size_t outputRowPitch,
443                            size_t outputDepthPitch)
444{
445    LoadASTCToRGBA8Inner(context, width, height, depth, blockWidth, blockHeight, input, inputRowPitch,
446                         inputDepthPitch, output, outputRowPitch, outputDepthPitch);
447}
448
449template <uint32_t indexBits, uint32_t redBlueBits, uint32_t greenBits, uint32_t alphaBits>
450inline void LoadPalettedToRGBA8(const ImageLoadContext &context,
451                                size_t width,
452                                size_t height,
453                                size_t depth,
454                                const uint8_t *input,
455                                size_t inputRowPitch,
456                                size_t inputDepthPitch,
457                                uint8_t *output,
458                                size_t outputRowPitch,
459                                size_t outputDepthPitch)
460{
461    static_assert(indexBits == 4 || indexBits == 8);
462    static_assert(redBlueBits == 4 || redBlueBits == 5 || redBlueBits == 8);
463    static_assert(greenBits == 4 || greenBits == 5 || greenBits == 6 || greenBits == 8);
464    static_assert(alphaBits == 0 || alphaBits == 1 || alphaBits == 4 || alphaBits == 8);
465    constexpr uint32_t colorBits = 2 * redBlueBits + greenBits + alphaBits;
466    static_assert(colorBits == 16 || colorBits == 24 || colorBits == 32);
467
468    LoadPalettedToRGBA8Impl(context, width, height, depth,
469                            indexBits, redBlueBits, greenBits, alphaBits,
470                            input, inputRowPitch, inputDepthPitch,
471                            output, outputRowPitch, outputDepthPitch);
472}
473
474// Temporary overload functions; need to have no-context overloads of the following functions used
475// by Chromium.  A Chromium change will switch to the with-context overloads, and then these can be
476// removed.
477inline void LoadEACR11ToR8(size_t width,
478                           size_t height,
479                           size_t depth,
480                           const uint8_t *input,
481                           size_t inputRowPitch,
482                           size_t inputDepthPitch,
483                           uint8_t *output,
484                           size_t outputRowPitch,
485                           size_t outputDepthPitch)
486{
487    LoadEACR11ToR8({}, width, height, depth, input, inputRowPitch, inputDepthPitch, output,
488                   outputRowPitch, outputDepthPitch);
489}
490
491inline void LoadEACR11SToR8(size_t width,
492                            size_t height,
493                            size_t depth,
494                            const uint8_t *input,
495                            size_t inputRowPitch,
496                            size_t inputDepthPitch,
497                            uint8_t *output,
498                            size_t outputRowPitch,
499                            size_t outputDepthPitch)
500{
501    LoadEACR11SToR8({}, width, height, depth, input, inputRowPitch, inputDepthPitch, output,
502                    outputRowPitch, outputDepthPitch);
503}
504
505inline void LoadEACRG11ToRG8(size_t width,
506                             size_t height,
507                             size_t depth,
508                             const uint8_t *input,
509                             size_t inputRowPitch,
510                             size_t inputDepthPitch,
511                             uint8_t *output,
512                             size_t outputRowPitch,
513                             size_t outputDepthPitch)
514{
515    LoadEACRG11ToRG8({}, width, height, depth, input, inputRowPitch, inputDepthPitch, output,
516                     outputRowPitch, outputDepthPitch);
517}
518
519inline void LoadEACRG11SToRG8(size_t width,
520                              size_t height,
521                              size_t depth,
522                              const uint8_t *input,
523                              size_t inputRowPitch,
524                              size_t inputDepthPitch,
525                              uint8_t *output,
526                              size_t outputRowPitch,
527                              size_t outputDepthPitch)
528{
529    LoadEACRG11SToRG8({}, width, height, depth, input, inputRowPitch, inputDepthPitch, output,
530                      outputRowPitch, outputDepthPitch);
531}
532
533inline void LoadETC2RGB8ToRGBA8(size_t width,
534                                size_t height,
535                                size_t depth,
536                                const uint8_t *input,
537                                size_t inputRowPitch,
538                                size_t inputDepthPitch,
539                                uint8_t *output,
540                                size_t outputRowPitch,
541                                size_t outputDepthPitch)
542{
543    LoadETC2RGB8ToRGBA8({}, width, height, depth, input, inputRowPitch, inputDepthPitch, output,
544                        outputRowPitch, outputDepthPitch);
545}
546
547inline void LoadETC2SRGB8ToRGBA8(size_t width,
548                                 size_t height,
549                                 size_t depth,
550                                 const uint8_t *input,
551                                 size_t inputRowPitch,
552                                 size_t inputDepthPitch,
553                                 uint8_t *output,
554                                 size_t outputRowPitch,
555                                 size_t outputDepthPitch)
556{
557    LoadETC2SRGB8ToRGBA8({}, width, height, depth, input, inputRowPitch, inputDepthPitch, output,
558                         outputRowPitch, outputDepthPitch);
559}
560
561inline void LoadETC2RGBA8ToRGBA8(size_t width,
562                                 size_t height,
563                                 size_t depth,
564                                 const uint8_t *input,
565                                 size_t inputRowPitch,
566                                 size_t inputDepthPitch,
567                                 uint8_t *output,
568                                 size_t outputRowPitch,
569                                 size_t outputDepthPitch)
570{
571    LoadETC2RGBA8ToRGBA8({}, width, height, depth, input, inputRowPitch, inputDepthPitch, output,
572                         outputRowPitch, outputDepthPitch);
573}
574
575inline void LoadETC2RGB8A1ToRGBA8(size_t width,
576                                  size_t height,
577                                  size_t depth,
578                                  const uint8_t *input,
579                                  size_t inputRowPitch,
580                                  size_t inputDepthPitch,
581                                  uint8_t *output,
582                                  size_t outputRowPitch,
583                                  size_t outputDepthPitch)
584{
585    LoadETC2RGB8A1ToRGBA8({}, width, height, depth, input, inputRowPitch, inputDepthPitch, output,
586                          outputRowPitch, outputDepthPitch);
587}
588
589inline void LoadETC2SRGBA8ToSRGBA8(size_t width,
590                                   size_t height,
591                                   size_t depth,
592                                   const uint8_t *input,
593                                   size_t inputRowPitch,
594                                   size_t inputDepthPitch,
595                                   uint8_t *output,
596                                   size_t outputRowPitch,
597                                   size_t outputDepthPitch)
598{
599    LoadETC2SRGBA8ToSRGBA8({}, width, height, depth, input, inputRowPitch, inputDepthPitch, output,
600                           outputRowPitch, outputDepthPitch);
601}
602
603inline void LoadETC2SRGB8A1ToRGBA8(size_t width,
604                                   size_t height,
605                                   size_t depth,
606                                   const uint8_t *input,
607                                   size_t inputRowPitch,
608                                   size_t inputDepthPitch,
609                                   uint8_t *output,
610                                   size_t outputRowPitch,
611                                   size_t outputDepthPitch)
612{
613    LoadETC2SRGB8A1ToRGBA8({}, width, height, depth, input, inputRowPitch, inputDepthPitch, output,
614                           outputRowPitch, outputDepthPitch);
615}
616
617} // namespace angle
618