1// 2// Copyright 2014 The ANGLE Project Authors. All rights reserved. 3// Use of this source code is governed by a BSD-style license that can be 4// found in the LICENSE file. 5// 6 7#include "common/mathutil.h" 8 9#include <string.h> 10 11namespace angle 12{ 13 14namespace priv 15{ 16 17template <typename T> 18inline T *OffsetDataPointer(uint8_t *data, size_t y, size_t z, size_t rowPitch, size_t depthPitch) 19{ 20 return reinterpret_cast<T*>(data + (y * rowPitch) + (z * depthPitch)); 21} 22 23template <typename T> 24inline const T *OffsetDataPointer(const uint8_t *data, size_t y, size_t z, size_t rowPitch, size_t depthPitch) 25{ 26 return reinterpret_cast<const T*>(data + (y * rowPitch) + (z * depthPitch)); 27} 28 29} // namespace priv 30 31template <typename type, size_t componentCount> 32inline void LoadToNative(const ImageLoadContext &context, size_t width, size_t height, size_t depth, 33 const uint8_t *input, size_t inputRowPitch, size_t inputDepthPitch, 34 uint8_t *output, size_t outputRowPitch, size_t outputDepthPitch) 35{ 36 const size_t rowSize = width * sizeof(type) * componentCount; 37 const size_t layerSize = rowSize * height; 38 const size_t imageSize = layerSize * depth; 39 40 if (layerSize == inputDepthPitch && layerSize == outputDepthPitch) 41 { 42 ASSERT(rowSize == inputRowPitch && rowSize == outputRowPitch); 43 memcpy(output, input, imageSize); 44 } 45 else if (rowSize == inputRowPitch && rowSize == outputRowPitch) 46 { 47 for (size_t z = 0; z < depth; z++) 48 { 49 const type *source = priv::OffsetDataPointer<type>(input, 0, z, inputRowPitch, inputDepthPitch); 50 type *dest = priv::OffsetDataPointer<type>(output, 0, z, outputRowPitch, outputDepthPitch); 51 52 memcpy(dest, source, layerSize); 53 } 54 } 55 else 56 { 57 for (size_t z = 0; z < depth; z++) 58 { 59 for (size_t y = 0; y < height; y++) 60 { 61 const type *source = priv::OffsetDataPointer<type>(input, y, z, inputRowPitch, inputDepthPitch); 62 type *dest = priv::OffsetDataPointer<type>(output, y, z, outputRowPitch, outputDepthPitch); 63 memcpy(dest, source, width * sizeof(type) * componentCount); 64 } 65 } 66 } 67} 68 69template <typename type> 70inline void LoadToNative3To4Impl(const ImageLoadContext &context, 71 const uint32_t fourthComponentBits, 72 size_t width, 73 size_t height, 74 size_t depth, 75 const uint8_t *input, 76 size_t inputRowPitch, 77 size_t inputDepthPitch, 78 uint8_t *output, 79 size_t outputRowPitch, 80 size_t outputDepthPitch) 81{ 82 const type fourthValue = gl::bitCast<type>(fourthComponentBits); 83 84 for (size_t z = 0; z < depth; z++) 85 { 86 for (size_t y = 0; y < height; y++) 87 { 88 const type *source = 89 priv::OffsetDataPointer<type>(input, y, z, inputRowPitch, inputDepthPitch); 90 type *dest = 91 priv::OffsetDataPointer<type>(output, y, z, outputRowPitch, outputDepthPitch); 92 for (size_t x = 0; x < width; x++) 93 { 94 memcpy(&dest[x * 4], &source[x * 3], sizeof(type) * 3); 95 dest[x * 4 + 3] = fourthValue; 96 } 97 } 98 } 99} 100 101template <typename type, uint32_t fourthComponentBits> 102inline void LoadToNative3To4(const ImageLoadContext &context, 103 size_t width, 104 size_t height, 105 size_t depth, 106 const uint8_t *input, 107 size_t inputRowPitch, 108 size_t inputDepthPitch, 109 uint8_t *output, 110 size_t outputRowPitch, 111 size_t outputDepthPitch) 112{ 113 LoadToNative3To4Impl<type>(context, fourthComponentBits, width, height, depth, input, 114 inputRowPitch, inputDepthPitch, output, outputRowPitch, 115 outputDepthPitch); 116} 117 118inline void LoadToNativeByte3To4Impl(const ImageLoadContext &context, 119 const uint8_t fourthValue, 120 size_t width, 121 size_t height, 122 size_t depth, 123 const uint8_t *input, 124 size_t inputRowPitch, 125 size_t inputDepthPitch, 126 uint8_t *output, 127 size_t outputRowPitch, 128 size_t outputDepthPitch) 129{ 130 // This function is used for both signed and unsigned byte copies. 131 ASSERT(IsLittleEndian()); 132 uint32_t fourthValue32 = static_cast<uint32_t>(fourthValue) << 24; 133 134 // To prevent undefined behavior, if the output address is not aligned by 4, the copy would be 135 // done using the default function instead. 136 if (reinterpret_cast<uintptr_t>(output) % 4 != 0) 137 { 138 LoadToNative3To4Impl<uint8_t>(context, fourthValue, width, height, depth, input, 139 inputRowPitch, inputDepthPitch, output, outputRowPitch, 140 outputDepthPitch); 141 return; 142 } 143 144 for (size_t z = 0; z < depth; z++) 145 { 146 for (size_t y = 0; y < height; y++) 147 { 148 const uint8_t *source8 = 149 priv::OffsetDataPointer<uint8_t>(input, y, z, inputRowPitch, inputDepthPitch); 150 uint8_t *dest8 = 151 priv::OffsetDataPointer<uint8_t>(output, y, z, outputRowPitch, outputDepthPitch); 152 153 // If the uint8_t addresses are not aligned to 4 bytes, there may be undefined behavior 154 // if they are used to copy 32-bit data. In that case, pixels are copied to the output 155 // one at a time until 4-byte alignment has been achieved for the source. 156 size_t pixelIndex = 0; 157 158 uint32_t source4Mod = reinterpret_cast<uintptr_t>(source8) % 4; 159 while (source4Mod != 0 && pixelIndex < width) 160 { 161 dest8[0] = source8[0]; 162 dest8[1] = source8[1]; 163 dest8[2] = source8[2]; 164 dest8[3] = fourthValue; 165 166 source8 += 3; 167 source4Mod = (source4Mod + 3) % 4; 168 dest8 += 4; 169 pixelIndex++; 170 } 171 172 if (pixelIndex == width) 173 { 174 continue; 175 } 176 177 // In the following loop, 4 RGB pixels will be read in each iteration. If the remaining 178 // pixels are not a multiple of 4, the rest at the end of the row will be copied one at 179 // a time. 180 const uint32_t *source32 = reinterpret_cast<const uint32_t *>(source8); 181 uint32_t *dest32 = reinterpret_cast<uint32_t *>(dest8); 182 183 size_t remainingWidth = width - pixelIndex; 184 if (remainingWidth >= 4) 185 { 186 size_t fourByteCopyThreshold = remainingWidth - 4; 187 for (; pixelIndex <= fourByteCopyThreshold; pixelIndex += 4) 188 { 189 // Three 32-bit values from the input contain 4 RGB pixels in total. This 190 // translates to four 32-bits on the output. 191 // (RGBR GBRG BRGB -> RGBA RGBA RGBA RGBA) 192 uint32_t newPixelData[3]; 193 uint32_t rgbaPixelData[4]; 194 memcpy(&newPixelData[0], &source32[0], sizeof(uint32_t) * 3); 195 196 rgbaPixelData[0] = (newPixelData[0] & 0x00FFFFFF) | fourthValue32; 197 rgbaPixelData[1] = (newPixelData[0] >> 24) | 198 ((newPixelData[1] & 0x0000FFFF) << 8) | fourthValue32; 199 rgbaPixelData[2] = (newPixelData[1] >> 16) | 200 ((newPixelData[2] & 0x000000FF) << 16) | fourthValue32; 201 rgbaPixelData[3] = (newPixelData[2] >> 8) | fourthValue32; 202 203 memcpy(&dest32[0], &rgbaPixelData[0], sizeof(uint32_t) * 4); 204 205 source32 += 3; 206 dest32 += 4; 207 } 208 } 209 210 // We should copy the remaining pixels at the end one by one. 211 source8 = reinterpret_cast<const uint8_t *>(source32); 212 dest8 = reinterpret_cast<uint8_t *>(dest32); 213 for (; pixelIndex < width; pixelIndex++) 214 { 215 dest8[0] = source8[0]; 216 dest8[1] = source8[1]; 217 dest8[2] = source8[2]; 218 dest8[3] = fourthValue; 219 220 source8 += 3; 221 dest8 += 4; 222 } 223 } 224 } 225} 226 227template <> 228inline void LoadToNative3To4<uint8_t, 0xFF>(const ImageLoadContext &context, 229 size_t width, 230 size_t height, 231 size_t depth, 232 const uint8_t *input, 233 size_t inputRowPitch, 234 size_t inputDepthPitch, 235 uint8_t *output, 236 size_t outputRowPitch, 237 size_t outputDepthPitch) 238{ 239 LoadToNativeByte3To4Impl(context, 0xFF, width, height, depth, input, inputRowPitch, 240 inputDepthPitch, output, outputRowPitch, outputDepthPitch); 241} 242 243template <> 244inline void LoadToNative3To4<uint8_t, 0x01>(const ImageLoadContext &context, 245 size_t width, 246 size_t height, 247 size_t depth, 248 const uint8_t *input, 249 size_t inputRowPitch, 250 size_t inputDepthPitch, 251 uint8_t *output, 252 size_t outputRowPitch, 253 size_t outputDepthPitch) 254{ 255 LoadToNativeByte3To4Impl(context, 0x01, width, height, depth, input, inputRowPitch, 256 inputDepthPitch, output, outputRowPitch, outputDepthPitch); 257} 258 259template <> 260inline void LoadToNative3To4<int8_t, 0x01>(const ImageLoadContext &context, 261 size_t width, 262 size_t height, 263 size_t depth, 264 const uint8_t *input, 265 size_t inputRowPitch, 266 size_t inputDepthPitch, 267 uint8_t *output, 268 size_t outputRowPitch, 269 size_t outputDepthPitch) 270{ 271 LoadToNativeByte3To4Impl(context, 0x01, width, height, depth, input, inputRowPitch, 272 inputDepthPitch, output, outputRowPitch, outputDepthPitch); 273} 274 275template <> 276inline void LoadToNative3To4<int8_t, 0x7F>(const ImageLoadContext &context, 277 size_t width, 278 size_t height, 279 size_t depth, 280 const uint8_t *input, 281 size_t inputRowPitch, 282 size_t inputDepthPitch, 283 uint8_t *output, 284 size_t outputRowPitch, 285 size_t outputDepthPitch) 286{ 287 LoadToNativeByte3To4Impl(context, 0x7F, width, height, depth, input, inputRowPitch, 288 inputDepthPitch, output, outputRowPitch, outputDepthPitch); 289} 290 291template <size_t componentCount> 292inline void Load32FTo16F(const ImageLoadContext &context, size_t width, size_t height, size_t depth, 293 const uint8_t *input, size_t inputRowPitch, size_t inputDepthPitch, 294 uint8_t *output, size_t outputRowPitch, size_t outputDepthPitch) 295{ 296 const size_t elementWidth = componentCount * width; 297 298 for (size_t z = 0; z < depth; z++) 299 { 300 for (size_t y = 0; y < height; y++) 301 { 302 const float *source = priv::OffsetDataPointer<float>(input, y, z, inputRowPitch, inputDepthPitch); 303 uint16_t *dest = priv::OffsetDataPointer<uint16_t>(output, y, z, outputRowPitch, outputDepthPitch); 304 305 for (size_t x = 0; x < elementWidth; x++) 306 { 307 dest[x] = gl::float32ToFloat16(source[x]); 308 } 309 } 310 } 311} 312 313template <typename type, 314 size_t inputComponentCount, 315 size_t outputComponentCount, 316 bool normalized> 317inline void LoadToFloat(const ImageLoadContext &context, size_t width, size_t height, size_t depth, 318 const uint8_t *input, size_t inputRowPitch, size_t inputDepthPitch, 319 uint8_t *output, size_t outputRowPitch, size_t outputDepthPitch) { 320 typedef std::numeric_limits<type> NL; 321 322 for (size_t z = 0; z < depth; z++) 323 { 324 for (size_t y = 0; y < height; y++) 325 { 326 const type *source_line = priv::OffsetDataPointer<type>(input, y, z, inputRowPitch, inputDepthPitch); 327 float *dest_line = priv::OffsetDataPointer<float>(output, y, z, outputRowPitch, outputDepthPitch); 328 329 for (size_t x = 0; x < width; x++) 330 { 331 const type *source_pixel = source_line + x * inputComponentCount; 332 float *dest_pixel = dest_line + x * outputComponentCount; 333 334 for (size_t i = 0; i < inputComponentCount; i++) 335 { 336 float result = 0; 337 if (normalized) 338 { 339 if (NL::is_signed) 340 { 341 result = static_cast<float>(source_pixel[i]) / static_cast<float>(NL::max()); 342 result = result >= -1.0f ? result : -1.0f; 343 } 344 else 345 { 346 result = static_cast<float>(source_pixel[i]) / static_cast<float>(NL::max()); 347 } 348 } 349 else 350 { 351 result = static_cast<float>(source_pixel[i]); 352 } 353 dest_pixel[i] = result; 354 } 355 356 for (size_t j = inputComponentCount; j < outputComponentCount; j++) 357 { 358 dest_pixel[j] = j == 3 ? 1.0f : 0.0f; 359 } 360 } 361 } 362 } 363} 364 365template <size_t blockWidth, size_t blockHeight, size_t blockDepth, size_t blockSize> 366inline void LoadCompressedToNative(const ImageLoadContext &context, size_t width, size_t height, 367 size_t depth, const uint8_t *input, size_t inputRowPitch, 368 size_t inputDepthPitch, uint8_t *output, size_t outputRowPitch, 369 size_t outputDepthPitch) 370{ 371 const size_t columns = (width + (blockWidth - 1)) / blockWidth; 372 const size_t rows = (height + (blockHeight - 1)) / blockHeight; 373 const size_t layers = (depth + (blockDepth - 1)) / blockDepth; 374 375 const size_t inputLayerSize = inputRowPitch * rows; 376 const size_t inputImageSize = inputDepthPitch * layers; 377 378 const size_t outputLayerSize = outputRowPitch * rows; 379 const size_t outputImageSize = outputDepthPitch * layers; 380 381 if (inputImageSize == outputImageSize) 382 { 383 ASSERT(inputRowPitch == outputRowPitch); 384 ASSERT(inputLayerSize == outputLayerSize && inputLayerSize == inputDepthPitch && outputLayerSize == outputDepthPitch); 385 memcpy(output, input, inputImageSize); 386 } 387 else 388 { 389 // Note: this path should technically never be hit, but it is with the d3d backend. Once 390 // the issue is fixed, this path should be removed. 391 // http://anglebug.com/42266773 392 for (size_t z = 0; z < layers; ++z) 393 { 394 for (size_t y = 0; y < rows; ++y) 395 { 396 const uint8_t *source = priv::OffsetDataPointer<uint8_t>(input, y, z, inputRowPitch, inputDepthPitch); 397 uint8_t *dest = priv::OffsetDataPointer<uint8_t>(output, y, z, outputRowPitch, outputDepthPitch); 398 memcpy(dest, source, columns * blockSize); 399 } 400 } 401 } 402} 403 404template <typename type, uint32_t firstBits, uint32_t secondBits, uint32_t thirdBits, uint32_t fourthBits> 405inline void Initialize4ComponentData(size_t width, size_t height, size_t depth, 406 uint8_t *output, size_t outputRowPitch, size_t outputDepthPitch) 407{ 408 type writeValues[4] = 409 { 410 gl::bitCast<type>(firstBits), 411 gl::bitCast<type>(secondBits), 412 gl::bitCast<type>(thirdBits), 413 gl::bitCast<type>(fourthBits), 414 }; 415 416 for (size_t z = 0; z < depth; z++) 417 { 418 for (size_t y = 0; y < height; y++) 419 { 420 type *destRow = priv::OffsetDataPointer<type>(output, y, z, outputRowPitch, outputDepthPitch); 421 for (size_t x = 0; x < width; x++) 422 { 423 type* destPixel = destRow + x * 4; 424 425 // This could potentially be optimized by generating an entire row of initialization 426 // data and copying row by row instead of pixel by pixel. 427 memcpy(destPixel, writeValues, sizeof(type) * 4); 428 } 429 } 430 } 431} 432 433template <size_t blockWidth, size_t blockHeight> 434inline void LoadASTCToRGBA8(const ImageLoadContext &context, 435 size_t width, 436 size_t height, 437 size_t depth, 438 const uint8_t *input, 439 size_t inputRowPitch, 440 size_t inputDepthPitch, 441 uint8_t *output, 442 size_t outputRowPitch, 443 size_t outputDepthPitch) 444{ 445 LoadASTCToRGBA8Inner(context, width, height, depth, blockWidth, blockHeight, input, inputRowPitch, 446 inputDepthPitch, output, outputRowPitch, outputDepthPitch); 447} 448 449template <uint32_t indexBits, uint32_t redBlueBits, uint32_t greenBits, uint32_t alphaBits> 450inline void LoadPalettedToRGBA8(const ImageLoadContext &context, 451 size_t width, 452 size_t height, 453 size_t depth, 454 const uint8_t *input, 455 size_t inputRowPitch, 456 size_t inputDepthPitch, 457 uint8_t *output, 458 size_t outputRowPitch, 459 size_t outputDepthPitch) 460{ 461 static_assert(indexBits == 4 || indexBits == 8); 462 static_assert(redBlueBits == 4 || redBlueBits == 5 || redBlueBits == 8); 463 static_assert(greenBits == 4 || greenBits == 5 || greenBits == 6 || greenBits == 8); 464 static_assert(alphaBits == 0 || alphaBits == 1 || alphaBits == 4 || alphaBits == 8); 465 constexpr uint32_t colorBits = 2 * redBlueBits + greenBits + alphaBits; 466 static_assert(colorBits == 16 || colorBits == 24 || colorBits == 32); 467 468 LoadPalettedToRGBA8Impl(context, width, height, depth, 469 indexBits, redBlueBits, greenBits, alphaBits, 470 input, inputRowPitch, inputDepthPitch, 471 output, outputRowPitch, outputDepthPitch); 472} 473 474// Temporary overload functions; need to have no-context overloads of the following functions used 475// by Chromium. A Chromium change will switch to the with-context overloads, and then these can be 476// removed. 477inline void LoadEACR11ToR8(size_t width, 478 size_t height, 479 size_t depth, 480 const uint8_t *input, 481 size_t inputRowPitch, 482 size_t inputDepthPitch, 483 uint8_t *output, 484 size_t outputRowPitch, 485 size_t outputDepthPitch) 486{ 487 LoadEACR11ToR8({}, width, height, depth, input, inputRowPitch, inputDepthPitch, output, 488 outputRowPitch, outputDepthPitch); 489} 490 491inline void LoadEACR11SToR8(size_t width, 492 size_t height, 493 size_t depth, 494 const uint8_t *input, 495 size_t inputRowPitch, 496 size_t inputDepthPitch, 497 uint8_t *output, 498 size_t outputRowPitch, 499 size_t outputDepthPitch) 500{ 501 LoadEACR11SToR8({}, width, height, depth, input, inputRowPitch, inputDepthPitch, output, 502 outputRowPitch, outputDepthPitch); 503} 504 505inline void LoadEACRG11ToRG8(size_t width, 506 size_t height, 507 size_t depth, 508 const uint8_t *input, 509 size_t inputRowPitch, 510 size_t inputDepthPitch, 511 uint8_t *output, 512 size_t outputRowPitch, 513 size_t outputDepthPitch) 514{ 515 LoadEACRG11ToRG8({}, width, height, depth, input, inputRowPitch, inputDepthPitch, output, 516 outputRowPitch, outputDepthPitch); 517} 518 519inline void LoadEACRG11SToRG8(size_t width, 520 size_t height, 521 size_t depth, 522 const uint8_t *input, 523 size_t inputRowPitch, 524 size_t inputDepthPitch, 525 uint8_t *output, 526 size_t outputRowPitch, 527 size_t outputDepthPitch) 528{ 529 LoadEACRG11SToRG8({}, width, height, depth, input, inputRowPitch, inputDepthPitch, output, 530 outputRowPitch, outputDepthPitch); 531} 532 533inline void LoadETC2RGB8ToRGBA8(size_t width, 534 size_t height, 535 size_t depth, 536 const uint8_t *input, 537 size_t inputRowPitch, 538 size_t inputDepthPitch, 539 uint8_t *output, 540 size_t outputRowPitch, 541 size_t outputDepthPitch) 542{ 543 LoadETC2RGB8ToRGBA8({}, width, height, depth, input, inputRowPitch, inputDepthPitch, output, 544 outputRowPitch, outputDepthPitch); 545} 546 547inline void LoadETC2SRGB8ToRGBA8(size_t width, 548 size_t height, 549 size_t depth, 550 const uint8_t *input, 551 size_t inputRowPitch, 552 size_t inputDepthPitch, 553 uint8_t *output, 554 size_t outputRowPitch, 555 size_t outputDepthPitch) 556{ 557 LoadETC2SRGB8ToRGBA8({}, width, height, depth, input, inputRowPitch, inputDepthPitch, output, 558 outputRowPitch, outputDepthPitch); 559} 560 561inline void LoadETC2RGBA8ToRGBA8(size_t width, 562 size_t height, 563 size_t depth, 564 const uint8_t *input, 565 size_t inputRowPitch, 566 size_t inputDepthPitch, 567 uint8_t *output, 568 size_t outputRowPitch, 569 size_t outputDepthPitch) 570{ 571 LoadETC2RGBA8ToRGBA8({}, width, height, depth, input, inputRowPitch, inputDepthPitch, output, 572 outputRowPitch, outputDepthPitch); 573} 574 575inline void LoadETC2RGB8A1ToRGBA8(size_t width, 576 size_t height, 577 size_t depth, 578 const uint8_t *input, 579 size_t inputRowPitch, 580 size_t inputDepthPitch, 581 uint8_t *output, 582 size_t outputRowPitch, 583 size_t outputDepthPitch) 584{ 585 LoadETC2RGB8A1ToRGBA8({}, width, height, depth, input, inputRowPitch, inputDepthPitch, output, 586 outputRowPitch, outputDepthPitch); 587} 588 589inline void LoadETC2SRGBA8ToSRGBA8(size_t width, 590 size_t height, 591 size_t depth, 592 const uint8_t *input, 593 size_t inputRowPitch, 594 size_t inputDepthPitch, 595 uint8_t *output, 596 size_t outputRowPitch, 597 size_t outputDepthPitch) 598{ 599 LoadETC2SRGBA8ToSRGBA8({}, width, height, depth, input, inputRowPitch, inputDepthPitch, output, 600 outputRowPitch, outputDepthPitch); 601} 602 603inline void LoadETC2SRGB8A1ToRGBA8(size_t width, 604 size_t height, 605 size_t depth, 606 const uint8_t *input, 607 size_t inputRowPitch, 608 size_t inputDepthPitch, 609 uint8_t *output, 610 size_t outputRowPitch, 611 size_t outputDepthPitch) 612{ 613 LoadETC2SRGB8A1ToRGBA8({}, width, height, depth, input, inputRowPitch, inputDepthPitch, output, 614 outputRowPitch, outputDepthPitch); 615} 616 617} // namespace angle 618