1 //
2 // Copyright 2002 The ANGLE Project Authors. All rights reserved.
3 // Use of this source code is governed by a BSD-style license that can be
4 // found in the LICENSE file.
5 //
6
7 // mathutil.h: Math and bit manipulation functions.
8
9 #ifndef COMMON_MATHUTIL_H_
10 #define COMMON_MATHUTIL_H_
11
12 #include <math.h>
13 #include <stdint.h>
14 #include <stdlib.h>
15 #include <string.h>
16 #include <algorithm>
17 #include <limits>
18
19 #include <anglebase/numerics/safe_math.h>
20
21 #include "common/debug.h"
22 #include "common/platform.h"
23
24 namespace angle
25 {
26 using base::CheckedNumeric;
27 using base::IsValueInRangeForNumericType;
28 } // namespace angle
29
30 namespace gl
31 {
32
// Bit patterns of the value 1.0 encoded as a 32-bit and as a 16-bit float.
constexpr unsigned int Float32One   = 0x3F800000;
constexpr unsigned short Float16One = 0x3C00;
35
// Returns true when exactly one bit of |x| is set. Zero is not a power of two.
template <typename T>
inline constexpr bool isPow2(T x)
{
    static_assert(std::is_integral<T>::value, "isPow2 must be called on an integer type.");
    // x & (x - 1) clears the lowest set bit; nothing is left over only for single-bit values.
    return x == 0 ? false : (x & (x - 1)) == 0;
}
42
// Integer base-2 logarithm, rounded down. Returns 0 for inputs of 0 or 1.
template <typename T>
inline int log2(T x)
{
    static_assert(std::is_integral<T>::value, "log2 must be called on an integer type.");
    int exponent = 0;
    // Advance until shifting by |exponent| leaves at most the top set bit.
    for (; (x >> exponent) > 1; ++exponent)
    {
    }
    return exponent;
}
52
// Rounds |x| up to the next power of two. An input of 0 yields 1, and inputs above 2^31 wrap
// around to 0.
inline unsigned int ceilPow2(unsigned int x)
{
    if (x != 0)
    {
        --x;
    }

    // Smear the highest set bit into every lower position.
    for (unsigned int shift = 1; shift <= 16; shift *= 2)
    {
        x |= x >> shift;
    }

    return x + 1;
}
66
// Converts |value| to DestT, saturating at DestT's representable limits instead of overflowing.
template <typename DestT, typename SrcT>
inline DestT clampCast(SrcT value)
{
    using DestLimits = std::numeric_limits<DestT>;
    using SrcLimits  = std::numeric_limits<SrcT>;

    // For floating-point types with denormalization, min returns the minimum positive normalized
    // value. To find the value that has no values less than it, use numeric_limits::lowest.
    constexpr long double kDestLowest = static_cast<long double>(DestLimits::lowest());
    constexpr long double kDestMax    = static_cast<long double>(DestLimits::max());
    constexpr long double kSrcLowest  = static_cast<long double>(SrcLimits::lowest());
    constexpr long double kSrcMax     = static_cast<long double>(SrcLimits::max());

    // Only compare against a limit when the source type can actually exceed it.
    if constexpr (kDestMax < kSrcMax)
    {
        const DestT upperBound = DestLimits::max();
        if (value >= static_cast<SrcT>(upperBound))
        {
            return upperBound;
        }
    }

    if constexpr (kDestLowest > kSrcLowest)
    {
        const DestT lowerBound = DestLimits::lowest();
        if (value <= static_cast<SrcT>(lowerBound))
        {
            return lowerBound;
        }
    }

    return static_cast<DestT>(value);
}
100
101 // Specialize clampCast for bool->int conversion to avoid MSVS 2015 performance warning when the max
102 // value is casted to the source type.
103 template <>
clampCast(bool value)104 inline unsigned int clampCast(bool value)
105 {
106 return static_cast<unsigned int>(value);
107 }
108
109 template <>
clampCast(bool value)110 inline int clampCast(bool value)
111 {
112 return static_cast<int>(value);
113 }
114
// Restricts |x| to the range [min, max].
template <typename T, typename MIN, typename MAX>
inline T clamp(T x, MIN min, MAX max)
{
    // Since NaNs fail all comparison tests, a NaN value will default to min
    const bool aboveMin = x > min;
    const bool aboveMax = x > max;
    return aboveMin ? (aboveMax ? max : x) : min;
}
121
122 template <typename T>
clampForBitCount(T value,size_t bitCount)123 T clampForBitCount(T value, size_t bitCount)
124 {
125 static_assert(std::numeric_limits<T>::is_integer, "T must be an integer.");
126
127 if (bitCount == 0)
128 {
129 constexpr T kZero = 0;
130 return kZero;
131 }
132 ASSERT(bitCount <= sizeof(T) * 8);
133
134 constexpr bool kIsSigned = std::numeric_limits<T>::is_signed;
135 ASSERT((bitCount > 1) || !kIsSigned);
136
137 T min = 0;
138 T max = 0;
139 if (bitCount == sizeof(T) * 8)
140 {
141 min = std::numeric_limits<T>::min();
142 max = std::numeric_limits<T>::max();
143 }
144 else
145 {
146 constexpr T kOne = 1;
147 min = (kIsSigned) ? -1 * (kOne << (bitCount - 1)) : 0;
148 max = (kIsSigned) ? (kOne << (bitCount - 1)) - 1 : (kOne << bitCount) - 1;
149 }
150
151 return gl::clamp(value, min, max);
152 }
153
// Clamps |x| to [0, 1]. A NaN input fails the first comparison and therefore maps to 0.
inline float clamp01(float x)
{
    return x > 0.0f ? (x > 1.0f ? 1.0f : x) : 0.0f;
}
158
// Converts |x| to an n-bit unsigned normalized integer: values above 1 map to the maximum n-bit
// value, values that are not greater than 0 (negatives, zero and NaN) map to 0, and everything in
// between is scaled and rounded to nearest.
template <const int n>
inline unsigned int unorm(float x)
{
    // The shift below is only defined for bit counts in [1, 32].
    static_assert(n > 0 && n <= 32, "unorm bit count must be in the range [1, 32].");
    const unsigned int max = 0xFFFFFFFF >> (32 - n);

    if (x > 1)
    {
        return max;
    }

    // Written as a negated comparison so that NaN takes this branch rather than reaching the
    // float -> unsigned conversion below, which is undefined for NaN.
    if (!(x > 0))
    {
        return 0;
    }

    return static_cast<unsigned int>(max * x + 0.5f);
}
177
// Reinterprets the object representation of |source| as a destType by copying bytes.
// Only min(sizeof(destType), sizeof(sourceType)) bytes are copied; when destType is the larger
// type, the bytes that are not overwritten are zero rather than indeterminate.
template <typename destType, typename sourceType>
destType bitCast(const sourceType &source)
{
    const size_t copySize = std::min(sizeof(destType), sizeof(sourceType));
    // Value-initialize so no uninitialized bytes are returned when the sizes differ.
    destType output{};
    memcpy(&output, &source, copySize);
    return output;
}
186
// Converts an integer to a pointer type by going through uintptr_t. No validity checks are made.
template <typename DestT, typename SrcT>
DestT unsafe_int_to_pointer_cast(SrcT src)
{
    const uintptr_t address = static_cast<uintptr_t>(src);
    return reinterpret_cast<DestT>(address);
}
192
// Converts a pointer to an integer type by going through uintptr_t. The result is truncated if
// DestT is narrower than a pointer.
template <typename DestT, typename SrcT>
DestT unsafe_pointer_to_int_cast(SrcT src)
{
    const uintptr_t address = reinterpret_cast<uintptr_t>(src);
    return static_cast<DestT>(address);
}
198
// https://stackoverflow.com/a/37581284
// Scales an integer to a double: negative values are divided by the type's minimum (after
// negation) and non-negative values by the type's maximum.
template <typename T>
static constexpr double normalize(T value)
{
    using Limits = std::numeric_limits<T>;

    const double asDouble = static_cast<double>(value);
    if (value < 0)
    {
        return -asDouble / Limits::min();
    }
    return asDouble / Limits::max();
}
206
float32ToFloat16(float fp32)207 inline unsigned short float32ToFloat16(float fp32)
208 {
209 unsigned int fp32i = bitCast<unsigned int>(fp32);
210 unsigned int sign = (fp32i & 0x80000000) >> 16;
211 unsigned int abs = fp32i & 0x7FFFFFFF;
212
213 if (abs > 0x7F800000)
214 { // NaN
215 return 0x7FFF;
216 }
217 else if (abs > 0x47FFEFFF)
218 { // Infinity
219 return static_cast<uint16_t>(sign | 0x7C00);
220 }
221 else if (abs < 0x38800000) // Denormal
222 {
223 unsigned int mantissa = (abs & 0x007FFFFF) | 0x00800000;
224 int e = 113 - (abs >> 23);
225
226 if (e < 24)
227 {
228 abs = mantissa >> e;
229 }
230 else
231 {
232 abs = 0;
233 }
234
235 return static_cast<unsigned short>(sign | (abs + 0x00000FFF + ((abs >> 13) & 1)) >> 13);
236 }
237 else
238 {
239 return static_cast<unsigned short>(
240 sign | (abs + 0xC8000000 + 0x00000FFF + ((abs >> 13) & 1)) >> 13);
241 }
242 }
243
244 float float16ToFloat32(unsigned short h);
245
246 unsigned int convertRGBFloatsTo999E5(float red, float green, float blue);
247 void convert999E5toRGBFloats(unsigned int input, float *red, float *green, float *blue);
248
float32ToFloat11(float fp32)249 inline unsigned short float32ToFloat11(float fp32)
250 {
251 const unsigned int float32MantissaMask = 0x7FFFFF;
252 const unsigned int float32ExponentMask = 0x7F800000;
253 const unsigned int float32SignMask = 0x80000000;
254 const unsigned int float32ValueMask = ~float32SignMask;
255 const unsigned int float32ExponentFirstBit = 23;
256 const unsigned int float32ExponentBias = 127;
257
258 const unsigned short float11Max = 0x7BF;
259 const unsigned short float11MantissaMask = 0x3F;
260 const unsigned short float11ExponentMask = 0x7C0;
261 const unsigned short float11BitMask = 0x7FF;
262 const unsigned int float11ExponentBias = 14;
263
264 const unsigned int float32Maxfloat11 = 0x477E0000;
265 const unsigned int float32MinNormfloat11 = 0x38800000;
266 const unsigned int float32MinDenormfloat11 = 0x35000080;
267
268 const unsigned int float32Bits = bitCast<unsigned int>(fp32);
269 const bool float32Sign = (float32Bits & float32SignMask) == float32SignMask;
270
271 unsigned int float32Val = float32Bits & float32ValueMask;
272
273 if ((float32Val & float32ExponentMask) == float32ExponentMask)
274 {
275 // INF or NAN
276 if ((float32Val & float32MantissaMask) != 0)
277 {
278 return float11ExponentMask |
279 (((float32Val >> 17) | (float32Val >> 11) | (float32Val >> 6) | (float32Val)) &
280 float11MantissaMask);
281 }
282 else if (float32Sign)
283 {
284 // -INF is clamped to 0 since float11 is positive only
285 return 0;
286 }
287 else
288 {
289 return float11ExponentMask;
290 }
291 }
292 else if (float32Sign)
293 {
294 // float11 is positive only, so clamp to zero
295 return 0;
296 }
297 else if (float32Val > float32Maxfloat11)
298 {
299 // The number is too large to be represented as a float11, set to max
300 return float11Max;
301 }
302 else if (float32Val < float32MinDenormfloat11)
303 {
304 // The number is too small to be represented as a denormalized float11, set to 0
305 return 0;
306 }
307 else
308 {
309 if (float32Val < float32MinNormfloat11)
310 {
311 // The number is too small to be represented as a normalized float11
312 // Convert it to a denormalized value.
313 const unsigned int shift = (float32ExponentBias - float11ExponentBias) -
314 (float32Val >> float32ExponentFirstBit);
315 ASSERT(shift < 32);
316 float32Val =
317 ((1 << float32ExponentFirstBit) | (float32Val & float32MantissaMask)) >> shift;
318 }
319 else
320 {
321 // Rebias the exponent to represent the value as a normalized float11
322 float32Val += 0xC8000000;
323 }
324
325 return ((float32Val + 0xFFFF + ((float32Val >> 17) & 1)) >> 17) & float11BitMask;
326 }
327 }
328
float32ToFloat10(float fp32)329 inline unsigned short float32ToFloat10(float fp32)
330 {
331 const unsigned int float32MantissaMask = 0x7FFFFF;
332 const unsigned int float32ExponentMask = 0x7F800000;
333 const unsigned int float32SignMask = 0x80000000;
334 const unsigned int float32ValueMask = ~float32SignMask;
335 const unsigned int float32ExponentFirstBit = 23;
336 const unsigned int float32ExponentBias = 127;
337
338 const unsigned short float10Max = 0x3DF;
339 const unsigned short float10MantissaMask = 0x1F;
340 const unsigned short float10ExponentMask = 0x3E0;
341 const unsigned short float10BitMask = 0x3FF;
342 const unsigned int float10ExponentBias = 14;
343
344 const unsigned int float32Maxfloat10 = 0x477C0000;
345 const unsigned int float32MinNormfloat10 = 0x38800000;
346 const unsigned int float32MinDenormfloat10 = 0x35800040;
347
348 const unsigned int float32Bits = bitCast<unsigned int>(fp32);
349 const bool float32Sign = (float32Bits & float32SignMask) == float32SignMask;
350
351 unsigned int float32Val = float32Bits & float32ValueMask;
352
353 if ((float32Val & float32ExponentMask) == float32ExponentMask)
354 {
355 // INF or NAN
356 if ((float32Val & float32MantissaMask) != 0)
357 {
358 return float10ExponentMask |
359 (((float32Val >> 18) | (float32Val >> 13) | (float32Val >> 3) | (float32Val)) &
360 float10MantissaMask);
361 }
362 else if (float32Sign)
363 {
364 // -INF is clamped to 0 since float10 is positive only
365 return 0;
366 }
367 else
368 {
369 return float10ExponentMask;
370 }
371 }
372 else if (float32Sign)
373 {
374 // float10 is positive only, so clamp to zero
375 return 0;
376 }
377 else if (float32Val > float32Maxfloat10)
378 {
379 // The number is too large to be represented as a float10, set to max
380 return float10Max;
381 }
382 else if (float32Val < float32MinDenormfloat10)
383 {
384 // The number is too small to be represented as a denormalized float10, set to 0
385 return 0;
386 }
387 else
388 {
389 if (float32Val < float32MinNormfloat10)
390 {
391 // The number is too small to be represented as a normalized float10
392 // Convert it to a denormalized value.
393 const unsigned int shift = (float32ExponentBias - float10ExponentBias) -
394 (float32Val >> float32ExponentFirstBit);
395 ASSERT(shift < 32);
396 float32Val =
397 ((1 << float32ExponentFirstBit) | (float32Val & float32MantissaMask)) >> shift;
398 }
399 else
400 {
401 // Rebias the exponent to represent the value as a normalized float10
402 float32Val += 0xC8000000;
403 }
404
405 return ((float32Val + 0x1FFFF + ((float32Val >> 18) & 1)) >> 18) & float10BitMask;
406 }
407 }
408
float11ToFloat32(unsigned short fp11)409 inline float float11ToFloat32(unsigned short fp11)
410 {
411 unsigned short exponent = (fp11 >> 6) & 0x1F;
412 unsigned short mantissa = fp11 & 0x3F;
413
414 if (exponent == 0x1F)
415 {
416 // INF or NAN
417 return bitCast<float>(0x7f800000 | (mantissa << 17));
418 }
419 else
420 {
421 if (exponent != 0)
422 {
423 // normalized
424 }
425 else if (mantissa != 0)
426 {
427 // The value is denormalized
428 exponent = 1;
429
430 do
431 {
432 exponent--;
433 mantissa <<= 1;
434 } while ((mantissa & 0x40) == 0);
435
436 mantissa = mantissa & 0x3F;
437 }
438 else // The value is zero
439 {
440 exponent = static_cast<unsigned short>(-112);
441 }
442
443 return bitCast<float>(((exponent + 112) << 23) | (mantissa << 17));
444 }
445 }
446
float10ToFloat32(unsigned short fp10)447 inline float float10ToFloat32(unsigned short fp10)
448 {
449 unsigned short exponent = (fp10 >> 5) & 0x1F;
450 unsigned short mantissa = fp10 & 0x1F;
451
452 if (exponent == 0x1F)
453 {
454 // INF or NAN
455 return bitCast<float>(0x7f800000 | (mantissa << 17));
456 }
457 else
458 {
459 if (exponent != 0)
460 {
461 // normalized
462 }
463 else if (mantissa != 0)
464 {
465 // The value is denormalized
466 exponent = 1;
467
468 do
469 {
470 exponent--;
471 mantissa <<= 1;
472 } while ((mantissa & 0x20) == 0);
473
474 mantissa = mantissa & 0x1F;
475 }
476 else // The value is zero
477 {
478 exponent = static_cast<unsigned short>(-112);
479 }
480
481 return bitCast<float>(((exponent + 112) << 23) | (mantissa << 18));
482 }
483 }
484
// Converts a 16.16 fixed point value to float.
inline float ConvertFixedToFloat(int32_t fixedInput)
{
    // Scaling by 2^-16 is exact, so this matches a division by 65536.
    constexpr float kFixedToFloat = 1.0f / 65536.0f;
    return static_cast<float>(fixedInput) * kFixedToFloat;
}
490
// Converts a float to 16.16 fixed point, returned as the raw 32-bit pattern. Inputs beyond the
// checked bounds saturate to kHighest / kLowest. NaN is not handled.
inline uint32_t ConvertFloatToFixed(float floatInput)
{
    static constexpr uint32_t kHighest = 32767 * 65536 + 65535;
    static constexpr uint32_t kLowest  = static_cast<uint32_t>(-32768 * 65536 + 65535);

    if (floatInput > 32767.65535)
    {
        return kHighest;
    }
    if (floatInput < -32768.65535)
    {
        return kLowest;
    }

    // Converting a negative float straight to an unsigned type is undefined behavior (and
    // saturates to 0 on some targets). Go through a signed 64-bit integer, which can hold every
    // value that passes the range checks above, then wrap to 32 bits to get the two's
    // complement pattern.
    const int64_t fixed = static_cast<int64_t>(floatInput * 65536);
    return static_cast<uint32_t>(fixed);
}
509
// Maps an integer to a float by dividing by the maximum value of its type.
template <typename T>
inline float normalizedToFloat(T input)
{
    static_assert(std::numeric_limits<T>::is_integer, "T must be an integer.");

    if constexpr (sizeof(T) <= 2)
    {
        constexpr float kScale = 1.0f / std::numeric_limits<T>::max();
        return input * kScale;
    }
    else
    {
        // float has only a 23 bit mantissa, so we need to do the calculation in double precision
        constexpr double kScale = 1.0 / std::numeric_limits<T>::max();
        const double scaled     = input * kScale;
        return static_cast<float>(scaled);
    }
}
527
// Maps an |inputBitCount|-bit unsigned normalized integer stored in a wider type T to a float by
// dividing by 2^inputBitCount - 1. |input| must not have bits set above inputBitCount.
template <unsigned int inputBitCount, typename T>
inline float normalizedToFloat(T input)
{
    static_assert(std::numeric_limits<T>::is_integer, "T must be an integer.");
    static_assert(inputBitCount < (sizeof(T) * 8), "T must have more bits than inputBitCount.");

    // Computed in 64 bits: the static_assert above keeps inputBitCount below 64, whereas an int
    // shift would overflow for bit counts of 31 and above.
    constexpr uint64_t kMaxValue = (uint64_t{1} << inputBitCount) - 1;
    ASSERT((input & ~kMaxValue) == 0);

    if constexpr (inputBitCount > 23)
    {
        // float has only a 23 bit mantissa, so we need to do the calculation in double precision
        constexpr double inverseMax = 1.0 / kMaxValue;
        return static_cast<float>(input * inverseMax);
    }
    else
    {
        constexpr float inverseMax = 1.0f / kMaxValue;
        return input * inverseMax;
    }
}
547
// Rounds the floating-point |input| to the nearest integer and returns it as R.
template <typename T, typename R>
inline R roundToNearest(T input)
{
    static_assert(std::is_floating_point<T>::value);
    static_assert(std::numeric_limits<R>::is_integer);
#if defined(__aarch64__) || defined(_M_ARM64)
    // On armv8, this expression is compiled to a dedicated round-to-nearest instruction
    return static_cast<R>(std::round(input));
#else
    // The bias is the largest value below one half; adding it to itself's complement still
    // yields exactly 1, which the checks below confirm for both float and double.
    static_assert(0.49999997f < 0.5f);
    static_assert(0.49999997f + 0.5f == 1.0f);
    static_assert(0.49999999999999994 < 0.5);
    static_assert(0.49999999999999994 + 0.5 == 1.0);
    constexpr T bias = sizeof(T) == 8 ? 0.49999999999999994 : 0.49999997f;

    if constexpr (std::is_signed<R>::value)
    {
        // Push away from zero so that the truncating cast rounds negative values correctly.
        return static_cast<R>(input + std::copysign(bias, input));
    }
    else
    {
        return static_cast<R>(input + bias);
    }
#endif
}
565
566 template <typename T>
floatToNormalized(float input)567 inline T floatToNormalized(float input)
568 {
569 if constexpr (sizeof(T) > 2)
570 {
571 // float has only a 23 bit mantissa, so we need to do the calculation in double precision
572 return roundToNearest<double, T>(std::numeric_limits<T>::max() *
573 static_cast<double>(input));
574 }
575 else
576 {
577 return roundToNearest<float, T>(std::numeric_limits<T>::max() * input);
578 }
579 }
580
581 template <unsigned int outputBitCount, typename T>
floatToNormalized(float input)582 inline T floatToNormalized(float input)
583 {
584 static_assert(outputBitCount < (sizeof(T) * 8), "T must have more bits than outputBitCount.");
585 static_assert(outputBitCount > (std::is_unsigned<T>::value ? 0 : 1),
586 "outputBitCount must be at least 1 not counting the sign bit.");
587 constexpr unsigned int bits = std::is_unsigned<T>::value ? outputBitCount : outputBitCount - 1;
588
589 if (bits > 23)
590 {
591 // float has only a 23 bit mantissa, so we need to do the calculation in double precision
592 return roundToNearest<double, T>(((1 << bits) - 1) * static_cast<double>(input));
593 }
594 else
595 {
596 return roundToNearest<float, T>(((1 << bits) - 1) * input);
597 }
598 }
599
// Extracts the |inputBitCount|-bit field that starts at bit |inputBitStart| of |input| and
// returns it in the low bits of the result.
template <unsigned int inputBitCount, unsigned int inputBitStart, typename T>
inline T getShiftedData(T input)
{
    static_assert(inputBitCount + inputBitStart <= (sizeof(T) * 8),
                  "T must have at least as many bits as inputBitCount + inputBitStart.");

    // Build the mask in the unsigned counterpart of T rather than in int, so that a field
    // spanning the whole type, or one wider than int in a 64-bit T, is handled correctly.
    using UnsignedT                  = typename std::make_unsigned<T>::type;
    constexpr unsigned int kTypeBits = sizeof(T) * 8;
    constexpr UnsignedT kMask =
        inputBitCount >= kTypeBits
            ? static_cast<UnsignedT>(~UnsignedT{0})
            : static_cast<UnsignedT>((UnsignedT{1} << (inputBitCount % kTypeBits)) - 1);

    return static_cast<T>((input >> inputBitStart) & kMask);
}
608
// Keeps the low |inputBitCount| bits of |input| and moves them up so the field starts at bit
// |inputBitStart|.
template <unsigned int inputBitCount, unsigned int inputBitStart, typename T>
inline T shiftData(T input)
{
    static_assert(inputBitCount + inputBitStart <= (sizeof(T) * 8),
                  "T must have at least as many bits as inputBitCount + inputBitStart.");

    // Build the mask in the unsigned counterpart of T rather than in int, so that a field
    // spanning the whole type, or one wider than int in a 64-bit T, is handled correctly.
    using UnsignedT                  = typename std::make_unsigned<T>::type;
    constexpr unsigned int kTypeBits = sizeof(T) * 8;
    constexpr UnsignedT kMask =
        inputBitCount >= kTypeBits
            ? static_cast<UnsignedT>(~UnsignedT{0})
            : static_cast<UnsignedT>((UnsignedT{1} << (inputBitCount % kTypeBits)) - 1);

    return static_cast<T>((static_cast<UnsignedT>(input) & kMask) << inputBitStart);
}
617
// Returns the number of zero bits above the highest set bit of |x|; 32 when |x| is 0.
inline unsigned int CountLeadingZeros(uint32_t x)
{
    // Use binary search to find the amount of leading zeros.
    unsigned int zeros = 32u;

    for (unsigned int step = 16u; step >= 2u; step /= 2u)
    {
        const uint32_t upper = x >> step;
        if (upper != 0)
        {
            zeros -= step;
            x = upper;
        }
    }

    // At most two significant bits remain in x here.
    return (x >> 1u) != 0 ? zeros - 2u : zeros - x;
}
655
// Average of two unsigned 8-bit values, rounded down.
inline unsigned char average(unsigned char a, unsigned char b)
{
    // Both operands promote to int, so the sum cannot overflow.
    const int sum = a + b;
    return static_cast<unsigned char>(sum / 2);
}
660
// Average of two signed 8-bit values, rounded toward zero.
inline signed char average(signed char a, signed char b)
{
    const int sum = static_cast<int>(a) + static_cast<int>(b);
    return static_cast<signed char>(sum / 2);
}
665
// Average of two unsigned 16-bit values, rounded down.
inline unsigned short average(unsigned short a, unsigned short b)
{
    // Both operands promote to int, so the sum cannot overflow.
    const int sum = a + b;
    return static_cast<unsigned short>(sum / 2);
}
670
// Average of two signed 16-bit values, rounded toward zero.
inline signed short average(signed short a, signed short b)
{
    const int sum = static_cast<int>(a) + static_cast<int>(b);
    return static_cast<signed short>(sum / 2);
}
675
// Average of two unsigned 32-bit values, rounded down.
inline unsigned int average(unsigned int a, unsigned int b)
{
    // Sum in 64 bits so the carry out of bit 31 is not lost.
    const uint64_t sum = static_cast<uint64_t>(a) + b;
    return static_cast<unsigned int>(sum / 2);
}
680
// Average of two ints, rounded toward zero. The sum is formed in long long to avoid overflow.
inline int average(int a, int b)
{
    const long long sum = static_cast<long long>(a) + static_cast<long long>(b);
    return static_cast<int>(sum / 2LL);
}
686
// Arithmetic mean of two floats.
inline float average(float a, float b)
{
    const float sum = a + b;
    // Halving is exact, so dividing by 2 gives the same result as multiplying by 0.5.
    return sum / 2.0f;
}
691
averageHalfFloat(unsigned short a,unsigned short b)692 inline unsigned short averageHalfFloat(unsigned short a, unsigned short b)
693 {
694 return float32ToFloat16((float16ToFloat32(a) + float16ToFloat32(b)) * 0.5f);
695 }
696
averageFloat11(unsigned int a,unsigned int b)697 inline unsigned int averageFloat11(unsigned int a, unsigned int b)
698 {
699 return float32ToFloat11((float11ToFloat32(static_cast<unsigned short>(a)) +
700 float11ToFloat32(static_cast<unsigned short>(b))) *
701 0.5f);
702 }
703
averageFloat10(unsigned int a,unsigned int b)704 inline unsigned int averageFloat10(unsigned int a, unsigned int b)
705 {
706 return float32ToFloat10((float10ToFloat32(static_cast<unsigned short>(a)) +
707 float10ToFloat32(static_cast<unsigned short>(b))) *
708 0.5f);
709 }
710
// Half-open interval [low, high) over T. A range whose high bound is not above its low bound is
// empty.
template <typename T>
class Range
{
  public:
    // Value-initializes both bounds, so a default-constructed range is a well-defined empty range
    // instead of holding indeterminate values.
    Range() : mLow(), mHigh() {}
    Range(T lo, T hi) : mLow(lo), mHigh(hi) {}

    bool operator==(const Range<T> &other) const
    {
        return mLow == other.mLow && mHigh == other.mHigh;
    }

    // Distance between the bounds, or 0 for an empty range.
    T length() const { return (empty() ? 0 : (mHigh - mLow)); }

    // True when the two half-open ranges share at least one value.
    bool intersects(const Range<T> &other) const
    {
        if (mLow <= other.mLow)
        {
            return other.mLow < mHigh;
        }
        else
        {
            return mLow < other.mHigh;
        }
    }

    // Like intersects(), but also true when one range ends exactly where the other begins.
    // Neither range may be empty.
    bool intersectsOrContinuous(const Range<T> &other) const
    {
        ASSERT(!empty());
        ASSERT(!other.empty());
        if (mLow <= other.mLow)
        {
            return mHigh >= other.mLow;
        }
        else
        {
            return mLow <= other.mHigh;
        }
    }

    // Grows this range to the smallest range covering both itself and |other|.
    void merge(const Range<T> &other)
    {
        if (mLow > other.mLow)
        {
            mLow = other.mLow;
        }

        if (mHigh < other.mHigh)
        {
            mHigh = other.mHigh;
        }
    }

    // Assumes that end is non-inclusive.. for example, extending to 5 will make "end" 6.
    void extend(T value)
    {
        mLow  = value < mLow ? value : mLow;
        mHigh = value >= mHigh ? (value + 1) : mHigh;
    }

    bool empty() const { return mHigh <= mLow; }

    bool contains(T value) const { return value >= mLow && value < mHigh; }

    // Minimal forward iterator that steps through the values of the range one at a time.
    class Iterator final
    {
      public:
        Iterator(T value) : mCurrent(value) {}

        Iterator &operator++()
        {
            mCurrent++;
            return *this;
        }
        bool operator==(const Iterator &other) const { return mCurrent == other.mCurrent; }
        bool operator!=(const Iterator &other) const { return mCurrent != other.mCurrent; }
        T operator*() const { return mCurrent; }

      private:
        T mCurrent;
    };

    Iterator begin() const { return Iterator(mLow); }

    Iterator end() const { return Iterator(mHigh); }

    T low() const { return mLow; }
    T high() const { return mHigh; }

    // Resets to an inverted (empty) range so that a following extend() or merge() replaces both
    // bounds. lowest() is used for the high bound because min() is the smallest positive value
    // for floating-point types; for integer types the two are identical.
    void invalidate()
    {
        mLow  = std::numeric_limits<T>::max();
        mHigh = std::numeric_limits<T>::lowest();
    }

  private:
    T mLow;
    T mHigh;
};
810
811 typedef Range<int> RangeI;
812 typedef Range<unsigned int> RangeUI;
813 static_assert(std::is_trivially_copyable<RangeUI>(),
814 "RangeUI should be trivial copyable so that we can memcpy");
815
816 struct IndexRange
817 {
818 struct Undefined
819 {};
IndexRangeIndexRange820 IndexRange(Undefined) {}
IndexRangeIndexRange821 IndexRange() : IndexRange(0, 0, 0) {}
IndexRangeIndexRange822 IndexRange(size_t start_, size_t end_, size_t vertexIndexCount_)
823 : start(start_), end(end_), vertexIndexCount(vertexIndexCount_)
824 {
825 ASSERT(start <= end);
826 }
827
828 // Number of vertices in the range.
vertexCountIndexRange829 size_t vertexCount() const { return (end - start) + 1; }
830
831 // Inclusive range of indices that are not primitive restart
832 size_t start;
833 size_t end;
834
835 // Number of non-primitive restart indices
836 size_t vertexIndexCount;
837 };
838
// Combine a floating-point value representing a mantissa (x) and an integer exponent (exp) into a
// floating-point value. As in GLSL ldexp() built-in.
// Exponents above 128 return +infinity and exponents below -126 return 0, regardless of x.
inline float Ldexp(float x, int exp)
{
    if (exp > 128)
    {
        return std::numeric_limits<float>::infinity();
    }
    if (exp < -126)
    {
        return 0.0f;
    }

    // Scale in double precision and narrow once at the end.
    const double scale = std::pow(2.0, static_cast<double>(exp));
    return static_cast<float>(static_cast<double>(x) * scale);
}
854
855 // First, both normalized floating-point values are converted into 16-bit integer values.
856 // Then, the results are packed into the returned 32-bit unsigned integer.
857 // The first float value will be written to the least significant bits of the output;
858 // the last float value will be written to the most significant bits.
859 // The conversion of each value to fixed point is done as follows :
860 // packSnorm2x16 : round(clamp(c, -1, +1) * 32767.0)
packSnorm2x16(float f1,float f2)861 inline uint32_t packSnorm2x16(float f1, float f2)
862 {
863 int16_t leastSignificantBits = static_cast<int16_t>(roundf(clamp(f1, -1.0f, 1.0f) * 32767.0f));
864 int16_t mostSignificantBits = static_cast<int16_t>(roundf(clamp(f2, -1.0f, 1.0f) * 32767.0f));
865 return static_cast<uint32_t>(mostSignificantBits) << 16 |
866 (static_cast<uint32_t>(leastSignificantBits) & 0xFFFF);
867 }
868
869 // First, unpacks a single 32-bit unsigned integer u into a pair of 16-bit unsigned integers. Then,
870 // each component is converted to a normalized floating-point value to generate the returned two
871 // float values. The first float value will be extracted from the least significant bits of the
872 // input; the last float value will be extracted from the most-significant bits. The conversion for
873 // unpacked fixed-point value to floating point is done as follows: unpackSnorm2x16 : clamp(f /
874 // 32767.0, -1, +1)
unpackSnorm2x16(uint32_t u,float * f1,float * f2)875 inline void unpackSnorm2x16(uint32_t u, float *f1, float *f2)
876 {
877 int16_t leastSignificantBits = static_cast<int16_t>(u & 0xFFFF);
878 int16_t mostSignificantBits = static_cast<int16_t>(u >> 16);
879 *f1 = clamp(static_cast<float>(leastSignificantBits) / 32767.0f, -1.0f, 1.0f);
880 *f2 = clamp(static_cast<float>(mostSignificantBits) / 32767.0f, -1.0f, 1.0f);
881 }
882
883 // First, both normalized floating-point values are converted into 16-bit integer values.
884 // Then, the results are packed into the returned 32-bit unsigned integer.
885 // The first float value will be written to the least significant bits of the output;
886 // the last float value will be written to the most significant bits.
887 // The conversion of each value to fixed point is done as follows:
888 // packUnorm2x16 : round(clamp(c, 0, +1) * 65535.0)
packUnorm2x16(float f1,float f2)889 inline uint32_t packUnorm2x16(float f1, float f2)
890 {
891 uint16_t leastSignificantBits = static_cast<uint16_t>(roundf(clamp(f1, 0.0f, 1.0f) * 65535.0f));
892 uint16_t mostSignificantBits = static_cast<uint16_t>(roundf(clamp(f2, 0.0f, 1.0f) * 65535.0f));
893 return static_cast<uint32_t>(mostSignificantBits) << 16 |
894 static_cast<uint32_t>(leastSignificantBits);
895 }
896
// Unpacks a 32-bit unsigned integer into two unsigned normalized floats.
// The 16 least significant bits produce *f1 and the 16 most significant bits produce *f2.
// Each 16-bit value f is converted as f / 65535.0.
inline void unpackUnorm2x16(uint32_t u, float *f1, float *f2)
{
    constexpr float kMaxUnorm16 = 65535.0f;

    const uint16_t low  = static_cast<uint16_t>(u & 0xFFFF);
    const uint16_t high = static_cast<uint16_t>(u >> 16);

    *f1 = static_cast<float>(low) / kMaxUnorm16;
    *f2 = static_cast<float>(high) / kMaxUnorm16;
}
909
910 // Helper functions intended to be used only here.
911 namespace priv
912 {
913
ToPackedUnorm8(float f)914 inline uint8_t ToPackedUnorm8(float f)
915 {
916 return static_cast<uint8_t>(roundf(clamp(f, 0.0f, 1.0f) * 255.0f));
917 }
918
ToPackedSnorm8(float f)919 inline int8_t ToPackedSnorm8(float f)
920 {
921 return static_cast<int8_t>(roundf(clamp(f, -1.0f, 1.0f) * 127.0f));
922 }
923
924 } // namespace priv
925
926 // Packs 4 normalized unsigned floating-point values to a single 32-bit unsigned integer. Works
927 // similarly to packUnorm2x16. The floats are clamped to the range 0.0 to 1.0, and written to the
928 // unsigned integer starting from the least significant bits.
PackUnorm4x8(float f1,float f2,float f3,float f4)929 inline uint32_t PackUnorm4x8(float f1, float f2, float f3, float f4)
930 {
931 uint8_t bits[4];
932 bits[0] = priv::ToPackedUnorm8(f1);
933 bits[1] = priv::ToPackedUnorm8(f2);
934 bits[2] = priv::ToPackedUnorm8(f3);
935 bits[3] = priv::ToPackedUnorm8(f4);
936 uint32_t result = 0u;
937 for (int i = 0; i < 4; ++i)
938 {
939 int shift = i * 8;
940 result |= (static_cast<uint32_t>(bits[i]) << shift);
941 }
942 return result;
943 }
944
// Unpacks 4 normalized unsigned floating-point values from a single 32-bit unsigned integer into f.
// Works similarly to unpackUnorm2x16. The floats are unpacked starting from the least significant
// bits.
inline void UnpackUnorm4x8(uint32_t u, float *f)
{
    // Consume one byte per iteration, lowest byte first.
    for (int i = 0; i < 4; ++i, u >>= 8)
    {
        const uint8_t byte = static_cast<uint8_t>(u & 0xFF);
        f[i]               = static_cast<float>(byte) / 255.0f;
    }
}
957
958 // Packs 4 normalized signed floating-point values to a single 32-bit unsigned integer. The floats
959 // are clamped to the range -1.0 to 1.0, and written to the unsigned integer starting from the least
960 // significant bits.
PackSnorm4x8(float f1,float f2,float f3,float f4)961 inline uint32_t PackSnorm4x8(float f1, float f2, float f3, float f4)
962 {
963 int8_t bits[4];
964 bits[0] = priv::ToPackedSnorm8(f1);
965 bits[1] = priv::ToPackedSnorm8(f2);
966 bits[2] = priv::ToPackedSnorm8(f3);
967 bits[3] = priv::ToPackedSnorm8(f4);
968 uint32_t result = 0u;
969 for (int i = 0; i < 4; ++i)
970 {
971 int shift = i * 8;
972 result |= ((static_cast<uint32_t>(bits[i]) & 0xFF) << shift);
973 }
974 return result;
975 }
976
977 // Unpacks 4 normalized signed floating-point values from a single 32-bit unsigned integer into f.
978 // Works similarly to unpackSnorm2x16. The floats are unpacked starting from the least significant
979 // bits, and clamped to the range -1.0 to 1.0.
UnpackSnorm4x8(uint32_t u,float * f)980 inline void UnpackSnorm4x8(uint32_t u, float *f)
981 {
982 for (int i = 0; i < 4; ++i)
983 {
984 int shift = i * 8;
985 int8_t bits = static_cast<int8_t>((u >> shift) & 0xFF);
986 f[i] = clamp(static_cast<float>(bits) / 127.0f, -1.0f, 1.0f);
987 }
988 }
989
990 // Returns an unsigned integer obtained by converting the two floating-point values to the 16-bit
991 // floating-point representation found in the OpenGL ES Specification, and then packing these
992 // two 16-bit integers into a 32-bit unsigned integer.
993 // f1: The 16 least-significant bits of the result;
994 // f2: The 16 most-significant bits.
packHalf2x16(float f1,float f2)995 inline uint32_t packHalf2x16(float f1, float f2)
996 {
997 uint16_t leastSignificantBits = static_cast<uint16_t>(float32ToFloat16(f1));
998 uint16_t mostSignificantBits = static_cast<uint16_t>(float32ToFloat16(f2));
999 return static_cast<uint32_t>(mostSignificantBits) << 16 |
1000 static_cast<uint32_t>(leastSignificantBits);
1001 }
1002
1003 // Returns two floating-point values obtained by unpacking a 32-bit unsigned integer into a pair of
1004 // 16-bit values, interpreting those values as 16-bit floating-point numbers according to the OpenGL
1005 // ES Specification, and converting them to 32-bit floating-point values. The first float value is
1006 // obtained from the 16 least-significant bits of u; the second component is obtained from the 16
1007 // most-significant bits of u.
unpackHalf2x16(uint32_t u,float * f1,float * f2)1008 inline void unpackHalf2x16(uint32_t u, float *f1, float *f2)
1009 {
1010 uint16_t leastSignificantBits = static_cast<uint16_t>(u & 0xFFFF);
1011 uint16_t mostSignificantBits = static_cast<uint16_t>(u >> 16);
1012
1013 *f1 = float16ToFloat32(leastSignificantBits);
1014 *f2 = float16ToFloat32(mostSignificantBits);
1015 }
1016
sRGBToLinear(uint8_t srgbValue)1017 inline float sRGBToLinear(uint8_t srgbValue)
1018 {
1019 float value = srgbValue / 255.0f;
1020 if (value <= 0.04045f)
1021 {
1022 value = value / 12.92f;
1023 }
1024 else
1025 {
1026 value = std::pow((value + 0.055f) / 1.055f, 2.4f);
1027 }
1028 ASSERT(value >= 0.0f && value <= 1.0f);
1029 return value;
1030 }
1031
linearToSRGB(float value)1032 inline uint8_t linearToSRGB(float value)
1033 {
1034 ASSERT(value >= 0.0f && value <= 1.0f);
1035 if (value < 0.0031308f)
1036 {
1037 value = value * 12.92f;
1038 }
1039 else
1040 {
1041 value = std::pow(value, 0.41666f) * 1.055f - 0.055f;
1042 }
1043 return static_cast<uint8_t>(value * 255.0f + 0.5f);
1044 }
1045
// Returns value with its 32 bits in reverse order: bit 0 becomes bit 31, bit 1 becomes bit 30,
// and so on.
inline uint32_t BitfieldReverse(uint32_t value)
{
    // TODO: Optimize this if needed. There don't seem to be compiler intrinsics for this, and
    // right now it's not used in performance-critical paths.
    uint32_t reversed = 0u;
    for (int remaining = 32; remaining > 0; --remaining)
    {
        // Move the current lowest input bit into the lowest output bit; earlier bits have
        // already been pushed one position towards the top.
        reversed = (reversed << 1) | (value & 1u);
        value >>= 1;
    }
    return reversed;
}
1058
1059 // Count the 1 bits.
1060 #if defined(_MSC_VER) && !defined(__clang__)
1061 # if defined(_M_IX86) || defined(_M_X64)
1062 namespace priv
1063 {
1064 // Check POPCNT instruction support and cache the result.
1065 // https://docs.microsoft.com/en-us/cpp/intrinsics/popcnt16-popcnt-popcnt64#remarks
1066 static const bool kHasPopcnt = [] {
1067 int info[4];
1068 __cpuid(&info[0], 1);
1069 return static_cast<bool>(info[2] & 0x800000);
1070 }();
1071 } // namespace priv
1072
// Software bit count for x86/x64 CPUs without POPCNT (32-bit variant).
// https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
inline int BitCountPolyfill(uint32_t bits)
{
    // Each 2-bit field holds the number of set bits of the matching input pair.
    const uint32_t pairCounts = bits - ((bits >> 1) & 0x55555555u);
    // Each 4-bit field holds the number of set bits of the matching input nibble.
    const uint32_t nibbleCounts = (pairCounts & 0x33333333u) + ((pairCounts >> 2) & 0x33333333u);
    // Each byte holds the number of set bits of the matching input byte.
    const uint32_t byteCounts = (nibbleCounts + (nibbleCounts >> 4)) & 0x0F0F0F0Fu;
    // The multiplication accumulates all byte counts into the top byte.
    return static_cast<int>((byteCounts * 0x01010101u) >> 24);
}
1082
// Software bit count for x86/x64 CPUs without POPCNT (64-bit variant). Same parallel-sum scheme
// as the 32-bit overload, with the masks widened to 64 bits.
inline int BitCountPolyfill(uint64_t bits)
{
    // Each 2-bit field holds the number of set bits of the matching input pair.
    const uint64_t pairCounts = bits - ((bits >> 1) & 0x5555555555555555ull);
    // Each 4-bit field holds the number of set bits of the matching input nibble.
    const uint64_t nibbleCounts =
        (pairCounts & 0x3333333333333333ull) + ((pairCounts >> 2) & 0x3333333333333333ull);
    // Each byte holds the number of set bits of the matching input byte.
    const uint64_t byteCounts = (nibbleCounts + (nibbleCounts >> 4)) & 0x0F0F0F0F0F0F0F0Full;
    // The multiplication accumulates all byte counts into the top byte.
    return static_cast<int>((byteCounts * 0x0101010101010101ull) >> 56);
}
1090
BitCount(uint32_t bits)1091 inline int BitCount(uint32_t bits)
1092 {
1093 if (priv::kHasPopcnt)
1094 {
1095 return static_cast<int>(__popcnt(bits));
1096 }
1097 return BitCountPolyfill(bits);
1098 }
1099
BitCount(uint64_t bits)1100 inline int BitCount(uint64_t bits)
1101 {
1102 if (priv::kHasPopcnt)
1103 {
1104 # if defined(_M_X64)
1105 return static_cast<int>(__popcnt64(bits));
1106 # else // x86
1107 return static_cast<int>(__popcnt(static_cast<uint32_t>(bits >> 32)) +
1108 __popcnt(static_cast<uint32_t>(bits)));
1109 # endif // defined(_M_X64)
1110 }
1111 return BitCountPolyfill(bits);
1112 }
1113
1114 # elif defined(_M_ARM) || defined(_M_ARM64)
1115
1116 // MSVC's _CountOneBits* intrinsics are not defined for ARM64, moreover they do not use dedicated
1117 // NEON instructions.
1118
BitCount(uint32_t bits)1119 inline int BitCount(uint32_t bits)
1120 {
1121 // cast bits to 8x8 datatype and use VCNT on it
1122 const uint8x8_t vsum = vcnt_u8(vcreate_u8(static_cast<uint64_t>(bits)));
1123
1124 // pairwise sums: 8x8 -> 16x4 -> 32x2
1125 return static_cast<int>(vget_lane_u32(vpaddl_u16(vpaddl_u8(vsum)), 0));
1126 }
1127
BitCount(uint64_t bits)1128 inline int BitCount(uint64_t bits)
1129 {
1130 // cast bits to 8x8 datatype and use VCNT on it
1131 const uint8x8_t vsum = vcnt_u8(vcreate_u8(bits));
1132
1133 // pairwise sums: 8x8 -> 16x4 -> 32x2 -> 64x1
1134 return static_cast<int>(vget_lane_u64(vpaddl_u32(vpaddl_u16(vpaddl_u8(vsum))), 0));
1135 }
1136 # endif // defined(_M_IX86) || defined(_M_X64)
1137 #endif // defined(_MSC_VER) && !defined(__clang__)
1138
1139 #if defined(ANGLE_PLATFORM_POSIX) || defined(__clang__) || defined(__GNUC__)
// Counts the set bits of a 32-bit value through the compiler's population-count builtin.
inline int BitCount(uint32_t bits)
{
    const int setBits = __builtin_popcount(bits);
    return setBits;
}
1144
// Counts the set bits of a 64-bit value through the compiler's population-count builtin.
inline int BitCount(uint64_t bits)
{
    const int setBits = __builtin_popcountll(bits);
    return setBits;
}
1149 #endif // defined(ANGLE_PLATFORM_POSIX) || defined(__clang__) || defined(__GNUC__)
1150
BitCount(uint8_t bits)1151 inline int BitCount(uint8_t bits)
1152 {
1153 return BitCount(static_cast<uint32_t>(bits));
1154 }
1155
BitCount(uint16_t bits)1156 inline int BitCount(uint16_t bits)
1157 {
1158 return BitCount(static_cast<uint32_t>(bits));
1159 }
1160
1161 #if defined(ANGLE_PLATFORM_WINDOWS)
1162 // Return the index of the least significant bit set. Indexing is such that bit 0 is the least
1163 // significant bit. Implemented for different bit widths on different platforms.
ScanForward(uint32_t bits)1164 inline unsigned long ScanForward(uint32_t bits)
1165 {
1166 ASSERT(bits != 0u);
1167 unsigned long firstBitIndex = 0ul;
1168 unsigned char ret = _BitScanForward(&firstBitIndex, bits);
1169 ASSERT(ret != 0u);
1170 return firstBitIndex;
1171 }
1172
ScanForward(uint64_t bits)1173 inline unsigned long ScanForward(uint64_t bits)
1174 {
1175 ASSERT(bits != 0u);
1176 unsigned long firstBitIndex = 0ul;
1177 # if defined(ANGLE_IS_64_BIT_CPU)
1178 unsigned char ret = _BitScanForward64(&firstBitIndex, bits);
1179 # else
1180 unsigned char ret;
1181 if (static_cast<uint32_t>(bits) == 0)
1182 {
1183 ret = _BitScanForward(&firstBitIndex, static_cast<uint32_t>(bits >> 32));
1184 firstBitIndex += 32ul;
1185 }
1186 else
1187 {
1188 ret = _BitScanForward(&firstBitIndex, static_cast<uint32_t>(bits));
1189 }
1190 # endif // defined(ANGLE_IS_64_BIT_CPU)
1191 ASSERT(ret != 0u);
1192 return firstBitIndex;
1193 }
1194
1195 // Return the index of the most significant bit set. Indexing is such that bit 0 is the least
1196 // significant bit.
ScanReverse(uint32_t bits)1197 inline unsigned long ScanReverse(uint32_t bits)
1198 {
1199 ASSERT(bits != 0u);
1200 unsigned long lastBitIndex = 0ul;
1201 unsigned char ret = _BitScanReverse(&lastBitIndex, bits);
1202 ASSERT(ret != 0u);
1203 return lastBitIndex;
1204 }
1205
ScanReverse(uint64_t bits)1206 inline unsigned long ScanReverse(uint64_t bits)
1207 {
1208 ASSERT(bits != 0u);
1209 unsigned long lastBitIndex = 0ul;
1210 # if defined(ANGLE_IS_64_BIT_CPU)
1211 unsigned char ret = _BitScanReverse64(&lastBitIndex, bits);
1212 # else
1213 unsigned char ret;
1214 if (static_cast<uint32_t>(bits >> 32) == 0)
1215 {
1216 ret = _BitScanReverse(&lastBitIndex, static_cast<uint32_t>(bits));
1217 }
1218 else
1219 {
1220 ret = _BitScanReverse(&lastBitIndex, static_cast<uint32_t>(bits >> 32));
1221 lastBitIndex += 32ul;
1222 }
1223 # endif // defined(ANGLE_IS_64_BIT_CPU)
1224 ASSERT(ret != 0u);
1225 return lastBitIndex;
1226 }
1227 #endif // defined(ANGLE_PLATFORM_WINDOWS)
1228
1229 #if defined(ANGLE_PLATFORM_POSIX)
ScanForward(uint32_t bits)1230 inline unsigned long ScanForward(uint32_t bits)
1231 {
1232 ASSERT(bits != 0u);
1233 return static_cast<unsigned long>(__builtin_ctz(bits));
1234 }
1235
ScanForward(uint64_t bits)1236 inline unsigned long ScanForward(uint64_t bits)
1237 {
1238 ASSERT(bits != 0u);
1239 # if defined(ANGLE_IS_64_BIT_CPU)
1240 return static_cast<unsigned long>(__builtin_ctzll(bits));
1241 # else
1242 return static_cast<unsigned long>(static_cast<uint32_t>(bits) == 0
1243 ? __builtin_ctz(static_cast<uint32_t>(bits >> 32)) + 32
1244 : __builtin_ctz(static_cast<uint32_t>(bits)));
1245 # endif // defined(ANGLE_IS_64_BIT_CPU)
1246 }
1247
ScanReverse(uint32_t bits)1248 inline unsigned long ScanReverse(uint32_t bits)
1249 {
1250 ASSERT(bits != 0u);
1251 return static_cast<unsigned long>(sizeof(uint32_t) * CHAR_BIT - 1 - __builtin_clz(bits));
1252 }
1253
ScanReverse(uint64_t bits)1254 inline unsigned long ScanReverse(uint64_t bits)
1255 {
1256 ASSERT(bits != 0u);
1257 # if defined(ANGLE_IS_64_BIT_CPU)
1258 return static_cast<unsigned long>(sizeof(uint64_t) * CHAR_BIT - 1 - __builtin_clzll(bits));
1259 # else
1260 if (static_cast<uint32_t>(bits >> 32) == 0)
1261 {
1262 return sizeof(uint32_t) * CHAR_BIT - 1 - __builtin_clz(static_cast<uint32_t>(bits));
1263 }
1264 else
1265 {
1266 return sizeof(uint32_t) * CHAR_BIT - 1 - __builtin_clz(static_cast<uint32_t>(bits >> 32)) +
1267 32;
1268 }
1269 # endif // defined(ANGLE_IS_64_BIT_CPU)
1270 }
1271 #endif // defined(ANGLE_PLATFORM_POSIX)
1272
ScanForward(uint8_t bits)1273 inline unsigned long ScanForward(uint8_t bits)
1274 {
1275 return ScanForward(static_cast<uint32_t>(bits));
1276 }
1277
ScanForward(uint16_t bits)1278 inline unsigned long ScanForward(uint16_t bits)
1279 {
1280 return ScanForward(static_cast<uint32_t>(bits));
1281 }
1282
ScanReverse(uint8_t bits)1283 inline unsigned long ScanReverse(uint8_t bits)
1284 {
1285 return ScanReverse(static_cast<uint32_t>(bits));
1286 }
1287
ScanReverse(uint16_t bits)1288 inline unsigned long ScanReverse(uint16_t bits)
1289 {
1290 return ScanReverse(static_cast<uint32_t>(bits));
1291 }
1292
// GLSL-style findLSB: the index of the least significant 1 bit, or -1 when no bit is set.
template <typename T>
int FindLSB(T bits)
{
    static_assert(std::is_integral<T>::value, "must be integral type.");
    // ScanForward requires a non-zero argument, so zero is handled here.
    return (bits == 0u) ? -1 : static_cast<int>(ScanForward(bits));
}
1307
// GLSL-style findMSB: the index of the most significant 1 bit, or -1 when no bit is set.
template <typename T>
int FindMSB(T bits)
{
    static_assert(std::is_integral<T>::value, "must be integral type.");
    // ScanReverse requires a non-zero argument, so zero is handled here.
    return (bits == 0u) ? -1 : static_cast<int>(ScanReverse(bits));
}
1322
1323 // Returns whether the argument is Not a Number.
1324 // IEEE 754 single precision NaN representation: Exponent(8 bits) - 255, Mantissa(23 bits) -
1325 // non-zero.
isNaN(float f)1326 inline bool isNaN(float f)
1327 {
1328 // Exponent mask: ((1u << 8) - 1u) << 23 = 0x7f800000u
1329 // Mantissa mask: ((1u << 23) - 1u) = 0x7fffffu
1330 return ((bitCast<uint32_t>(f) & 0x7f800000u) == 0x7f800000u) &&
1331 (bitCast<uint32_t>(f) & 0x7fffffu);
1332 }
1333
1334 // Returns whether the argument is infinity.
1335 // IEEE 754 single precision infinity representation: Exponent(8 bits) - 255, Mantissa(23 bits) -
1336 // zero.
isInf(float f)1337 inline bool isInf(float f)
1338 {
1339 // Exponent mask: ((1u << 8) - 1u) << 23 = 0x7f800000u
1340 // Mantissa mask: ((1u << 23) - 1u) = 0x7fffffu
1341 return ((bitCast<uint32_t>(f) & 0x7f800000u) == 0x7f800000u) &&
1342 !(bitCast<uint32_t>(f) & 0x7fffffu);
1343 }
1344
namespace priv
{
// Returns the smallest r >= start for which r * r == n, or 0 when there is none.
// A start of 0 is treated like a start of 1.
constexpr unsigned int SolveISquareRoot(unsigned int n, unsigned int start)
{
    // "r <= n / r" is the overflow-free form of "r * r <= n", so r * r below cannot wrap.
    for (unsigned int r = (start == 0u ? 1u : start); r <= n / r; ++r)
    {
        if (r * r == n)
        {
            return r;
        }
    }
    return 0u;
}

// Compile-time search for the integer square root of N, starting at candidate R.
// value is the root when one exists at or above R, and 0 otherwise.
//
// The search is a constexpr loop rather than a chain of template instantiations. The earlier
// recursive form needed an <N, N> terminator specialization, which made the result for N == 2
// come out as 2 instead of 0, and it could not be instantiated for N == 0.
template <unsigned int N, unsigned int R>
struct iSquareRoot
{
    static constexpr unsigned int solve() { return SolveISquareRoot(N, R); }
    enum Result
    {
        value = SolveISquareRoot(N, R)
    };
};

}  // namespace priv

// Returns the integer square root of N when N is a perfect square, and 0 otherwise.
template <unsigned int N>
constexpr unsigned int iSquareRoot()
{
    return priv::iSquareRoot<N, 1>::value;
}
1378
1379 // Sum, difference and multiplication operations for signed ints that wrap on 32-bit overflow.
1380 //
1381 // Unsigned types are defined to do arithmetic modulo 2^n in C++. For signed types, overflow
1382 // behavior is undefined.
1383
// Adds lhs and rhs in 32-bit unsigned arithmetic, so the sum wraps modulo 2^32 instead of
// overflowing, then converts the result back to T.
template <typename T>
inline T WrappingSum(T lhs, T rhs)
{
    const uint32_t wrapped = static_cast<uint32_t>(lhs) + static_cast<uint32_t>(rhs);
    return static_cast<T>(wrapped);
}
1391
// Subtracts rhs from lhs in 32-bit unsigned arithmetic, so the difference wraps modulo 2^32
// instead of overflowing, then converts the result back to T.
template <typename T>
inline T WrappingDiff(T lhs, T rhs)
{
    const uint32_t wrapped = static_cast<uint32_t>(lhs) - static_cast<uint32_t>(rhs);
    return static_cast<T>(wrapped);
}
1399
// Multiplies two 32-bit signed integers and returns the low 32 bits of the product, i.e. the
// product wrapped modulo 2^32.
inline int32_t WrappingMul(int32_t lhs, int32_t rhs)
{
    // The product of two 32-bit values always fits in 64 bits, so this cannot overflow.
    const int64_t wideProduct = static_cast<int64_t>(lhs) * static_cast<int64_t>(rhs);
    // Conversion to an unsigned type is defined to reduce modulo 2^32, which keeps exactly the
    // low 32 bits of the product.
    const uint32_t lowWord = static_cast<uint32_t>(wideProduct);
    // Reinterpret the low word as signed. Values of 2^31 and above are not representable in
    // int32_t; the conversion is implementation-defined before C++20 and wraps modulo 2^32 on
    // two's complement implementations.
    return static_cast<int32_t>(lowWord);
}
1412
// Scales a screen-space length to normalized device coordinates: the result is
// 2 * dimensionScreen / viewportDimension, so a length equal to the viewport dimension maps to 2.
inline float scaleScreenDimensionToNdc(float dimensionScreen, float viewportDimension)
{
    const float doubledDimension = 2.0f * dimensionScreen;
    return doubledDimension / viewportDimension;
}
1417
// Maps a screen-space coordinate to normalized device coordinates: 0 maps to -1 and
// viewportDimension maps to +1.
inline float scaleScreenCoordinateToNdc(float coordinateScreen, float viewportDimension)
{
    // Fraction of the viewport covered by the coordinate, re-centred around zero.
    const float centredFraction = coordinateScreen / viewportDimension - 0.5f;
    return 2.0f * centredFraction;
}
1423
1424 } // namespace gl
1425
1426 namespace rx
1427 {
1428
// Rounds value up to the nearest multiple of alignment. alignment may be any non-zero value, not
// just a power of two. The intermediate sum is not checked for overflow.
template <typename T>
T roundUp(const T value, const T alignment)
{
    // Bias the value so that anything past a multiple lands in the next bucket, then strip the
    // remainder to land exactly on the multiple.
    const auto biased    = value + alignment - static_cast<T>(1);
    const auto remainder = biased % alignment;
    return biased - remainder;
}
1435
1436 template <typename T>
1437 constexpr T roundUpPow2(const T value, const T alignment)
1438 {
1439 ASSERT(gl::isPow2(alignment));
1440 return (value + alignment - 1) & ~(alignment - 1);
1441 }
1442
1443 template <typename T>
1444 constexpr T roundDownPow2(const T value, const T alignment)
1445 {
1446 ASSERT(gl::isPow2(alignment));
1447 return value & ~(alignment - 1);
1448 }
1449
1450 template <typename T>
1451 angle::CheckedNumeric<T> CheckedRoundUp(const T value, const T alignment)
1452 {
1453 angle::CheckedNumeric<T> checkedValue(value);
1454 angle::CheckedNumeric<T> checkedAlignment(alignment);
1455 return roundUp(checkedValue, checkedAlignment);
1456 }
1457
// Divides value by divisor and rounds the quotient up. divisor must be non-zero. The quotient
// and the remainder are evaluated separately, so no intermediate sum can overflow.
inline constexpr unsigned int UnsignedCeilDivide(unsigned int value, unsigned int divisor)
{
    const unsigned int quotient = value / divisor;
    const bool hasRemainder     = (value % divisor) != 0;
    return hasRemainder ? quotient + 1 : quotient;
}
1463
// ANGLE_HAS_BUILTIN(x) forwards to __has_builtin(x) when the compiler provides that operator and
// evaluates to 0 otherwise, so it can be used in #if expressions on any compiler.
#ifdef __has_builtin
#    define ANGLE_HAS_BUILTIN(x) __has_builtin(x)
#else
#    define ANGLE_HAS_BUILTIN(x) 0
#endif
1469
// Select the implementation behind ANGLE_ROTL (32-bit rotate left), ANGLE_ROTL64 (64-bit rotate
// left) and ANGLE_ROTR16 (16-bit rotate right): MSVC intrinsics first, then the Clang builtins
// when ANGLE_HAS_BUILTIN reports all three, and otherwise the definitions in the #else branch.
#if defined(_MSC_VER)

# define ANGLE_ROTL(x, y) _rotl(x, y)
# define ANGLE_ROTL64(x, y) _rotl64(x, y)
# define ANGLE_ROTR16(x, y) _rotr16(x, y)

#elif defined(__clang__) && ANGLE_HAS_BUILTIN(__builtin_rotateleft32) && \
 ANGLE_HAS_BUILTIN(__builtin_rotateleft64) && ANGLE_HAS_BUILTIN(__builtin_rotateright16)

# define ANGLE_ROTL(x, y) __builtin_rotateleft32(x, y)
# define ANGLE_ROTL64(x, y) __builtin_rotateleft64(x, y)
# define ANGLE_ROTR16(x, y) __builtin_rotateright16(x, y)

#else
1484
// Rotates the 32-bit value x left by r bits. The count is reduced modulo 32, so a count of 0 (or
// any multiple of 32) returns x unchanged and a negative count rotates right by its magnitude.
inline uint32_t RotL(uint32_t x, int8_t r)
{
    // Masking both shift counts keeps them in [0, 31]. An unmasked "x >> (32 - r)" would shift
    // by the full width of the type when r == 0, which is undefined behaviour.
    const unsigned int count = static_cast<unsigned int>(r) & 31u;
    return (x << count) | (x >> ((32u - count) & 31u));
}
1489
// Rotates the 64-bit value x left by r bits. The count is reduced modulo 64, so a count of 0 (or
// any multiple of 64) returns x unchanged and a negative count rotates right by its magnitude.
inline uint64_t RotL64(uint64_t x, int8_t r)
{
    // Masking both shift counts keeps them in [0, 63]. An unmasked "x >> (64 - r)" would shift
    // by the full width of the type when r == 0, which is undefined behaviour.
    const unsigned int count = static_cast<unsigned int>(r) & 63u;
    return (x << count) | (x >> ((64u - count) & 63u));
}
1494
// Rotates the 16-bit value x right by r bits. The count is reduced modulo 16, so a count of 0 (or
// any multiple of 16) returns x unchanged and a negative count rotates left by its magnitude.
inline uint16_t RotR16(uint16_t x, int8_t r)
{
    // Masking both shift counts keeps them in [0, 15]; an unmasked "16 - r" becomes a negative
    // shift count for r > 16, which is undefined behaviour.
    const unsigned int count = static_cast<unsigned int>(r) & 15u;
    // Shift in unsigned arithmetic rather than in the promoted signed int, then keep the low
    // 16 bits.
    const unsigned int wide = x;
    return static_cast<uint16_t>((wide >> count) | (wide << ((16u - count) & 15u)));
}
1499
// Fallback: route the rotate macros to the functions defined in namespace rx in this branch.
# define ANGLE_ROTL(x, y) ::rx::RotL(x, y)
# define ANGLE_ROTL64(x, y) ::rx::RotL64(x, y)
# define ANGLE_ROTR16(x, y) ::rx::RotR16(x, y)

#endif // defined(_MSC_VER)
1505
// Returns floor(log2(bytes)) for bytes >= 1. An argument of 0 yields 0, matching gl::log2,
// instead of recursing without end.
constexpr unsigned int Log2(unsigned int bytes)
{
    // Count how many times the value can be halved before it reaches 1 (or 0).
    unsigned int exponent = 0u;
    while (bytes > 1u)
    {
        bytes >>= 1;
        ++exponent;
    }
    return exponent;
}
1510 } // namespace rx
1511
1512 #endif // COMMON_MATHUTIL_H_
1513