// xref: /aosp_15_r20/external/XNNPACK/src/xnnpack/math.h (revision 4bdc94577ba0e567308109d787f7fec7b531ce36)
// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

9*4bdc9457SAndroid Build Coastguard Worker #pragma once
10*4bdc9457SAndroid Build Coastguard Worker 
11*4bdc9457SAndroid Build Coastguard Worker #include <stdbool.h>
12*4bdc9457SAndroid Build Coastguard Worker #include <stddef.h>
13*4bdc9457SAndroid Build Coastguard Worker #include <stdint.h>
14*4bdc9457SAndroid Build Coastguard Worker #include <assert.h>
15*4bdc9457SAndroid Build Coastguard Worker 
16*4bdc9457SAndroid Build Coastguard Worker #ifdef _MSC_VER
17*4bdc9457SAndroid Build Coastguard Worker   #include <intrin.h>
18*4bdc9457SAndroid Build Coastguard Worker   #include <stdlib.h> // For _rotl.
19*4bdc9457SAndroid Build Coastguard Worker #endif
20*4bdc9457SAndroid Build Coastguard Worker 
21*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack/common.h>
22*4bdc9457SAndroid Build Coastguard Worker 
23*4bdc9457SAndroid Build Coastguard Worker 
24*4bdc9457SAndroid Build Coastguard Worker // stdlib.h from Windows 10 SDK defines min & max macros.
25*4bdc9457SAndroid Build Coastguard Worker // Undefine them before defining the corresponding functions.
26*4bdc9457SAndroid Build Coastguard Worker #ifdef min
27*4bdc9457SAndroid Build Coastguard Worker   #undef min
28*4bdc9457SAndroid Build Coastguard Worker #endif
29*4bdc9457SAndroid Build Coastguard Worker #ifdef max
30*4bdc9457SAndroid Build Coastguard Worker   #undef max
31*4bdc9457SAndroid Build Coastguard Worker #endif
32*4bdc9457SAndroid Build Coastguard Worker 
33*4bdc9457SAndroid Build Coastguard Worker 
// Returns the smaller of two size_t values (a when they are equal).
XNN_INLINE static size_t min(size_t a, size_t b) {
  return XNN_UNPREDICTABLE(b < a) ? b : a;
}

// Returns the larger of two size_t values (b when they are equal).
XNN_INLINE static size_t max(size_t a, size_t b) {
  return XNN_UNPREDICTABLE(b < a) ? a : b;
}

// "Difference or zero": returns a - b when a is greater than b, and 0
// otherwise, so the unsigned subtraction never wraps around.
XNN_INLINE static size_t doz(size_t a, size_t b) {
  return XNN_UNPREDICTABLE(b < a) ? a - b : 0;
}

// Returns n / q rounded up to the nearest integer.
// q must be non-zero. The quotient is bumped by one only when there is a
// remainder, so no intermediate value can overflow (unlike (n + q - 1) / q).
XNN_INLINE static size_t divide_round_up(size_t n, size_t q) {
  assert(q != 0);
  return XNN_UNPREDICTABLE(n % q == 0) ? n / q : n / q + 1;
}

// Rounds n up to a multiple of q (q does not have to be a power of two).
// Computed as divide_round_up(n, q) * q; the multiplication wraps modulo
// SIZE_MAX + 1 if the rounded value does not fit in size_t.
XNN_INLINE static size_t round_up(size_t n, size_t q) {
  return divide_round_up(n, q) * q;
}

// Returns true when n is a power of two, i.e. has exactly one bit set.
// Clearing the lowest set bit with n & (n - 1) leaves zero only in that case;
// n == 0 is rejected explicitly.
XNN_INLINE static bool is_po2(size_t n) {
  return (n != 0) && ((n & (n - 1)) == 0);
}
// Rounds n down to a multiple of q, where q must be a power of two.
// For a power-of-two q, the unsigned negation -q has every bit at and above
// q's bit set and every lower bit clear, so the AND drops the remainder.
XNN_INLINE static size_t round_down_po2(size_t n, size_t q) {
  assert(is_po2(q));
  return n & -q;
}

// Rounds n up to a multiple of q, where q must be a power of two
// (checked by the assertion inside round_down_po2).
// n + q - 1 wraps around if it exceeds SIZE_MAX.
XNN_INLINE static size_t round_up_po2(size_t n, size_t q) {
  return round_down_po2(n + q - 1, q);
}

// Returns (a - b) mod m for operands already reduced modulo m (a < m, b < m).
// When a < b the unsigned difference wraps around; adding m wraps it back
// into the range [0, m).
XNN_INLINE static size_t subtract_modulo(size_t a, size_t b, size_t m) {
  assert(a < m);
  assert(b < m);
  return XNN_UNPREDICTABLE(a >= b) ? a - b : a - b + m;
}

// Reinterprets the bits of a 32-bit unsigned integer as a float.
// The union is initialized through its first member (the integer) and read
// back through the float member; no numeric conversion takes place.
XNN_INLINE static float uint32_as_float(uint32_t i) {
  union {
    uint32_t as_uint32;
    float as_float;
  } bits = { i };
  return bits.as_float;
}

// Reinterprets the bits of a float as a 32-bit unsigned integer.
// The union is initialized through its first member (the float) and read
// back through the integer member; no numeric conversion takes place.
XNN_INLINE static uint32_t float_as_uint32(float f) {
  union {
    float as_float;
    uint32_t as_uint32;
  } bits = { f };
  return bits.as_uint32;
}

// Reinterprets the bits of a 64-bit unsigned integer as a double.
// The union is initialized through its first member (the integer) and read
// back through the double member; no numeric conversion takes place.
XNN_INLINE static double uint64_as_double(uint64_t i) {
  union {
    uint64_t as_uint64;
    double as_double;
  } bits = { i };
  return bits.as_double;
}

// Reinterprets the bits of a double as a 64-bit unsigned integer.
// The union is initialized through its first member (the double) and read
// back through the integer member; no numeric conversion takes place.
XNN_INLINE static uint64_t double_as_uint64(double f) {
  union {
    double as_double;
    uint64_t as_uint64;
  } bits = { f };
  return bits.as_uint64;
}

// Returns the absolute value of n as an unsigned 32-bit integer.
// The negation is done in the unsigned domain, so the result is well-defined
// for every input: INT32_MIN maps to 2147483648, which int32_t cannot hold.
// The same code is used on all compilers; calling abs() on INT32_MIN would be
// undefined behavior.
XNN_INLINE static uint32_t math_abs_s32(int32_t n) {
  const uint32_t bits = (uint32_t) n;
  // 0 - bits (rather than unary minus) avoids MSVC warning C4146 on unsigned operands.
  return XNN_UNPREDICTABLE(n >= 0) ? bits : (uint32_t) 0 - bits;
}

// Returns the smaller of two signed 32-bit integers.
XNN_INLINE static int32_t math_min_s32(int32_t a, int32_t b) {
  return XNN_UNPREDICTABLE(a < b) ? a : b;
}

// Returns the larger of two signed 32-bit integers.
XNN_INLINE static int32_t math_max_s32(int32_t a, int32_t b) {
  return XNN_UNPREDICTABLE(a > b) ? a : b;
}

// Returns the smaller of two unsigned 32-bit integers.
XNN_INLINE static uint32_t math_min_u32(uint32_t a, uint32_t b) {
  return XNN_UNPREDICTABLE(a < b) ? a : b;
}

// Returns the larger of two unsigned 32-bit integers.
XNN_INLINE static uint32_t math_max_u32(uint32_t a, uint32_t b) {
  return XNN_UNPREDICTABLE(a > b) ? a : b;
}

// "Difference or zero" for unsigned 32-bit integers: returns a - b when a is
// greater than b, and 0 otherwise, so the subtraction never wraps around.
XNN_INLINE static uint32_t math_doz_u32(uint32_t a, uint32_t b) {
  return XNN_UNPREDICTABLE(a > b) ? a - b : 0;
}

// Widening signed multiply: returns the full 64-bit product of two 32-bit
// signed integers. 32-bit x86 MSVC builds use the __emul intrinsic; all other
// builds widen both operands to 64 bits before multiplying.
XNN_INLINE static int64_t math_mulext_s32(int32_t a, int32_t b) {
#if defined(_MSC_VER) && defined(_M_IX86)
  return (int64_t) __emul((int) a, (int) b);
#else
  return (int64_t) a * (int64_t) b;
#endif
}

// Widening unsigned multiply: returns the full 64-bit product of two 32-bit
// unsigned integers. 32-bit x86 MSVC builds use the __emulu intrinsic; all
// other builds widen both operands to 64 bits before multiplying.
XNN_INLINE static uint64_t math_mulext_u32(uint32_t a, uint32_t b) {
#if defined(_MSC_VER) && defined(_M_IX86)
  return (uint64_t) __emulu((unsigned int) a, (unsigned int) b);
#else
  return (uint64_t) a * (uint64_t) b;
#endif
}

// Returns x * y + acc.
// Uses the fused multiply-add builtin when the compiler reports a fast fmaf
// (__FP_FAST_FMAF) or when building with Clang for RISC-V; otherwise the
// multiply and the add are written as separate operations.
XNN_INLINE static float math_muladd_f32(float x, float y, float acc) {
  #if defined(__GNUC__) && defined(__FP_FAST_FMAF)
    return __builtin_fmaf(x, y, acc);
  #elif defined(__clang__) && defined(__riscv)
    return __builtin_fmaf(x, y, acc);
  #else
    return x * y + acc;
  #endif
}

// Returns the smaller of two floats.
// ARMv8+ (GCC-compatible compilers) and Clang/RISC-V builds use the fminf
// builtin. The fallback returns b only when b < a, so it returns a whenever
// the comparison is false, including when either operand is NaN.
// NOTE(review): NaN and signed-zero results may therefore differ between the
// builtin and fallback paths - confirm callers do not depend on them.
XNN_INLINE static float math_min_f32(float a, float b) {
  #if defined(__GNUC__) && defined(__ARM_ARCH) && (__ARM_ARCH >= 8)
    return __builtin_fminf(a, b);
  #elif defined(__clang__) && defined(__riscv)
    return __builtin_fminf(a, b);
  #else
    return XNN_UNPREDICTABLE(b < a) ? b : a;
  #endif
}

// Returns the larger of two floats.
// ARMv8+ (GCC-compatible compilers) and Clang/RISC-V builds use the fmaxf
// builtin. The fallback returns a only when b < a, so it returns b whenever
// the comparison is false, including when either operand is NaN.
// NOTE(review): NaN and signed-zero results may therefore differ between the
// builtin and fallback paths - confirm callers do not depend on them.
XNN_INLINE static float math_max_f32(float a, float b) {
  #if defined(__GNUC__) && defined(__ARM_ARCH) && (__ARM_ARCH >= 8)
    return __builtin_fmaxf(a, b);
  #elif defined(__clang__) && defined(__riscv)
    return __builtin_fmaxf(a, b);
  #else
    return XNN_UNPREDICTABLE(b < a) ? a : b;
  #endif
}

// Returns the smaller of two doubles.
// ARMv8+ (GCC-compatible compilers) and Clang/RISC-V builds use the fmin
// builtin. The fallback returns b only when b < a, so it returns a whenever
// the comparison is false, including when either operand is NaN.
// NOTE(review): NaN and signed-zero results may therefore differ between the
// builtin and fallback paths - confirm callers do not depend on them.
XNN_INLINE static double math_min_f64(double a, double b) {
  #if defined(__GNUC__) && defined(__ARM_ARCH) && (__ARM_ARCH >= 8)
    return __builtin_fmin(a, b);
  #elif defined(__clang__) && defined(__riscv)
    return __builtin_fmin(a, b);
  #else
    return XNN_UNPREDICTABLE(b < a) ? b : a;
  #endif
}

// Returns the larger of two doubles.
// ARMv8+ (GCC-compatible compilers) and Clang/RISC-V builds use the fmax
// builtin. The fallback returns a only when b < a, so it returns b whenever
// the comparison is false, including when either operand is NaN.
// NOTE(review): NaN and signed-zero results may therefore differ between the
// builtin and fallback paths - confirm callers do not depend on them.
XNN_INLINE static double math_max_f64(double a, double b) {
  #if defined(__GNUC__) && defined(__ARM_ARCH) && (__ARM_ARCH >= 8)
    return __builtin_fmax(a, b);
  #elif defined(__clang__) && defined(__riscv)
    return __builtin_fmax(a, b);
  #else
    return XNN_UNPREDICTABLE(b < a) ? a : b;
  #endif
}

// Returns a float whose bit pattern is 0x7FFFFFFF: every bit set except the
// sign bit. The value itself is a NaN; it is intended as a bit mask.
// The generic path type-puns the word through a union. On GCC-compatible
// compilers the same pattern is requested as a NaN with payload 0x7FFFFF;
// the Intel compiler needs an explicit cast because it ignores the payload.
XNN_INLINE static float math_nonsign_mask_f32(void) {
  #if defined(__INTEL_COMPILER)
    // Surprisingly, Intel compiler ignores __builtin_nanf payload
    return _castu32_f32(0x7FFFFFFF);
  #elif defined(__GNUC__)
    return __builtin_nanf("0x7FFFFF");
  #else
    union {
      uint32_t as_word;
      float as_float;
    } f;
    f.as_word = 0x7FFFFFFF;
    return f.as_float;
  #endif
}


// XNN_IGNORE_SHIFT_BASE_UB is a function attribute that disables the
// "shift-base" sanitizer check on compilers that support the no_sanitize
// attribute (Clang >= 3.7, GCC >= 8); elsewhere it expands to nothing.
// For GCC 4.9 - 7.x, which has the sanitizer but not the attribute,
// XNN_USE_SHIFT_BASE_UB_WORKAROUND is defined instead (unless the build
// already defined it) so that the shift helpers can avoid the flagged shift.
#if defined(__clang__)
  #if (__clang_major__ == 3 && __clang_minor__ >= 7) || (__clang_major__ > 3)
    #define XNN_IGNORE_SHIFT_BASE_UB __attribute__((__no_sanitize__("shift-base")))
  #else
    #define XNN_IGNORE_SHIFT_BASE_UB
  #endif
#elif defined(__GNUC__)
  #if __GNUC__ >= 8
    #define XNN_IGNORE_SHIFT_BASE_UB __attribute__((__no_sanitize__("shift-base")))
  #elif (__GNUC__ == 4 && __GNUC_MINOR__ >= 9) || (__GNUC__ > 4)
    // 4.9 <= gcc < 8 support ubsan, but doesn't support no_sanitize attribute
    #define XNN_IGNORE_SHIFT_BASE_UB
    #ifndef XNN_USE_SHIFT_BASE_UB_WORKAROUND
      #define XNN_USE_SHIFT_BASE_UB_WORKAROUND 1
    #endif
  #else
    #define XNN_IGNORE_SHIFT_BASE_UB
  #endif
#else
  #define XNN_IGNORE_SHIFT_BASE_UB
#endif

// Arithmetic (sign-propagating) right shift of a signed 32-bit integer by n bits.
// When XNN_USE_SHIFT_BASE_UB_WORKAROUND is defined, negative inputs are not
// shifted directly: on x86-64/ARM64 the value is sign-extended to 64 bits,
// shifted as unsigned and truncated; elsewhere the complement (non-negative)
// is shifted and complemented back.
// Otherwise the plain >> operator is used.
XNN_IGNORE_SHIFT_BASE_UB
XNN_INLINE static int32_t math_asr_s32(int32_t x, uint32_t n) {
  #ifdef XNN_USE_SHIFT_BASE_UB_WORKAROUND
    #if XNN_ARCH_X86_64 || XNN_ARCH_ARM64
      return (int32_t) ((uint64_t) (int64_t) x >> n);
    #else
      return x >= 0 ? x >> n : ~(~x >> n);
    #endif
  #else
    return x >> n;
  #endif
}

// Arithmetic (sign-propagating) right shift of a signed 64-bit integer by n bits.
// When XNN_USE_SHIFT_BASE_UB_WORKAROUND is defined, a negative input is
// handled by shifting its complement (which is non-negative) and
// complementing the result; otherwise the plain >> operator is used.
XNN_IGNORE_SHIFT_BASE_UB
XNN_INLINE static int64_t math_asr_s64(int64_t x, uint32_t n) {
  #ifdef XNN_USE_SHIFT_BASE_UB_WORKAROUND
    return x >= 0 ? x >> n : ~(~x >> n);
  #else
    return x >> n;
  #endif
}

// Counts the leading zero bits of x; returns 32 when x is zero.
// MSVC path: _BitScanReverse reports the index of the highest set bit (and
// returns 0 when no bit is set); index ^ 31 equals 31 - index for 0..31.
// Other compilers: __builtin_clz is undefined for zero, hence the explicit check.
XNN_INLINE static uint32_t math_clz_u32(uint32_t x) {
  #if defined(_MSC_VER) && !defined(__clang__)
    unsigned long index;
    if (XNN_UNPREDICTABLE(_BitScanReverse(&index, (unsigned long) x) != 0)) {
      return (uint32_t) index ^ 31;
    } else {
      return 32;
    }
  #else
    if (XNN_UNPREDICTABLE(x == 0)) {
      return 32;
    } else {
      return (uint32_t) __builtin_clz((unsigned int) x);
    }
  #endif
}

// Counts the leading zero bits of x, which must be non-zero (asserted).
// Skips the zero check that math_clz_u32 performs.
// MSVC path: _BitScanReverse yields the index of the highest set bit;
// index ^ 31 equals 31 - index for 0..31.
XNN_INLINE static uint32_t math_clz_nonzero_u32(uint32_t x) {
  assert(x != 0);
  #if defined(_MSC_VER) && !defined(__clang__)
    unsigned long index;
    _BitScanReverse(&index, (unsigned long) x);
    return (uint32_t) index ^ 31;
  #else
    return (uint32_t) __builtin_clz((unsigned int) x);
  #endif
}

// Counts the trailing zero bits of x; returns 32 when x is zero, mirroring
// math_clz_u32.
// MSVC path: _BitScanForward reports the index of the lowest set bit and
// returns 0 when no bit is set (index is then not written).
// Other compilers: __builtin_ctz is undefined for zero, hence the explicit check.
XNN_INLINE static uint32_t math_ctz_u32(uint32_t x) {
  #if defined(_MSC_VER) && !defined(__clang__)
    unsigned long index;
    if (XNN_UNPREDICTABLE(_BitScanForward(&index, (unsigned long) x) != 0)) {
      return (uint32_t) index;
    } else {
      return 32;
    }
  #else
    if (XNN_UNPREDICTABLE(x == 0)) {
      return 32;
    } else {
      return (uint32_t) __builtin_ctz((unsigned int) x);
    }
  #endif
}

// Rotates x left by r bits.
// The rotation count is reduced modulo 32 before use, so r == 0 returns x
// unchanged and a negative r rotates right by -r. Both shift counts are
// masked into 0..31 because shifting a 32-bit value by 32 or by a negative
// amount is undefined behavior in C.
XNN_INLINE static uint32_t math_rotl_u32(uint32_t x, int8_t r)
{
  #if XNN_COMPILER_MSVC
    return _rotl((unsigned int) x, (int) r);
  #else
    const uint32_t left = (uint32_t) r & 31;
    const uint32_t right = (32 - left) & 31;
    return (x << left) | (x >> right);
  #endif
}

#ifndef __cplusplus
// Converts a double to uint32_t with saturation. Compiled for C only.
// 32-bit ARM, AArch64 and RISC-V (GCC-compatible compilers) each use a single
// conversion instruction via inline assembly; Clang/WebAssembly with
// non-trapping float-to-int uses the saturating truncation builtin on the
// rounded input.
// Generic path: clamp x to [0, 4294967295], then add 2^52. At that magnitude
// a double has no fractional bits left, so the addition rounds x to an
// integer (per the current rounding mode) and leaves it in the low mantissa
// bits; the low 32 bits of the bit pattern are the result. With the
// comparison-based math_max_f64 fallback, a NaN input is clamped to 0.
XNN_INLINE static uint32_t math_cvt_sat_u32_f64(double x) {
  #if defined(__GNUC__) && defined(__arm__)
    float i;  // float instead of uint32_t because vcvt.u32.f64 writes to an S register
    __asm__ ("vcvt.u32.f64 %[i], %P[x]"
      : [i] "=w" (i)
      : [x] "w" (x));
    return float_as_uint32(i);
  #elif defined(__GNUC__) && defined(__aarch64__)
    uint32_t i;
    __asm__ ("fcvtnu %w[i], %d[x]"
      : [i] "=r" (i)
      : [x] "w" (x));
    return i;
  #elif defined(__GNUC__) && defined(__riscv)
    uint32_t i;
    __asm__ ("fcvt.wu.d %[i], %[x], rne"
      : [i] "=r" (i)
      : [x] "f" (x));
    return i;
  #elif defined(__clang__) && defined(__wasm__) && defined(__wasm_nontrapping_fptoint__)
    // __builtin_rint: this header does not include <math.h>, so plain rint()
    // would be an undeclared function here.
    return __builtin_wasm_trunc_saturate_u_i32_f64(__builtin_rint(x));
  #else
    x = math_max_f64(x, 0.0);
    x = math_min_f64(x, 4294967295.0);
    return (uint32_t) double_as_uint64(x + 0x1.0p+52);
  #endif
}
#endif