1*4bdc9457SAndroid Build Coastguard Worker // Copyright (c) Facebook, Inc. and its affiliates.
2*4bdc9457SAndroid Build Coastguard Worker // All rights reserved.
3*4bdc9457SAndroid Build Coastguard Worker //
4*4bdc9457SAndroid Build Coastguard Worker // Copyright 2019 Google LLC
5*4bdc9457SAndroid Build Coastguard Worker //
6*4bdc9457SAndroid Build Coastguard Worker // This source code is licensed under the BSD-style license found in the
7*4bdc9457SAndroid Build Coastguard Worker // LICENSE file in the root directory of this source tree.
8*4bdc9457SAndroid Build Coastguard Worker
9*4bdc9457SAndroid Build Coastguard Worker #pragma once
10*4bdc9457SAndroid Build Coastguard Worker
11*4bdc9457SAndroid Build Coastguard Worker #include <stdbool.h>
12*4bdc9457SAndroid Build Coastguard Worker #include <stddef.h>
13*4bdc9457SAndroid Build Coastguard Worker #include <stdint.h>
14*4bdc9457SAndroid Build Coastguard Worker #include <assert.h>
15*4bdc9457SAndroid Build Coastguard Worker
16*4bdc9457SAndroid Build Coastguard Worker #ifdef _MSC_VER
17*4bdc9457SAndroid Build Coastguard Worker #include <intrin.h>
18*4bdc9457SAndroid Build Coastguard Worker #include <stdlib.h> // For _rotl.
19*4bdc9457SAndroid Build Coastguard Worker #endif
20*4bdc9457SAndroid Build Coastguard Worker
21*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack/common.h>
22*4bdc9457SAndroid Build Coastguard Worker
23*4bdc9457SAndroid Build Coastguard Worker
24*4bdc9457SAndroid Build Coastguard Worker // stdlib.h from Windows 10 SDK defines min & max macros.
25*4bdc9457SAndroid Build Coastguard Worker // Undefine them before defining the corresponding functions.
26*4bdc9457SAndroid Build Coastguard Worker #ifdef min
27*4bdc9457SAndroid Build Coastguard Worker #undef min
28*4bdc9457SAndroid Build Coastguard Worker #endif
29*4bdc9457SAndroid Build Coastguard Worker #ifdef max
30*4bdc9457SAndroid Build Coastguard Worker #undef max
31*4bdc9457SAndroid Build Coastguard Worker #endif
32*4bdc9457SAndroid Build Coastguard Worker
33*4bdc9457SAndroid Build Coastguard Worker
// Returns the smaller of the two sizes; a is returned when they compare equal.
XNN_INLINE static size_t min(size_t a, size_t b) {
  if XNN_UNPREDICTABLE(b < a) {
    return b;
  }
  return a;
}
37*4bdc9457SAndroid Build Coastguard Worker
// Returns the larger of the two sizes; b is returned when they compare equal.
XNN_INLINE static size_t max(size_t a, size_t b) {
  if XNN_UNPREDICTABLE(b < a) {
    return a;
  }
  return b;
}
41*4bdc9457SAndroid Build Coastguard Worker
// Difference-or-zero: returns a - b when a exceeds b, and 0 otherwise, so the
// subtraction never wraps around.
XNN_INLINE static size_t doz(size_t a, size_t b) {
  if XNN_UNPREDICTABLE(b < a) {
    return a - b;
  }
  return 0;
}
45*4bdc9457SAndroid Build Coastguard Worker
// Integer division of n by q, rounded up to the next whole quotient.
// q must be nonzero, since it is used as a divisor.
XNN_INLINE static size_t divide_round_up(size_t n, size_t q) {
  const size_t quotient = n / q;
  if XNN_UNPREDICTABLE(n % q == 0) {
    return quotient;
  }
  // A nonzero remainder means one more (partial) group is needed.
  return quotient + 1;
}
49*4bdc9457SAndroid Build Coastguard Worker
// Rounds n up to a multiple of q: the rounded-up quotient of n by q, multiplied
// back by q. Unlike round_up_po2, q is not required to be a power of two.
XNN_INLINE static size_t round_up(size_t n, size_t q) {
  const size_t multiple_count = divide_round_up(n, q);
  return multiple_count * q;
}
53*4bdc9457SAndroid Build Coastguard Worker
// Tells whether n is a power of two. Zero is not treated as a power of two.
XNN_INLINE static bool is_po2(size_t n) {
  if (n == 0) {
    return false;
  }
  // A power of two has exactly one bit set; n - 1 clears that bit and sets all
  // the lower ones, so the two values share no bits.
  return (n & (n - 1)) == 0;
}
// Rounds n down to a multiple of q. q must be a power of two (checked by assert).
XNN_INLINE static size_t round_down_po2(size_t n, size_t q) {
  assert(is_po2(q));
  // Negating a power of two in unsigned arithmetic yields a mask with every bit
  // from log2(q) upwards set, which clears the remainder bits of n.
  const size_t multiple_mask = -q;
  return n & multiple_mask;
}
61*4bdc9457SAndroid Build Coastguard Worker
// Rounds n up to a multiple of q. q must be a power of two (round_down_po2
// asserts this).
XNN_INLINE static size_t round_up_po2(size_t n, size_t q) {
  // Bias n by q - 1 so that rounding down lands on the next multiple unless n
  // is already one.
  const size_t biased = n + q - 1;
  return round_down_po2(biased, q);
}
65*4bdc9457SAndroid Build Coastguard Worker
// Computes (a - b) modulo m. Both operands must already be reduced, i.e.
// a < m and b < m (checked by assert).
XNN_INLINE static size_t subtract_modulo(size_t a, size_t b, size_t m) {
  assert(a < m);
  assert(b < m);
  const size_t difference = a - b;
  if XNN_UNPREDICTABLE(a >= b) {
    return difference;
  }
  // a - b wrapped around as an unsigned value; adding m wraps it back into [0, m).
  return difference + m;
}
71*4bdc9457SAndroid Build Coastguard Worker
// Reinterprets the bit pattern of a 32-bit unsigned integer as a float.
// The bits are copied through a union; no numeric conversion takes place.
XNN_INLINE static float uint32_as_float(uint32_t i) {
  union {
    uint32_t word;
    float value;
  } pun;
  pun.word = i;
  return pun.value;
}
79*4bdc9457SAndroid Build Coastguard Worker
// Returns the bit pattern of a float as a 32-bit unsigned integer.
// The bits are copied through a union; no numeric conversion takes place.
XNN_INLINE static uint32_t float_as_uint32(float f) {
  union {
    float value;
    uint32_t word;
  } pun;
  pun.value = f;
  return pun.word;
}
87*4bdc9457SAndroid Build Coastguard Worker
// Reinterprets the bit pattern of a 64-bit unsigned integer as a double.
// The bits are copied through a union; no numeric conversion takes place.
XNN_INLINE static double uint64_as_double(uint64_t i) {
  union {
    uint64_t word;
    double value;
  } pun;
  pun.word = i;
  return pun.value;
}
95*4bdc9457SAndroid Build Coastguard Worker
// Returns the bit pattern of a double as a 64-bit unsigned integer.
// The bits are copied through a union; no numeric conversion takes place.
XNN_INLINE static uint64_t double_as_uint64(double f) {
  union {
    double value;
    uint64_t word;
  } pun;
  pun.value = f;
  return pun.word;
}
103*4bdc9457SAndroid Build Coastguard Worker
// Returns the absolute value of n as an unsigned 32-bit integer.
//
// The negation is carried out in unsigned arithmetic on every compiler, so
// INT32_MIN maps to 0x80000000. Calling abs() for that input would be undefined
// behavior, because the result is not representable as int.
XNN_INLINE static uint32_t math_abs_s32(int32_t n) {
  const uint32_t bits = (uint32_t) n;
  // Written as 0 - x rather than -x: the same modulo-2^32 negation, but it does
  // not trigger the MSVC warning about unary minus on an unsigned operand.
  return XNN_UNPREDICTABLE(n >= 0) ? bits : UINT32_C(0) - bits;
}
111*4bdc9457SAndroid Build Coastguard Worker
// Returns the smaller of two signed 32-bit integers.
XNN_INLINE static int32_t math_min_s32(int32_t a, int32_t b) {
  if XNN_UNPREDICTABLE(a < b) {
    return a;
  }
  return b;
}
115*4bdc9457SAndroid Build Coastguard Worker
// Returns the larger of two signed 32-bit integers.
XNN_INLINE static int32_t math_max_s32(int32_t a, int32_t b) {
  if XNN_UNPREDICTABLE(a > b) {
    return a;
  }
  return b;
}
119*4bdc9457SAndroid Build Coastguard Worker
// Returns the smaller of two unsigned 32-bit integers.
XNN_INLINE static uint32_t math_min_u32(uint32_t a, uint32_t b) {
  if XNN_UNPREDICTABLE(a < b) {
    return a;
  }
  return b;
}
123*4bdc9457SAndroid Build Coastguard Worker
// Returns the larger of two unsigned 32-bit integers.
XNN_INLINE static uint32_t math_max_u32(uint32_t a, uint32_t b) {
  if XNN_UNPREDICTABLE(a > b) {
    return a;
  }
  return b;
}
127*4bdc9457SAndroid Build Coastguard Worker
// Difference-or-zero for unsigned 32-bit integers: a - b when a exceeds b,
// and 0 otherwise, so the subtraction never wraps around.
XNN_INLINE static uint32_t math_doz_u32(uint32_t a, uint32_t b) {
  if XNN_UNPREDICTABLE(a > b) {
    return a - b;
  }
  return 0;
}
131*4bdc9457SAndroid Build Coastguard Worker
// Widening signed multiply: returns the full 64-bit product of two signed
// 32-bit integers.
XNN_INLINE static int64_t math_mulext_s32(int32_t a, int32_t b) {
#if defined(_MSC_VER) && defined(_M_IX86)
  // MSVC targeting 32-bit x86: use the compiler intrinsic for the product.
  const int64_t product = (int64_t) __emul((int) a, (int) b);
#else
  // Widen both operands first so the multiplication is done in 64 bits.
  const int64_t wide_a = (int64_t) a;
  const int64_t wide_b = (int64_t) b;
  const int64_t product = wide_a * wide_b;
#endif
  return product;
}
139*4bdc9457SAndroid Build Coastguard Worker
// Widening unsigned multiply: returns the full 64-bit product of two unsigned
// 32-bit integers.
XNN_INLINE static uint64_t math_mulext_u32(uint32_t a, uint32_t b) {
#if defined(_MSC_VER) && defined(_M_IX86)
  // MSVC targeting 32-bit x86: use the compiler intrinsic for the product.
  const uint64_t product = (uint64_t) __emulu((unsigned int) a, (unsigned int) b);
#else
  // Widen both operands first so the multiplication is done in 64 bits.
  const uint64_t wide_a = (uint64_t) a;
  const uint64_t wide_b = (uint64_t) b;
  const uint64_t product = wide_a * wide_b;
#endif
  return product;
}
147*4bdc9457SAndroid Build Coastguard Worker
// Computes x * y + acc in single precision.
//
// The fmaf builtin is used when a GNU-compatible compiler reports a fast fused
// multiply-add (__FP_FAST_FMAF), or when building with clang for RISC-V;
// otherwise the expression is written as a separate multiply and add.
XNN_INLINE static float math_muladd_f32(float x, float y, float acc) {
#if (defined(__GNUC__) && defined(__FP_FAST_FMAF)) || (defined(__clang__) && defined(__riscv))
  return __builtin_fmaf(x, y, acc);
#else
  const float product = x * y;
  return product + acc;
#endif
}
157*4bdc9457SAndroid Build Coastguard Worker
// Returns the smaller of two floats.
//
// The fminf builtin is used on ARMv8+ with a GNU-compatible compiler and on
// RISC-V with clang; elsewhere a comparison-based select is used, which
// returns a whenever b < a does not hold.
XNN_INLINE static float math_min_f32(float a, float b) {
#if (defined(__GNUC__) && defined(__ARM_ARCH) && (__ARM_ARCH >= 8)) || (defined(__clang__) && defined(__riscv))
  return __builtin_fminf(a, b);
#else
  if XNN_UNPREDICTABLE(b < a) {
    return b;
  }
  return a;
#endif
}
167*4bdc9457SAndroid Build Coastguard Worker
// Returns the larger of two floats.
//
// The fmaxf builtin is used on ARMv8+ with a GNU-compatible compiler and on
// RISC-V with clang; elsewhere a comparison-based select is used, which
// returns b whenever b < a does not hold.
XNN_INLINE static float math_max_f32(float a, float b) {
#if (defined(__GNUC__) && defined(__ARM_ARCH) && (__ARM_ARCH >= 8)) || (defined(__clang__) && defined(__riscv))
  return __builtin_fmaxf(a, b);
#else
  if XNN_UNPREDICTABLE(b < a) {
    return a;
  }
  return b;
#endif
}
177*4bdc9457SAndroid Build Coastguard Worker
// Returns the smaller of two doubles.
//
// The fmin builtin is used on ARMv8+ with a GNU-compatible compiler and on
// RISC-V with clang; elsewhere a comparison-based select is used, which
// returns a whenever b < a does not hold.
XNN_INLINE static double math_min_f64(double a, double b) {
#if (defined(__GNUC__) && defined(__ARM_ARCH) && (__ARM_ARCH >= 8)) || (defined(__clang__) && defined(__riscv))
  return __builtin_fmin(a, b);
#else
  if XNN_UNPREDICTABLE(b < a) {
    return b;
  }
  return a;
#endif
}
187*4bdc9457SAndroid Build Coastguard Worker
// Returns the larger of two doubles.
//
// The fmax builtin is used on ARMv8+ with a GNU-compatible compiler and on
// RISC-V with clang; elsewhere a comparison-based select is used, which
// returns b whenever b < a does not hold.
XNN_INLINE static double math_max_f64(double a, double b) {
#if (defined(__GNUC__) && defined(__ARM_ARCH) && (__ARM_ARCH >= 8)) || (defined(__clang__) && defined(__riscv))
  return __builtin_fmax(a, b);
#else
  if XNN_UNPREDICTABLE(b < a) {
    return a;
  }
  return b;
#endif
}
197*4bdc9457SAndroid Build Coastguard Worker
// Returns a float whose bit pattern is 0x7FFFFFFF, i.e. every bit set except
// the sign bit.
//
// Declared with (void) so that the definition is a real prototype in C; an
// empty parameter list leaves the parameters unspecified before C23.
XNN_INLINE static float math_nonsign_mask_f32(void) {
#if defined(__INTEL_COMPILER)
  // Surprisingly, Intel compiler ignores __builtin_nanf payload
  return _castu32_f32(0x7FFFFFFF);
#elif defined(__GNUC__)
  // A NaN with an all-ones 23-bit payload; together with the all-ones exponent
  // this produces the 0x7FFFFFFF pattern.
  return __builtin_nanf("0x7FFFFF");
#else
  // Generic fallback: build the pattern as an integer and reinterpret it.
  union {
    uint32_t as_word;
    float as_float;
  } f;
  f.as_word = 0x7FFFFFFF;
  return f.as_float;
#endif
}
213*4bdc9457SAndroid Build Coastguard Worker
214*4bdc9457SAndroid Build Coastguard Worker
// XNN_IGNORE_SHIFT_BASE_UB is a function attribute that disables the sanitizer's
// "shift-base" check for the function it is applied to. It expands to
// __attribute__((__no_sanitize__("shift-base"))) on clang >= 3.7 and gcc >= 8,
// and to nothing on every other compiler.
//
// For gcc 4.9 through 7 the attribute is unavailable, so
// XNN_USE_SHIFT_BASE_UB_WORKAROUND is defined to 1 (unless it was already
// defined) to make the shift helpers below use an alternative formulation.
215*4bdc9457SAndroid Build Coastguard Worker #if defined(__clang__)
216*4bdc9457SAndroid Build Coastguard Worker #if __clang_major__ == 3 && __clang_minor__ >= 7 || __clang_major__ > 3
217*4bdc9457SAndroid Build Coastguard Worker #define XNN_IGNORE_SHIFT_BASE_UB __attribute__((__no_sanitize__("shift-base")))
218*4bdc9457SAndroid Build Coastguard Worker #else
219*4bdc9457SAndroid Build Coastguard Worker #define XNN_IGNORE_SHIFT_BASE_UB
220*4bdc9457SAndroid Build Coastguard Worker #endif
221*4bdc9457SAndroid Build Coastguard Worker #elif defined(__GNUC__)
222*4bdc9457SAndroid Build Coastguard Worker #if __GNUC__ >= 8
223*4bdc9457SAndroid Build Coastguard Worker #define XNN_IGNORE_SHIFT_BASE_UB __attribute__((__no_sanitize__("shift-base")))
224*4bdc9457SAndroid Build Coastguard Worker #elif __GNUC__ == 4 && __GNUC_MINOR__ >= 9 || __GNUC__ > 4
225*4bdc9457SAndroid Build Coastguard Worker // 4.9 <= gcc < 8 support ubsan, but doesn't support no_sanitize attribute
226*4bdc9457SAndroid Build Coastguard Worker #define XNN_IGNORE_SHIFT_BASE_UB
227*4bdc9457SAndroid Build Coastguard Worker #ifndef XNN_USE_SHIFT_BASE_UB_WORKAROUND
228*4bdc9457SAndroid Build Coastguard Worker #define XNN_USE_SHIFT_BASE_UB_WORKAROUND 1
229*4bdc9457SAndroid Build Coastguard Worker #endif
230*4bdc9457SAndroid Build Coastguard Worker #else
231*4bdc9457SAndroid Build Coastguard Worker #define XNN_IGNORE_SHIFT_BASE_UB
232*4bdc9457SAndroid Build Coastguard Worker #endif
233*4bdc9457SAndroid Build Coastguard Worker #else
234*4bdc9457SAndroid Build Coastguard Worker #define XNN_IGNORE_SHIFT_BASE_UB
235*4bdc9457SAndroid Build Coastguard Worker #endif
236*4bdc9457SAndroid Build Coastguard Worker
// Shifts a signed 32-bit integer right by n bits, keeping the sign
// (arithmetic shift right).
//
// When XNN_USE_SHIFT_BASE_UB_WORKAROUND is defined, the shift is expressed
// without right-shifting a negative value; otherwise it relies on the
// compiler's >> on a signed operand.
XNN_IGNORE_SHIFT_BASE_UB
XNN_INLINE static int32_t math_asr_s32(int32_t x, uint32_t n) {
#ifdef XNN_USE_SHIFT_BASE_UB_WORKAROUND
#if XNN_ARCH_X86_64 || XNN_ARCH_ARM64
  // Sign-extend to 64 bits, shift as unsigned, and keep the low 32 bits.
  const uint64_t sign_extended = (uint64_t) (int64_t) x;
  return (int32_t) (sign_extended >> n);
#else
  if (x >= 0) {
    return x >> n;
  }
  // Negative input: shift the (non-negative) complement and complement back.
  return ~(~x >> n);
#endif
#else
  return x >> n;
#endif
}
249*4bdc9457SAndroid Build Coastguard Worker
// Shifts a signed 64-bit integer right by n bits, keeping the sign
// (arithmetic shift right).
//
// When XNN_USE_SHIFT_BASE_UB_WORKAROUND is defined, negative inputs are handled
// by shifting their complement; otherwise the shift relies on the compiler's >>
// on a signed operand.
XNN_IGNORE_SHIFT_BASE_UB
XNN_INLINE static int64_t math_asr_s64(int64_t x, uint32_t n) {
#ifdef XNN_USE_SHIFT_BASE_UB_WORKAROUND
  if (x >= 0) {
    return x >> n;
  }
  // Negative input: shift the (non-negative) complement and complement back.
  return ~(~x >> n);
#else
  return x >> n;
#endif
}
258*4bdc9457SAndroid Build Coastguard Worker
// Counts the leading zero bits of x. Returns 32 when x is zero.
XNN_INLINE static uint32_t math_clz_u32(uint32_t x) {
#if defined(_MSC_VER) && !defined(__clang__)
  unsigned long msb_index;
  if XNN_UNPREDICTABLE(_BitScanReverse(&msb_index, (unsigned long) x) == 0) {
    // The scan reports failure when no bit is set.
    return 32;
  }
  // For an index in [0, 31], index ^ 31 equals 31 - index.
  return (uint32_t) msb_index ^ 31;
#else
  // The builtin is only called for nonzero inputs; zero is handled up front.
  if XNN_UNPREDICTABLE(x == 0) {
    return 32;
  }
  return (uint32_t) __builtin_clz((unsigned int) x);
#endif
}
275*4bdc9457SAndroid Build Coastguard Worker
// Counts the leading zero bits of x. x must be nonzero (checked by assert);
// no zero-input handling is performed.
XNN_INLINE static uint32_t math_clz_nonzero_u32(uint32_t x) {
  assert(x != 0);
#if defined(_MSC_VER) && !defined(__clang__)
  unsigned long msb_index;
  _BitScanReverse(&msb_index, (unsigned long) x);
  // For an index in [0, 31], index ^ 31 equals 31 - index.
  const uint32_t leading_zeros = (uint32_t) msb_index ^ 31;
#else
  const uint32_t leading_zeros = (uint32_t) __builtin_clz((unsigned int) x);
#endif
  return leading_zeros;
}
286*4bdc9457SAndroid Build Coastguard Worker
// Counts the trailing zero bits of x.
// x is expected to be nonzero: neither branch handles a zero input, and this
// function does not check for it.
XNN_INLINE static uint32_t math_ctz_u32(uint32_t x) {
#if defined(_MSC_VER) && !defined(__clang__)
  unsigned long lsb_index;
  _BitScanForward(&lsb_index, (unsigned long) x);
  const uint32_t trailing_zeros = (uint32_t) lsb_index;
#else
  const uint32_t trailing_zeros = (uint32_t) __builtin_ctz((unsigned int) x);
#endif
  return trailing_zeros;
}
296*4bdc9457SAndroid Build Coastguard Worker
// Rotates x left by r bits.
//
// In the portable branch the rotation count is reduced modulo 32 before
// shifting, so both shift amounts stay within [0, 31]: r == 0 returns x
// unchanged and a negative r rotates right by -r. Shifting a 32-bit value by
// 32 (as the unmasked form x >> (32 - r) would for r == 0) or by a negative
// amount is undefined behavior.
XNN_INLINE static uint32_t math_rotl_u32(uint32_t x, int8_t r)
{
#if XNN_COMPILER_MSVC
  return _rotl((unsigned int) x, (int) r);
#else
  const uint32_t left = (uint32_t) r & 31;
  // (32 - left) & 31 maps left == 0 to a right shift of 0 instead of 32.
  const uint32_t right = (32 - left) & 31;
  return (x << left) | (x >> right);
#endif
}
305*4bdc9457SAndroid Build Coastguard Worker
306*4bdc9457SAndroid Build Coastguard Worker #ifndef __cplusplus
// Converts a double to uint32_t, using a dedicated conversion instruction where
// one is available and a clamp-based fallback elsewhere.
//
// The wasm branch calls __builtin_rint instead of rint(), because this header
// does not include <math.h> and rint() would otherwise be undeclared.
XNN_INLINE static uint32_t math_cvt_sat_u32_f64(double x) {
#if defined(__GNUC__) && defined(__arm__)
  // NOTE(review): the AArch64, RISC-V and wasm branches round to nearest;
  // confirm that vcvt (rather than vcvtr) gives the intended rounding here.
  float i;  // float instead of uint32_t because vcvt.u32.f64 writes to an S register
  __asm__ ("vcvt.u32.f64 %[i], %P[x]"
    : [i] "=w" (i)
    : [x] "w" (x));
  return float_as_uint32(i);
#elif defined(__GNUC__) && defined(__aarch64__)
  uint32_t i;
  __asm__ ("fcvtnu %w[i], %d[x]"
    : [i] "=r" (i)
    : [x] "w" (x));
  return i;
#elif defined(__GNUC__) && defined(__riscv)
  uint32_t i;
  __asm__ ("fcvt.wu.d %[i], %[x], rne"
    : [i] "=r" (i)
    : [x] "f" (x));
  return i;
#elif defined(__clang__) && defined(__wasm__) && defined(__wasm_nontrapping_fptoint__)
  return __builtin_wasm_trunc_saturate_u_i32_f64(__builtin_rint(x));
#else
  // Clamp to the representable range [0, 2^32 - 1] first.
  x = math_max_f64(x, 0.0);
  x = math_min_f64(x, 4294967295.0);
  // Adding 2^52 moves the value into [2^52, 2^53), where doubles are spaced
  // exactly 1 apart: the sum is rounded to an integer, which then sits in the
  // low mantissa bits and is extracted by truncating the bit pattern to 32 bits.
  return (uint32_t) double_as_uint64(x + 0x1.0p+52);
#endif
}
334*4bdc9457SAndroid Build Coastguard Worker #endif
335