1 // Copyright 2022 Google LLC 2 // 3 // This source code is licensed under the BSD-style license found in the 4 // LICENSE file in the root directory of this source tree. 5 6 #include <assert.h> 7 #include <stddef.h> 8 9 #include <xnnpack/math.h> 10 #include <xnnpack/math-stubs.h> 11 12 xnn_math_u32_sqrt__scalar_tflm(size_t n,const uint32_t * input,uint32_t * output)13void xnn_math_u32_sqrt__scalar_tflm( 14 size_t n, 15 const uint32_t* input, 16 uint32_t* output) 17 { 18 assert(n % sizeof(uint32_t) == 0); 19 20 for (; n != 0; n -= sizeof(uint32_t)) { 21 uint32_t vx = *input++; 22 23 // Algorithm adapted from tensorflow/lite/experimental/microfrontend/lib/filterbank.c in TFLite-Micro 24 uint32_t vy = 0; 25 if (vx != 0) { 26 const uint32_t vn = (math_clz_nonzero_u32(vx) | 1) ^ 31; 27 uint32_t vb = UINT32_C(1) << vn; 28 uint32_t iterations = (vn >> 1) + 1; 29 while (iterations--) { 30 const uint32_t vyb = vy + vb; 31 if (vx >= vyb) { 32 vx -= vyb; 33 vy = (vy >> 1) + vb; 34 } else { 35 vy >>= 1; 36 } 37 vb >>= 2; 38 } 39 40 // vy is sqrt(.) rounded down. Do the final rounding up if needed. 41 if (vx > vy) { 42 // This condition prevents overflowing uint16_t, but produces incorrectly 43 // rounded result for large inputs where square root should round to 0x10000. 44 if (vy != UINT32_C(0xFFFF)) { 45 vy += 1; 46 } 47 } 48 } 49 50 *output++ = vy; 51 } 52 } 53