1*4bdc9457SAndroid Build Coastguard Worker // Copyright 2021 Google LLC 2*4bdc9457SAndroid Build Coastguard Worker // 3*4bdc9457SAndroid Build Coastguard Worker // This source code is licensed under the BSD-style license found in the 4*4bdc9457SAndroid Build Coastguard Worker // LICENSE file in the root directory of this source tree. 5*4bdc9457SAndroid Build Coastguard Worker 6*4bdc9457SAndroid Build Coastguard Worker #include <assert.h> 7*4bdc9457SAndroid Build Coastguard Worker #include <stddef.h> 8*4bdc9457SAndroid Build Coastguard Worker #include <stdint.h> 9*4bdc9457SAndroid Build Coastguard Worker 10*4bdc9457SAndroid Build Coastguard Worker #include <arm_neon.h> 11*4bdc9457SAndroid Build Coastguard Worker 12*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack/intrinsics-polyfill.h> 13*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack/math-stubs.h> 14*4bdc9457SAndroid Build Coastguard Worker 15*4bdc9457SAndroid Build Coastguard Worker xnn_math_f32_qu8_cvt__neonv8(size_t n,const float * input,uint8_t * output,uint8_t output_zero_point)16*4bdc9457SAndroid Build Coastguard Workervoid xnn_math_f32_qu8_cvt__neonv8( 17*4bdc9457SAndroid Build Coastguard Worker size_t n, 18*4bdc9457SAndroid Build Coastguard Worker const float* input, 19*4bdc9457SAndroid Build Coastguard Worker uint8_t* output, 20*4bdc9457SAndroid Build Coastguard Worker uint8_t output_zero_point) 21*4bdc9457SAndroid Build Coastguard Worker { 22*4bdc9457SAndroid Build Coastguard Worker assert(n % (8 * sizeof(int8_t)) == 0); 23*4bdc9457SAndroid Build Coastguard Worker 24*4bdc9457SAndroid Build Coastguard Worker const int16x8_t voutput_zero_point = vdupq_n_s16((int16_t) (uint16_t) output_zero_point); 25*4bdc9457SAndroid Build Coastguard Worker for (; n != 0; n -= 8 * sizeof(int8_t)) { 26*4bdc9457SAndroid Build Coastguard Worker const float32x4_t vx_lo = vld1q_f32(input); input += 4; 27*4bdc9457SAndroid Build Coastguard Worker const float32x4_t vx_hi = vld1q_f32(input); input += 4; 28*4bdc9457SAndroid Build Coastguard Worker 29*4bdc9457SAndroid Build Coastguard Worker const int32x4_t vy_lo = vcvtnq_s32_f32(vx_lo); 30*4bdc9457SAndroid Build Coastguard Worker const int32x4_t vy_hi = vcvtnq_s32_f32(vx_hi); 31*4bdc9457SAndroid Build Coastguard Worker 32*4bdc9457SAndroid Build Coastguard Worker const int16x8_t vy = vqaddq_s16(vcombine_s16(vqmovn_s32(vy_lo), vqmovn_s32(vy_hi)), voutput_zero_point); 33*4bdc9457SAndroid Build Coastguard Worker 34*4bdc9457SAndroid Build Coastguard Worker const uint8x8_t vout = vqmovun_s16(vy); 35*4bdc9457SAndroid Build Coastguard Worker vst1_u8(output, vout); output += 8; 36*4bdc9457SAndroid Build Coastguard Worker } 37*4bdc9457SAndroid Build Coastguard Worker } 38