1*4bdc9457SAndroid Build Coastguard Worker// Copyright 2019 Google LLC 2*4bdc9457SAndroid Build Coastguard Worker// 3*4bdc9457SAndroid Build Coastguard Worker// This source code is licensed under the BSD-style license found in the 4*4bdc9457SAndroid Build Coastguard Worker// LICENSE file in the root directory of this source tree. 5*4bdc9457SAndroid Build Coastguard Worker 6*4bdc9457SAndroid Build Coastguard Worker$assert BATCH_TILE >= 1 7*4bdc9457SAndroid Build Coastguard Worker$ABC = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ" 8*4bdc9457SAndroid Build Coastguard Worker#include <assert.h> 9*4bdc9457SAndroid Build Coastguard Worker 10*4bdc9457SAndroid Build Coastguard Worker#include <xnnpack/common.h> 11*4bdc9457SAndroid Build Coastguard Worker#include <xnnpack/math.h> 12*4bdc9457SAndroid Build Coastguard Worker#include <xnnpack/vunary.h> 13*4bdc9457SAndroid Build Coastguard Worker 14*4bdc9457SAndroid Build Coastguard Worker 15*4bdc9457SAndroid Build Coastguard Worker$MIN_F32 = "__builtin_wasm_min_f32" if WASM else "math_min_f32" 16*4bdc9457SAndroid Build Coastguard Worker$MAX_F32 = "__builtin_wasm_max_f32" if WASM else "math_max_f32" 17*4bdc9457SAndroid Build Coastguard Workervoid xnn_f32_vhswish_ukernel__${"wasm" if WASM else "scalar"}_x${BATCH_TILE}( 18*4bdc9457SAndroid Build Coastguard Worker size_t n, 19*4bdc9457SAndroid Build Coastguard Worker const float* x, 20*4bdc9457SAndroid Build Coastguard Worker float* y, 21*4bdc9457SAndroid Build Coastguard Worker const union xnn_f32_hswish_params params[restrict XNN_MIN_ELEMENTS(1)]) 22*4bdc9457SAndroid Build Coastguard Worker{ 23*4bdc9457SAndroid Build Coastguard Worker assert(n != 0); 24*4bdc9457SAndroid Build Coastguard Worker assert(n % sizeof(float) == 0); 25*4bdc9457SAndroid Build Coastguard Worker 26*4bdc9457SAndroid Build Coastguard Worker const float vsixth = params->scalar.sixth; 27*4bdc9457SAndroid Build Coastguard Worker const float vthree = params->scalar.three; 28*4bdc9457SAndroid Build Coastguard Worker const float vsix = params->scalar.six; 29*4bdc9457SAndroid Build Coastguard Worker const float vzero = 0.0f; 30*4bdc9457SAndroid Build Coastguard Worker assert(vthree == 3.0f); 31*4bdc9457SAndroid Build Coastguard Worker assert(vsix == 6.0f); 32*4bdc9457SAndroid Build Coastguard Worker 33*4bdc9457SAndroid Build Coastguard Worker $if BATCH_TILE > 1: 34*4bdc9457SAndroid Build Coastguard Worker for (; n >= ${BATCH_TILE} * sizeof(float); n -= ${BATCH_TILE} * sizeof(float)) { 35*4bdc9457SAndroid Build Coastguard Worker $for N in range(BATCH_TILE): 36*4bdc9457SAndroid Build Coastguard Worker float vx${ABC[N]} = x[${N}]; 37*4bdc9457SAndroid Build Coastguard Worker x += ${BATCH_TILE}; 38*4bdc9457SAndroid Build Coastguard Worker 39*4bdc9457SAndroid Build Coastguard Worker $for N in range(BATCH_TILE): 40*4bdc9457SAndroid Build Coastguard Worker float vacc${ABC[N]} = vx${ABC[N]} + vthree; 41*4bdc9457SAndroid Build Coastguard Worker vx${ABC[N]} *= vsixth; 42*4bdc9457SAndroid Build Coastguard Worker 43*4bdc9457SAndroid Build Coastguard Worker $for N in range(BATCH_TILE): 44*4bdc9457SAndroid Build Coastguard Worker vacc${ABC[N]} = ${MAX_F32}(vacc${ABC[N]}, vzero); 45*4bdc9457SAndroid Build Coastguard Worker 46*4bdc9457SAndroid Build Coastguard Worker $for N in range(BATCH_TILE): 47*4bdc9457SAndroid Build Coastguard Worker vacc${ABC[N]} = ${MIN_F32}(vacc${ABC[N]}, vsix); 48*4bdc9457SAndroid Build Coastguard Worker 49*4bdc9457SAndroid Build Coastguard Worker $for N in range(BATCH_TILE): 50*4bdc9457SAndroid Build Coastguard Worker vacc${ABC[N]} *= vx${ABC[N]}; 51*4bdc9457SAndroid Build Coastguard Worker 52*4bdc9457SAndroid Build Coastguard Worker $for N in range(BATCH_TILE): 53*4bdc9457SAndroid Build Coastguard Worker y[${N}] = vacc${ABC[N]}; 54*4bdc9457SAndroid Build Coastguard Worker y += ${BATCH_TILE}; 55*4bdc9457SAndroid Build Coastguard Worker } 56*4bdc9457SAndroid Build Coastguard Worker if XNN_UNLIKELY(n != 0) { 57*4bdc9457SAndroid Build Coastguard Worker $if BATCH_TILE > 2: 58*4bdc9457SAndroid Build Coastguard Worker do { 59*4bdc9457SAndroid Build Coastguard Worker float vx = *x++; 60*4bdc9457SAndroid Build Coastguard Worker float vacc = vx + vthree; 61*4bdc9457SAndroid Build Coastguard Worker vx *= vsixth; 62*4bdc9457SAndroid Build Coastguard Worker vacc = ${MAX_F32}(vacc, vzero); 63*4bdc9457SAndroid Build Coastguard Worker vacc = ${MIN_F32}(vacc, vsix); 64*4bdc9457SAndroid Build Coastguard Worker vacc *= vx; 65*4bdc9457SAndroid Build Coastguard Worker *y++ = vacc; 66*4bdc9457SAndroid Build Coastguard Worker n -= sizeof(float); 67*4bdc9457SAndroid Build Coastguard Worker } while (n != 0); 68*4bdc9457SAndroid Build Coastguard Worker $else: 69*4bdc9457SAndroid Build Coastguard Worker float vx = *x; 70*4bdc9457SAndroid Build Coastguard Worker float vacc = vx + vthree; 71*4bdc9457SAndroid Build Coastguard Worker vx *= vsixth; 72*4bdc9457SAndroid Build Coastguard Worker vacc = ${MAX_F32}(vacc, vzero); 73*4bdc9457SAndroid Build Coastguard Worker vacc = ${MIN_F32}(vacc, vsix); 74*4bdc9457SAndroid Build Coastguard Worker vacc *= vx; 75*4bdc9457SAndroid Build Coastguard Worker *y = vacc; 76*4bdc9457SAndroid Build Coastguard Worker } 77*4bdc9457SAndroid Build Coastguard Worker $else: 78*4bdc9457SAndroid Build Coastguard Worker for (; n >= sizeof(float); n -= sizeof(float)) { 79*4bdc9457SAndroid Build Coastguard Worker float vx = *x++; 80*4bdc9457SAndroid Build Coastguard Worker float vacc = vx + vthree; 81*4bdc9457SAndroid Build Coastguard Worker vx *= vsixth; 82*4bdc9457SAndroid Build Coastguard Worker vacc = ${MAX_F32}(vacc, vzero); 83*4bdc9457SAndroid Build Coastguard Worker vacc = ${MIN_F32}(vacc, vsix); 84*4bdc9457SAndroid Build Coastguard Worker vacc *= vx; 85*4bdc9457SAndroid Build Coastguard Worker *y++ = vacc; 86*4bdc9457SAndroid Build Coastguard Worker } 87*4bdc9457SAndroid Build Coastguard Worker} 88