# Copyright 2020 Google LLC
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

#include <xnnpack/assembly.h>

# void xnn_f32_vrelu_ukernel__wasm32_shr_x4(
#     size_t n,                 0   size in bytes (decremented by 16 or 4 per step)
#     const float* x,           1   source pointer
#     float* y,                 2   destination pointer
#     const union params)       3   unused
#
# ReLU computed with integer operations only. Every element is loaded as a
# raw 32-bit pattern v and replaced by ((v >> 31) - 1) & v, where >> is a
# logical shift:
#     sign bit clear:  (0 - 1) = 0xFFFFFFFF,  result = v (stored unchanged)
#     sign bit set:    (1 - 1) = 0x00000000,  result = 0 (stored as +0.0)
#
# A main loop handles 4 elements (16 bytes) per iteration and a remainder loop
# handles 1 element (4 bytes) per iteration. Trailing bytes beyond the last
# whole element (n modulo 4) are not touched.
#
# The byte count is compared as an unsigned value because n is a size_t; a
# signed compare would treat counts of 2^31 bytes or more as negative and skip
# all of the work. The count cannot wrap, since it is only decremented after a
# guard has confirmed that it is large enough.

# locals (all i32; they hold raw float bit patterns and masks)
#     i32 value0                4
#     i32 value1                5   reused as the mask in the remainder loop
#     i32 value2                6
#     i32 value3                7
#     i32 mask0                 8
#     i32 mask1                 9
#     i32 mask2                 10
#     i32 mask3                 11

BEGIN_FUNCTION xnn_f32_vrelu_ukernel__wasm32_shr_x4
    .functype   xnn_f32_vrelu_ukernel__wasm32_shr_x4 (i32, i32, i32, i32) -> ()
    .local      i32, i32, i32, i32, i32, i32, i32, i32

    local.get   0
    i32.const   16          # count >= 16 (unsigned)
    i32.ge_u
    if
      loop
        local.get   1
        i32.load    0       # load 4 floats from src as raw bits
        local.set   4
        local.get   1
        i32.load    4
        local.set   5
        local.get   1
        i32.load    8
        local.set   6
        local.get   1
        i32.load    12
        local.set   7

        local.get   4       # mask = v >> 31 (logical): 1 when sign bit set
        i32.const   31
        i32.shr_u
        local.set   8
        local.get   5
        i32.const   31
        i32.shr_u
        local.set   9
        local.get   6
        i32.const   31
        i32.shr_u
        local.set   10
        local.get   7
        i32.const   31
        i32.shr_u
        local.set   11

        local.get   8       # mask -= 1: all ones when sign clear, zero when set
        i32.const   -1
        i32.add
        local.set   8
        local.get   9
        i32.const   -1
        i32.add
        local.set   9
        local.get   10
        i32.const   -1
        i32.add
        local.set   10
        local.get   11
        i32.const   -1
        i32.add
        local.set   11

        local.get   4       # v &= mask
        local.get   8
        i32.and
        local.set   4
        local.get   5
        local.get   9
        i32.and
        local.set   5
        local.get   6
        local.get   10
        i32.and
        local.set   6
        local.get   7
        local.get   11
        i32.and
        local.set   7

        local.get   2
        local.get   4
        i32.store   0       # store 4 floats
        local.get   2
        local.get   5
        i32.store   4
        local.get   2
        local.get   6
        i32.store   8
        local.get   2
        local.get   7
        i32.store   12

        local.get   2       # dst += 16
        i32.const   16
        i32.add
        local.set   2

        local.get   1       # src += 16
        i32.const   16
        i32.add
        local.set   1

        local.get   0
        i32.const   -16
        i32.add             # count -= 16
        local.set   0

        local.get   0
        i32.const   16      # count >= 16 (unsigned)
        i32.ge_u
        br_if       0       # loop
      end_loop
    end_if

    local.get   0
    i32.const   4           # count >= 4 (unsigned)
    i32.ge_u
    if
      loop
        local.get   1       # src
        i32.load    0       # load float from src as raw bits
        local.set   4

        local.get   1       # src += 4
        i32.const   4
        i32.add
        local.set   1

        local.get   4       # ((v >> 31) - 1) & v, with local 5 as the mask
        i32.const   31
        i32.shr_u
        local.set   5

        local.get   5
        i32.const   -1
        i32.add
        local.set   5

        local.get   4
        local.get   5
        i32.and
        local.set   4

        local.get   2       # dst
        local.get   4
        i32.store   0       # store float

        local.get   2       # dst += 4
        i32.const   4
        i32.add
        local.set   2

        local.get   0
        i32.const   -4
        i32.add             # count -= 4
        local.set   0

        local.get   0
        i32.const   4       # count >= 4 (unsigned)
        i32.ge_u
        br_if       0       # loop
      end_loop
    end_if
END_FUNCTION xnn_f32_vrelu_ukernel__wasm32_shr_x4