// Copyright 2020 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#include <xnnpack/assembly.h>

.syntax unified

// void xnn_f32_gemm_minmax_ukernel_4x4__aarch32_vfp_ld64(
//     size_t mr,                                 r0
//     size_t nc,                                 r1
//     size_t kc,                                 r2 -> r5
//     const uint8_t*restrict a,                  r3
//     size_t a_stride,                           sp + 96  -> (r11)
//     const void*restrict w,                     sp + 100 -> r9
//     uint8_t*restrict c,                        sp + 104 -> r6
//     size_t cm_stride,                          sp + 108 -> (r7)
//     size_t cn_stride,                          sp + 112 -> r11
//     const union xnn_f32_minmax_params params)  sp + 116 -> (r5)
//
// Stack offsets are relative to sp after the 96-byte prologue push.
// A register in parentheses only holds the value temporarily and is
// reused for something else later (r11: a_stride then cn_stride,
// r7: cm_stride then c3, r5: params pointer then the kc counter).

// d8-d15, r4-r11,r14(lr) need to be preserved if used. r13(sp),r15(pc) are reserved.

// Register usage

// A0   r3  s0-s1  d0
// A1  r12  s2-s3  d1
// A2  r10  s4-s5  d2
// A3   r0  s6-s7  d3

// B    r9  s12, s13, s14, s15 d6-d7
// B        s10, s11, s12, s13 d5-d6

// C0   r6 s16-s17  d8  s18-s19  d9
// C1   r4 s20-s21 d10  s22-s23 d11
// C2   r8 s24-s25 d12  s26-s27 d13
// C3   r7 s28-s29 d14  s30-s31 d15

// Clamp (r5) s8, s9 d4

// Overview: for every tile of 4 output columns, the 16 accumulators
// s16-s31 (4 rows x 4 columns) are seeded with the 4 bias floats read
// from w, updated with scalar VFP multiply-accumulates over kc bytes of
// each A row, clamped to [s8, s9] and stored to the four C row pointers.
// When mr < 4 the surplus A/C row pointers alias the previous row, so
// those rows are computed and stored redundantly instead of branching.
// NOTE(review): the code paths assume kc is a non-zero multiple of
// 4 bytes (one float) - confirm against the callers.

BEGIN_FUNCTION xnn_f32_gemm_minmax_ukernel_4x4__aarch32_vfp_ld64
        .arm
#ifndef __APPLE__
        .arch   armv6
        .fpu    vfp
#endif
        # Push 96 bytes
        PUSH    {r4, r5, r6, r7, r8, r9, r10, r11}  // 32
        VPUSH   {d8-d15}                            // +64 = 96

        LDR     r11, [sp, 96]           // Load a_stride
        LDRD    r6, r7, [sp, 104]       // Load c and cm_stride
        LDR     r5, [sp, 116]           // Load params

        # Clamp A and C pointers
        CMP     r0, 2                   // if mr >= 2
        ADD     r12, r3, r11            //   a1 = a0 + a_stride
        ADD     r4, r6, r7              //   c1 = c0 + cm_stride
        MOVLO   r12, r3                 // a1 = a0 when mr < 2
        MOVLO   r4, r6                  // c1 = c0 when mr < 2

        LDR     r9, [sp, 100]           // Load w

        // if mr > 2
        // The flags are still those of "CMP r0, 2" above: none of the
        // intervening ADD/MOVLO/LDR instructions update them.
        ADD     r10, r12, r11           //   a2 = a1 + a_stride
        ADD     r8, r4, r7              //   c2 = c1 + cm_stride
        MOVLS   r10, r12                // a2 = a1 when mr <= 2
        MOVLS   r8, r4                  // c2 = c1 when mr <= 2

        VLDR    d4, [r5]                // Load min/max values (s8 = min, s9 = max)

        CMP     r0, 4                   // if mr >=4
        ADD     r0, r10, r11            //   a3 = a2 + a_stride (mr is no longer needed in r0)
        ADD     r7, r8, r7              //   c3 = c2 + cm_stride
        LDR     r11, [sp, 112]          // Load cn_stride
        MOVLO   r0, r10                 // a3 = a2 when mr < 4
        MOVLO   r7, r8                  // c3 = c2 when mr < 4


        // Outer loop: one pass per tile of 4 output columns.
0:
        # Load initial bias from w into accumulators
        VLDM    r9!, {d8-d9}            // Bias
        SUBS    r5, r2, 8               // r5 = kc - 8 (kc counts bytes; 8 bytes = 2 floats)
        VMOV.F64 d10, d8
        VMOV.F64 d12, d8
        VMOV.F64 d14, d8
        VMOV.F64 d11, d9
        VMOV.F64 d13, d9
        VMOV.F64 d15, d9
        BLO     3f                      // less than 2 channels?

        # Main loop - 2 floats of A (8 bytes)
1:
        VLDM    r3!, {d0}               // A0
        VLDM    r9!, {d6-d7}            // B0
        VLDM    r12!, {d1}              // A1
        VLDM    r10!, {d2}              // A2
        VLDM    r0!, {d3}               // A3

        VMLA.F32 s16, s12, s0
        VMLA.F32 s17, s13, s0
        VMLA.F32 s20, s12, s2
        VMLA.F32 s21, s13, s2
        VMLA.F32 s24, s12, s4
        VMLA.F32 s25, s13, s4
        VMLA.F32 s28, s12, s6
        VMLA.F32 s29, s13, s6

        VMLA.F32 s18, s14, s0
        VMLA.F32 s19, s15, s0
        VMLA.F32 s22, s14, s2
        VMLA.F32 s23, s15, s2
        // B1 overwrites d6 (s12-s13), which the B0 products above have
        // finished with; d7 (s14-s15) of B0 is still needed just below.
        VLDM    r9!, {d5-d6}            // B1
        VMLA.F32 s26, s14, s4
        VMLA.F32 s27, s15, s4
        VMLA.F32 s30, s14, s6
        VMLA.F32 s31, s15, s6

        VMLA.F32 s16, s10, s1
        VMLA.F32 s17, s11, s1
        VMLA.F32 s20, s10, s3
        VMLA.F32 s21, s11, s3
        VMLA.F32 s24, s10, s5
        VMLA.F32 s25, s11, s5
        VMLA.F32 s28, s10, s7
        VMLA.F32 s29, s11, s7

        SUBS    r5, r5, 8               // flags consumed by BHS below; VMLA leaves APSR untouched

        VMLA.F32 s18, s12, s1
        VMLA.F32 s19, s13, s1
        VMLA.F32 s22, s12, s3
        VMLA.F32 s23, s13, s3
        VMLA.F32 s26, s12, s5
        VMLA.F32 s27, s13, s5
        VMLA.F32 s30, s12, s7
        VMLA.F32 s31, s13, s7

        BHS     1b

        # Is there a remainder?- 1 float of A (4 bytes)
        TST     r5, 4
        BNE     3f

2:
        # Clamp
        // Lower bound first (s8), then upper bound (s9), over s16-s31.
        // The sequence is interleaved: VMRS copies the result of the
        // previous VCMPE into APSR, the next VCMPE is issued (it only
        // changes FPSCR), and then the conditional VMOV for the previous
        // accumulator executes on the APSR flags.
        //   VMOVPL sN, s8  replaces sN when the "s8 vs sN" compare left N clear
        //   VMOVMI sN, s9  replaces sN when s9 <  sN
        VCMPE.F32 s8, s16
        VMRS    APSR_nzcv, FPSCR
        VCMPE.F32 s8, s17
        VMOVPL.F32 s16, s8
        VMRS    APSR_nzcv, FPSCR
        VCMPE.F32 s8, s18
        VMOVPL.F32 s17, s8
        VMRS    APSR_nzcv, FPSCR
        VCMPE.F32 s8, s19
        VMOVPL.F32 s18, s8
        VMRS    APSR_nzcv, FPSCR
        VCMPE.F32 s8, s20
        VMOVPL.F32 s19, s8
        VMRS    APSR_nzcv, FPSCR
        VCMPE.F32 s8, s21
        VMOVPL.F32 s20, s8
        VMRS    APSR_nzcv, FPSCR
        VCMPE.F32 s8, s22
        VMOVPL.F32 s21, s8
        VMRS    APSR_nzcv, FPSCR
        VCMPE.F32 s8, s23
        VMOVPL.F32 s22, s8
        VMRS    APSR_nzcv, FPSCR
        VCMPE.F32 s8, s24
        VMOVPL.F32 s23, s8
        VMRS    APSR_nzcv, FPSCR
        VCMPE.F32 s8, s25
        VMOVPL.F32 s24, s8
        VMRS    APSR_nzcv, FPSCR
        VCMPE.F32 s8, s26
        VMOVPL.F32 s25, s8
        VMRS    APSR_nzcv, FPSCR
        VCMPE.F32 s8, s27
        VMOVPL.F32 s26, s8
        VMRS    APSR_nzcv, FPSCR
        VCMPE.F32 s8, s28
        VMOVPL.F32 s27, s8
        VMRS    APSR_nzcv, FPSCR
        VCMPE.F32 s8, s29
        VMOVPL.F32 s28, s8
        VMRS    APSR_nzcv, FPSCR
        VCMPE.F32 s8, s30
        VMOVPL.F32 s29, s8
        VMRS    APSR_nzcv, FPSCR
        VCMPE.F32 s8, s31
        VMOVPL.F32 s30, s8
        VMRS    APSR_nzcv, FPSCR
        VCMPE.F32 s9, s16               // first upper-bound compare, issued before the last lower-bound move
        VMOVPL.F32 s31, s8
        VMRS    APSR_nzcv, FPSCR
        VCMPE.F32 s9, s17
        VMOVMI.F32 s16, s9
        VMRS    APSR_nzcv, FPSCR
        VCMPE.F32 s9, s18
        VMOVMI.F32 s17, s9
        VMRS    APSR_nzcv, FPSCR
        VCMPE.F32 s9, s19
        VMOVMI.F32 s18, s9
        VMRS    APSR_nzcv, FPSCR
        VCMPE.F32 s9, s20
        VMOVMI.F32 s19, s9
        VMRS    APSR_nzcv, FPSCR
        VCMPE.F32 s9, s21
        VMOVMI.F32 s20, s9
        VMRS    APSR_nzcv, FPSCR
        VCMPE.F32 s9, s22
        VMOVMI.F32 s21, s9
        VMRS    APSR_nzcv, FPSCR
        VCMPE.F32 s9, s23
        VMOVMI.F32 s22, s9
        VMRS    APSR_nzcv, FPSCR
        VCMPE.F32 s9, s24
        VMOVMI.F32 s23, s9
        VMRS    APSR_nzcv, FPSCR
        VCMPE.F32 s9, s25
        VMOVMI.F32 s24, s9
        VMRS    APSR_nzcv, FPSCR
        VCMPE.F32 s9, s26
        VMOVMI.F32 s25, s9
        VMRS    APSR_nzcv, FPSCR
        VCMPE.F32 s9, s27
        VMOVMI.F32 s26, s9
        VMRS    APSR_nzcv, FPSCR
        VCMPE.F32 s9, s28
        VMOVMI.F32 s27, s9
        VMRS    APSR_nzcv, FPSCR
        VCMPE.F32 s9, s29
        VMOVMI.F32 s28, s9
        VMRS    APSR_nzcv, FPSCR
        VCMPE.F32 s9, s30
        VMOVMI.F32 s29, s9
        VMRS    APSR_nzcv, FPSCR
        VCMPE.F32 s9, s31
        VMOVMI.F32 s30, s9
        VMRS    APSR_nzcv, FPSCR
        VMOVMI.F32 s31, s9

        SUBS    r1, r1, 4               // nc -= 4; flags are used by BLO here and BHI below
        BLO     4f

        # Store full 4 x 4
        // Each store is paired with rewinding one A pointer by kc (so the
        // next column tile re-reads the same rows) and advancing the C
        // pointer by cn_stride. SUB/ADD without the S suffix keep the
        // flags from "SUBS r1" intact for the BHI.
        VSTM    r6, {d8-d9}
        SUB     r0, r0, r2
        ADD     r6, r11
        VSTM    r4, {d10-d11}
        SUB     r10, r10, r2
        ADD     r4, r11
        VSTM    r8, {d12-d13}
        SUB     r12, r12, r2
        ADD     r8, r11
        VSTM    r7, {d14-d15}
        SUB     r3, r3, r2
        ADD     r7, r11
        BHI     0b                      // more columns remain

        VPOP    {d8-d15}
        POP     {r4, r5, r6, r7, r8, r9, r10, r11}
        BX      lr

3:
        # Remainder- 1 float of A (4 bytes)
        VLDM    r3!, {s0}               // A0
        VLDM    r9!, {d6-d7}            // B
        VLDM    r12!, {s1}              // A1
        VLDM    r10!, {s2}              // A2
        VLDM    r0!, {s3}               // A3

        VMLA.F32 s16, s12, s0
        VMLA.F32 s17, s13, s0
        VMLA.F32 s18, s14, s0
        VMLA.F32 s19, s15, s0

        VMLA.F32 s20, s12, s1
        VMLA.F32 s21, s13, s1
        VMLA.F32 s22, s14, s1
        VMLA.F32 s23, s15, s1

        VMLA.F32 s24, s12, s2
        VMLA.F32 s25, s13, s2
        VMLA.F32 s26, s14, s2
        VMLA.F32 s27, s15, s2

        VMLA.F32 s28, s12, s3
        VMLA.F32 s29, s13, s3
        VMLA.F32 s30, s14, s3
        VMLA.F32 s31, s15, s3

        B       2b

        # Store odd width
        // r1 now holds (remaining columns - 4), which wrapped below zero;
        // subtracting 4 does not alter bits 0-1, so they still encode the
        // 1..3 columns left to store.
4:
        TST     r1, 2
        BEQ     5f
        VSTM    r6!, {d8}               // store columns 0-1 of each row ...
        VMOV.F32 s16, s18               // ... and move column 2 down into the column-0 register
        VSTM    r4!, {d10}
        VMOV.F32 s20, s22
        VSTM    r8!, {d12}
        VMOV.F32 s24, s26
        VSTM    r7!, {d14}
        VMOV.F32 s28, s30

5:
        TST     r1, 1
        BEQ     6f
        VSTR    s16, [r6]
        VSTR    s20, [r4]
        VSTR    s24, [r8]
        VSTR    s28, [r7]

6:
        VPOP    {d8-d15}
        POP     {r4, r5, r6, r7, r8, r9, r10, r11}
        BX      lr

END_FUNCTION xnn_f32_gemm_minmax_ukernel_4x4__aarch32_vfp_ld64

#ifdef __ELF__
.section ".note.GNU-stack","",%progbits
#endif