1*e1eccf28SAndroid Build Coastguard Worker/* 2*e1eccf28SAndroid Build Coastguard Worker * Copyright (C) 2014 The Android Open Source Project 3*e1eccf28SAndroid Build Coastguard Worker * 4*e1eccf28SAndroid Build Coastguard Worker * Licensed under the Apache License, Version 2.0 (the "License"); 5*e1eccf28SAndroid Build Coastguard Worker * you may not use this file except in compliance with the License. 6*e1eccf28SAndroid Build Coastguard Worker * You may obtain a copy of the License at 7*e1eccf28SAndroid Build Coastguard Worker * 8*e1eccf28SAndroid Build Coastguard Worker * http://www.apache.org/licenses/LICENSE-2.0 9*e1eccf28SAndroid Build Coastguard Worker * 10*e1eccf28SAndroid Build Coastguard Worker * Unless required by applicable law or agreed to in writing, software 11*e1eccf28SAndroid Build Coastguard Worker * distributed under the License is distributed on an "AS IS" BASIS, 12*e1eccf28SAndroid Build Coastguard Worker * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13*e1eccf28SAndroid Build Coastguard Worker * See the License for the specific language governing permissions and 14*e1eccf28SAndroid Build Coastguard Worker * limitations under the License. 15*e1eccf28SAndroid Build Coastguard Worker */ 16*e1eccf28SAndroid Build Coastguard Worker 17*e1eccf28SAndroid Build Coastguard Worker#define ENTRY(f) .text; .align 4; .globl f; .type f,#function; f: 18*e1eccf28SAndroid Build Coastguard Worker#define PRIVATE(f) .text; .align 4; .type f,#function; f: 19*e1eccf28SAndroid Build Coastguard Worker#define END(f) .size f, .-f; 20*e1eccf28SAndroid Build Coastguard Worker 21*e1eccf28SAndroid Build Coastguard Worker//#define ARCH_ARM64_USE_BLUR_PRELOAD 22*e1eccf28SAndroid Build Coastguard Worker 23*e1eccf28SAndroid Build Coastguard Worker/* Number of fractional bits to preserve in intermediate results. 
The 24*e1eccf28SAndroid Build Coastguard Worker * intermediate storage is 16-bit, and we started with 8 bit data (the integer 25*e1eccf28SAndroid Build Coastguard Worker * part), so this should be between 0 and 8. 26*e1eccf28SAndroid Build Coastguard Worker */ 27*e1eccf28SAndroid Build Coastguard Worker.set FRACTION_BITS, 7 28*e1eccf28SAndroid Build Coastguard Worker.set MAX_R, 25 29*e1eccf28SAndroid Build Coastguard Worker 30*e1eccf28SAndroid Build Coastguard Worker 31*e1eccf28SAndroid Build Coastguard Worker/* A quick way of making a line of code conditional on some other condition. 32*e1eccf28SAndroid Build Coastguard Worker * Use `.set cc, 1` or `.set cc, 0` to enable or disable lines prefixed with 33*e1eccf28SAndroid Build Coastguard Worker * `ifcc`: 34*e1eccf28SAndroid Build Coastguard Worker */ 35*e1eccf28SAndroid Build Coastguard Worker.macro ifcc zzz:vararg 36*e1eccf28SAndroid Build Coastguard Worker.if cc 37*e1eccf28SAndroid Build Coastguard Worker \zzz 38*e1eccf28SAndroid Build Coastguard Worker.endif 39*e1eccf28SAndroid Build Coastguard Worker.endm 40*e1eccf28SAndroid Build Coastguard Worker 41*e1eccf28SAndroid Build Coastguard Worker/* It's not always clear that prefetching is beneficial and this needs further 42*e1eccf28SAndroid Build Coastguard Worker * testing on different cores, so it's made switchable here. 43*e1eccf28SAndroid Build Coastguard Worker */ 44*e1eccf28SAndroid Build Coastguard Worker#if defined(ARCH_ARM64_USE_BLUR_PRELOAD) 45*e1eccf28SAndroid Build Coastguard Worker#define VERTPLD(...) prfm PLDL1KEEP, [__VA_ARGS__] 46*e1eccf28SAndroid Build Coastguard Worker#else 47*e1eccf28SAndroid Build Coastguard Worker#define VERTPLD(...) nop 48*e1eccf28SAndroid Build Coastguard Worker#endif 49*e1eccf28SAndroid Build Coastguard Worker 50*e1eccf28SAndroid Build Coastguard Worker/* Fetch 16 columns of bytes (regardless of image format), convolve these 51*e1eccf28SAndroid Build Coastguard Worker * vertically, and leave them in the register file. 
If working near the top or 52*e1eccf28SAndroid Build Coastguard Worker * bottom of an image then clamp the addressing while loading the data in. 53*e1eccf28SAndroid Build Coastguard Worker * 54*e1eccf28SAndroid Build Coastguard Worker * The convolution is fully unrolled for windows up to max_r, with the 55*e1eccf28SAndroid Build Coastguard Worker * outermost edges calculated first. This way it's possible to branch directly 56*e1eccf28SAndroid Build Coastguard Worker * into the relevant part of the code for an arbitrary convolution radius. Two 57*e1eccf28SAndroid Build Coastguard Worker * variants of the loop are produced; one eliminates the clamping code for a 58*e1eccf28SAndroid Build Coastguard Worker * slight speed advantage. 59*e1eccf28SAndroid Build Coastguard Worker * 60*e1eccf28SAndroid Build Coastguard Worker * Where the macro is called with reg=x, the specified register is taken to 61*e1eccf28SAndroid Build Coastguard Worker * contain a pre-calculated pointer into one of the two loops. 
62*e1eccf28SAndroid Build Coastguard Worker * 63*e1eccf28SAndroid Build Coastguard Worker * Input: 64*e1eccf28SAndroid Build Coastguard Worker * x1 -- src 65*e1eccf28SAndroid Build Coastguard Worker * x2 -- pitch 66*e1eccf28SAndroid Build Coastguard Worker * x5 -- r 67*e1eccf28SAndroid Build Coastguard Worker * x6 -- rup (r, unless clipped to top of source image) 68*e1eccf28SAndroid Build Coastguard Worker * x7 -- rdn (r, unless clipped to bottom of source image) 69*e1eccf28SAndroid Build Coastguard Worker * x12 -- switch index 70*e1eccf28SAndroid Build Coastguard Worker * v0-v3 -- coefficient table 71*e1eccf28SAndroid Build Coastguard Worker * x13 = -pitch 72*e1eccf28SAndroid Build Coastguard Worker * x15 = top-row in 73*e1eccf28SAndroid Build Coastguard Worker * x19 = bottom-row in 74*e1eccf28SAndroid Build Coastguard Worker * Output: 75*e1eccf28SAndroid Build Coastguard Worker * x1 += 16 76*e1eccf28SAndroid Build Coastguard Worker * v10,v11 -- 16 convolved columns 77*e1eccf28SAndroid Build Coastguard Worker * Modifies: 78*e1eccf28SAndroid Build Coastguard Worker * x10 = upper row pointer 79*e1eccf28SAndroid Build Coastguard Worker * x11 = lower row pointer 80*e1eccf28SAndroid Build Coastguard Worker * v12-v15 = temporary sums 81*e1eccf28SAndroid Build Coastguard Worker */ 82*e1eccf28SAndroid Build Coastguard Worker.macro fetch, max_r=MAX_R, labelc=1, labelnc=2, reg=x12 /*{{{*/ 83*e1eccf28SAndroid Build Coastguard Worker .ifc \reg,x12 ; .set cc, 1 ; .else ; .set cc, 0 ; .endif 84*e1eccf28SAndroid Build Coastguard Worker 85*e1eccf28SAndroid Build Coastguard Worker ld1 {v15.16b}, [x1], #16 86*e1eccf28SAndroid Build Coastguard Worker mov x10, x15 87*e1eccf28SAndroid Build Coastguard Worker 88*e1eccf28SAndroid Build Coastguard Worker uxtl v14.8h, v15.8b 89*e1eccf28SAndroid Build Coastguard Worker VERTPLD(x1, #16) 90*e1eccf28SAndroid Build Coastguard Worker uxtl2 v15.8h, v15.16b 91*e1eccf28SAndroid Build Coastguard Worker .if \max_r < 16 // approximate 
92*e1eccf28SAndroid Build Coastguard Worker ifcc adr \reg, 1f 93*e1eccf28SAndroid Build Coastguard Worker .else 94*e1eccf28SAndroid Build Coastguard Worker ifcc adrp \reg, 1f 95*e1eccf28SAndroid Build Coastguard Worker ifcc add \reg, \reg, #:lo12:1f 96*e1eccf28SAndroid Build Coastguard Worker .endif 97*e1eccf28SAndroid Build Coastguard Worker 98*e1eccf28SAndroid Build Coastguard Worker umull v12.4s, v14.4h, v0.h[0] 99*e1eccf28SAndroid Build Coastguard Worker ifcc sub \reg, \reg, x5, LSL #6 100*e1eccf28SAndroid Build Coastguard Worker umull2 v13.4s, v14.8h, v0.h[0] 101*e1eccf28SAndroid Build Coastguard Worker mov x11, x19 102*e1eccf28SAndroid Build Coastguard Worker umull v14.4s, v15.4h, v0.h[0] 103*e1eccf28SAndroid Build Coastguard Worker ifcc add \reg, \reg, x5, LSL #3 104*e1eccf28SAndroid Build Coastguard Worker umull2 v15.4s, v15.8h, v0.h[0] 105*e1eccf28SAndroid Build Coastguard Worker br \reg 106*e1eccf28SAndroid Build Coastguard Worker 107*e1eccf28SAndroid Build Coastguard Worker /* This version of the vertical fetch loop body is used away from the edges 108*e1eccf28SAndroid Build Coastguard Worker * of the source image. The pointers start at the top and bottom source rows 109*e1eccf28SAndroid Build Coastguard Worker * and work their way towards the centre on each iteration. This way the 110*e1eccf28SAndroid Build Coastguard Worker * number of taps used can be controlled by jumping directly into the middle 111*e1eccf28SAndroid Build Coastguard Worker * of the loop and running to completion. 112*e1eccf28SAndroid Build Coastguard Worker * If the loop body changes size then the code which calculates the address of 113*e1eccf28SAndroid Build Coastguard Worker * the initial iteration must be updated to accordingly. 
114*e1eccf28SAndroid Build Coastguard Worker */ 115*e1eccf28SAndroid Build Coastguard Worker .macro vertfetch_noclamp i, dreg 116*e1eccf28SAndroid Build Coastguard Worker .if 0 < \i && \i <= \max_r 117*e1eccf28SAndroid Build Coastguard Worker ld1 {v10.16b}, [x10], x2 118*e1eccf28SAndroid Build Coastguard Worker ld1 {v11.16b}, [x11], x13 119*e1eccf28SAndroid Build Coastguard Worker uaddl v16.8h, v10.8b, v11.8b 120*e1eccf28SAndroid Build Coastguard Worker uaddl2 v11.8h, v10.16b, v11.16b 121*e1eccf28SAndroid Build Coastguard Worker umlal v12.4s, v16.4h, \dreg 122*e1eccf28SAndroid Build Coastguard Worker umlal2 v13.4s, v16.8h, \dreg 123*e1eccf28SAndroid Build Coastguard Worker VERTPLD(x10, #32) 124*e1eccf28SAndroid Build Coastguard Worker umlal v14.4s, v11.4h, \dreg 125*e1eccf28SAndroid Build Coastguard Worker VERTPLD(x11, #32) 126*e1eccf28SAndroid Build Coastguard Worker umlal2 v15.4s, v11.8h, \dreg 127*e1eccf28SAndroid Build Coastguard Worker .endif 128*e1eccf28SAndroid Build Coastguard Worker .endm 129*e1eccf28SAndroid Build Coastguard Worker 130*e1eccf28SAndroid Build Coastguard Worker /* This version of the vertical fetch loop body is used near the edges of the 131*e1eccf28SAndroid Build Coastguard Worker * source image, where one or both of the accesses may start with a clamped 132*e1eccf28SAndroid Build Coastguard Worker * value, and the row addresses only begin to change after some number of 133*e1eccf28SAndroid Build Coastguard Worker * iterations before the end. 134*e1eccf28SAndroid Build Coastguard Worker * If the loop body changes size then the code which calculates the address of 135*e1eccf28SAndroid Build Coastguard Worker * the initial iteration must be updated to accordingly. 
136*e1eccf28SAndroid Build Coastguard Worker */ 137*e1eccf28SAndroid Build Coastguard Worker .macro vertfetch_clamped i, dreg 138*e1eccf28SAndroid Build Coastguard Worker .if 0 < \i && \i <= \max_r 139*e1eccf28SAndroid Build Coastguard Worker ld1 {v10.16b}, [x10], x2 140*e1eccf28SAndroid Build Coastguard Worker cmp x6, #\i 141*e1eccf28SAndroid Build Coastguard Worker ld1 {v11.16b}, [x11], x13 142*e1eccf28SAndroid Build Coastguard Worker csel x10, x15, x10, lo 143*e1eccf28SAndroid Build Coastguard Worker uaddl v16.8h, v10.8b, v11.8b 144*e1eccf28SAndroid Build Coastguard Worker cmp x7, #\i 145*e1eccf28SAndroid Build Coastguard Worker uaddl2 v11.8h, v10.16b, v11.16b 146*e1eccf28SAndroid Build Coastguard Worker csel x11, x19, x11, lo 147*e1eccf28SAndroid Build Coastguard Worker umlal v12.4s, v16.4h, \dreg 148*e1eccf28SAndroid Build Coastguard Worker umlal2 v13.4s, v16.8h, \dreg 149*e1eccf28SAndroid Build Coastguard Worker VERTPLD(x10, #32) 150*e1eccf28SAndroid Build Coastguard Worker umlal v14.4s, v11.4h, \dreg 151*e1eccf28SAndroid Build Coastguard Worker VERTPLD(x11, #32) 152*e1eccf28SAndroid Build Coastguard Worker umlal2 v15.4s, v11.8h, \dreg 153*e1eccf28SAndroid Build Coastguard Worker .endif 154*e1eccf28SAndroid Build Coastguard Worker .endm 155*e1eccf28SAndroid Build Coastguard Worker 156*e1eccf28SAndroid Build Coastguard Worker /* Entry into this unrolled loop is computed as a negative index from 157*e1eccf28SAndroid Build Coastguard Worker * \labelc at the end of the block. 
158*e1eccf28SAndroid Build Coastguard Worker */ 159*e1eccf28SAndroid Build Coastguard Worker .align 4 160*e1eccf28SAndroid Build Coastguard Worker vertfetch_clamped 27, v3.h[3] 161*e1eccf28SAndroid Build Coastguard Worker vertfetch_clamped 26, v3.h[2] 162*e1eccf28SAndroid Build Coastguard Worker vertfetch_clamped 25, v3.h[1] 163*e1eccf28SAndroid Build Coastguard Worker vertfetch_clamped 24, v3.h[0] 164*e1eccf28SAndroid Build Coastguard Worker vertfetch_clamped 23, v2.h[7] 165*e1eccf28SAndroid Build Coastguard Worker vertfetch_clamped 22, v2.h[6] 166*e1eccf28SAndroid Build Coastguard Worker vertfetch_clamped 21, v2.h[5] 167*e1eccf28SAndroid Build Coastguard Worker vertfetch_clamped 20, v2.h[4] 168*e1eccf28SAndroid Build Coastguard Worker vertfetch_clamped 19, v2.h[3] 169*e1eccf28SAndroid Build Coastguard Worker vertfetch_clamped 18, v2.h[2] 170*e1eccf28SAndroid Build Coastguard Worker vertfetch_clamped 17, v2.h[1] 171*e1eccf28SAndroid Build Coastguard Worker vertfetch_clamped 16, v2.h[0] 172*e1eccf28SAndroid Build Coastguard Worker vertfetch_clamped 15, v1.h[7] 173*e1eccf28SAndroid Build Coastguard Worker vertfetch_clamped 14, v1.h[6] 174*e1eccf28SAndroid Build Coastguard Worker vertfetch_clamped 13, v1.h[5] 175*e1eccf28SAndroid Build Coastguard Worker vertfetch_clamped 12, v1.h[4] 176*e1eccf28SAndroid Build Coastguard Worker vertfetch_clamped 11, v1.h[3] 177*e1eccf28SAndroid Build Coastguard Worker vertfetch_clamped 10, v1.h[2] 178*e1eccf28SAndroid Build Coastguard Worker vertfetch_clamped 9, v1.h[1] 179*e1eccf28SAndroid Build Coastguard Worker vertfetch_clamped 8, v1.h[0] 180*e1eccf28SAndroid Build Coastguard Worker vertfetch_clamped 7, v0.h[7] 181*e1eccf28SAndroid Build Coastguard Worker vertfetch_clamped 6, v0.h[6] 182*e1eccf28SAndroid Build Coastguard Worker vertfetch_clamped 5, v0.h[5] 183*e1eccf28SAndroid Build Coastguard Worker vertfetch_clamped 4, v0.h[4] 184*e1eccf28SAndroid Build Coastguard Worker vertfetch_clamped 3, v0.h[3] 185*e1eccf28SAndroid Build 
Coastguard Worker vertfetch_clamped 2, v0.h[2] 186*e1eccf28SAndroid Build Coastguard Worker vertfetch_clamped 1, v0.h[1] 187*e1eccf28SAndroid Build Coastguard Worker vertfetch_clamped 0, v0.h[0] 188*e1eccf28SAndroid Build Coastguard Worker 1: 189*e1eccf28SAndroid Build Coastguard Worker \labelc : b 2f /* done with clamped loop, skip over non-clamped loop */ 190*e1eccf28SAndroid Build Coastguard Worker 191*e1eccf28SAndroid Build Coastguard Worker /* Entry into this unrolled loop is computed as a negative index from 192*e1eccf28SAndroid Build Coastguard Worker * \labelnc at the end of the block. 193*e1eccf28SAndroid Build Coastguard Worker */ 194*e1eccf28SAndroid Build Coastguard Worker .align 4 195*e1eccf28SAndroid Build Coastguard Worker vertfetch_noclamp 27, v3.h[3] 196*e1eccf28SAndroid Build Coastguard Worker vertfetch_noclamp 26, v3.h[2] 197*e1eccf28SAndroid Build Coastguard Worker vertfetch_noclamp 25, v3.h[1] 198*e1eccf28SAndroid Build Coastguard Worker vertfetch_noclamp 24, v3.h[0] 199*e1eccf28SAndroid Build Coastguard Worker vertfetch_noclamp 23, v2.h[7] 200*e1eccf28SAndroid Build Coastguard Worker vertfetch_noclamp 22, v2.h[6] 201*e1eccf28SAndroid Build Coastguard Worker vertfetch_noclamp 21, v2.h[5] 202*e1eccf28SAndroid Build Coastguard Worker vertfetch_noclamp 20, v2.h[4] 203*e1eccf28SAndroid Build Coastguard Worker vertfetch_noclamp 19, v2.h[3] 204*e1eccf28SAndroid Build Coastguard Worker vertfetch_noclamp 18, v2.h[2] 205*e1eccf28SAndroid Build Coastguard Worker vertfetch_noclamp 17, v2.h[1] 206*e1eccf28SAndroid Build Coastguard Worker vertfetch_noclamp 16, v2.h[0] 207*e1eccf28SAndroid Build Coastguard Worker vertfetch_noclamp 15, v1.h[7] 208*e1eccf28SAndroid Build Coastguard Worker vertfetch_noclamp 14, v1.h[6] 209*e1eccf28SAndroid Build Coastguard Worker vertfetch_noclamp 13, v1.h[5] 210*e1eccf28SAndroid Build Coastguard Worker vertfetch_noclamp 12, v1.h[4] 211*e1eccf28SAndroid Build Coastguard Worker vertfetch_noclamp 11, v1.h[3] 212*e1eccf28SAndroid 
Build Coastguard Worker vertfetch_noclamp 10, v1.h[2] 213*e1eccf28SAndroid Build Coastguard Worker vertfetch_noclamp 9, v1.h[1] 214*e1eccf28SAndroid Build Coastguard Worker vertfetch_noclamp 8, v1.h[0] 215*e1eccf28SAndroid Build Coastguard Worker vertfetch_noclamp 7, v0.h[7] 216*e1eccf28SAndroid Build Coastguard Worker vertfetch_noclamp 6, v0.h[6] 217*e1eccf28SAndroid Build Coastguard Worker vertfetch_noclamp 5, v0.h[5] 218*e1eccf28SAndroid Build Coastguard Worker vertfetch_noclamp 4, v0.h[4] 219*e1eccf28SAndroid Build Coastguard Worker vertfetch_noclamp 3, v0.h[3] 220*e1eccf28SAndroid Build Coastguard Worker vertfetch_noclamp 2, v0.h[2] 221*e1eccf28SAndroid Build Coastguard Worker vertfetch_noclamp 1, v0.h[1] 222*e1eccf28SAndroid Build Coastguard Worker vertfetch_noclamp 0, v0.h[0] 223*e1eccf28SAndroid Build Coastguard Worker \labelnc : 224*e1eccf28SAndroid Build Coastguard Worker 225*e1eccf28SAndroid Build Coastguard Worker .purgem vertfetch_clamped 226*e1eccf28SAndroid Build Coastguard Worker .purgem vertfetch_noclamp 227*e1eccf28SAndroid Build Coastguard Worker 228*e1eccf28SAndroid Build Coastguard Worker 2: uqrshrn v10.4h, v12.4s, #16 - FRACTION_BITS 229*e1eccf28SAndroid Build Coastguard Worker add x15, x15, #16 230*e1eccf28SAndroid Build Coastguard Worker uqrshrn2 v10.8h, v13.4s, #16 - FRACTION_BITS 231*e1eccf28SAndroid Build Coastguard Worker add x19, x19, #16 232*e1eccf28SAndroid Build Coastguard Worker uqrshrn v11.4h, v14.4s, #16 - FRACTION_BITS 233*e1eccf28SAndroid Build Coastguard Worker uqrshrn2 v11.8h, v15.4s, #16 - FRACTION_BITS 234*e1eccf28SAndroid Build Coastguard Worker.endm /*}}}*/ 235*e1eccf28SAndroid Build Coastguard Worker 236*e1eccf28SAndroid Build Coastguard Worker/* Some portion of the convolution window (as much as will fit, and all of it 237*e1eccf28SAndroid Build Coastguard Worker * for the uchar1 cases) is kept in the register file to avoid unnecessary 238*e1eccf28SAndroid Build Coastguard Worker * memory accesses. 
This forces the horizontal loops to be unrolled because 239*e1eccf28SAndroid Build Coastguard Worker * there's no indexed addressing into the register file. 240*e1eccf28SAndroid Build Coastguard Worker * 241*e1eccf28SAndroid Build Coastguard Worker * As in the fetch macro, the operations are ordered from outside to inside, so 242*e1eccf28SAndroid Build Coastguard Worker * that jumping into the middle of the block bypasses the unwanted window taps. 243*e1eccf28SAndroid Build Coastguard Worker * 244*e1eccf28SAndroid Build Coastguard Worker * There are several variants of the macro because of the fixed offets of the 245*e1eccf28SAndroid Build Coastguard Worker * taps -- the wider the maximum radius the further the centre tap is from the 246*e1eccf28SAndroid Build Coastguard Worker * most recently fetched data. This means that pre-filling the window requires 247*e1eccf28SAndroid Build Coastguard Worker * more data that won't be used and it means that rotating the window involves 248*e1eccf28SAndroid Build Coastguard Worker * more mov operations. 249*e1eccf28SAndroid Build Coastguard Worker * 250*e1eccf28SAndroid Build Coastguard Worker * When the buffer gets too big the buffer at [x9] is used. 251*e1eccf28SAndroid Build Coastguard Worker * 252*e1eccf28SAndroid Build Coastguard Worker * Input: 253*e1eccf28SAndroid Build Coastguard Worker * v16-v31,v4-v11 -- convoltion window 254*e1eccf28SAndroid Build Coastguard Worker * x9 -- pointer to additional convolution window data 255*e1eccf28SAndroid Build Coastguard Worker * Output: 256*e1eccf28SAndroid Build Coastguard Worker * x9 -- updated buffer pointer (if used) 257*e1eccf28SAndroid Build Coastguard Worker * d31 -- result to be stored 258*e1eccf28SAndroid Build Coastguard Worker * Modifies: 259*e1eccf28SAndroid Build Coastguard Worker * x12 -- temp buffer pointer 260*e1eccf28SAndroid Build Coastguard Worker * v12-v13 -- temporaries for load and vext operations. 
261*e1eccf28SAndroid Build Coastguard Worker * v14-v15 -- intermediate sums 262*e1eccf28SAndroid Build Coastguard Worker */ 263*e1eccf28SAndroid Build Coastguard Worker#define TUNED_LIST1 8, 16 264*e1eccf28SAndroid Build Coastguard Worker.macro hconv1_8/*{{{*/ 265*e1eccf28SAndroid Build Coastguard Worker 266*e1eccf28SAndroid Build Coastguard Worker.rodata 267*e1eccf28SAndroid Build Coastguard Worker 200: .hword -4 268*e1eccf28SAndroid Build Coastguard Worker .hword 101f-100f 269*e1eccf28SAndroid Build Coastguard Worker .hword 102f-100f 270*e1eccf28SAndroid Build Coastguard Worker .hword 103f-100f 271*e1eccf28SAndroid Build Coastguard Worker .hword 104f-100f 272*e1eccf28SAndroid Build Coastguard Worker .hword 105f-100f 273*e1eccf28SAndroid Build Coastguard Worker .hword 106f-100f 274*e1eccf28SAndroid Build Coastguard Worker .hword 107f-100f 275*e1eccf28SAndroid Build Coastguard Worker .hword 108f-100f 276*e1eccf28SAndroid Build Coastguard Worker .align 4 277*e1eccf28SAndroid Build Coastguard Worker.text 278*e1eccf28SAndroid Build Coastguard Worker umull v14.4s, v9.4h, v0.h[0] 279*e1eccf28SAndroid Build Coastguard Worker umull2 v15.4s, v9.8h, v0.h[0] 280*e1eccf28SAndroid Build Coastguard Worker 281*e1eccf28SAndroid Build Coastguard Worker adrp x16, 200b 282*e1eccf28SAndroid Build Coastguard Worker add x16, x16, :lo12:200b 283*e1eccf28SAndroid Build Coastguard Worker ldrsh x12, [x16, x5, LSL #1] 284*e1eccf28SAndroid Build Coastguard Worker adr x16, 100f 285*e1eccf28SAndroid Build Coastguard Worker add x12, x12, x16 286*e1eccf28SAndroid Build Coastguard Worker 100: br x12 287*e1eccf28SAndroid Build Coastguard Worker 108: umlal v14.4s, v8.4h, v1.h[0] 288*e1eccf28SAndroid Build Coastguard Worker umlal2 v15.4s, v8.8h, v1.h[0] 289*e1eccf28SAndroid Build Coastguard Worker umlal v14.4s, v10.4h, v1.h[0] 290*e1eccf28SAndroid Build Coastguard Worker umlal2 v15.4s, v10.8h, v1.h[0] 291*e1eccf28SAndroid Build Coastguard Worker 107: ext v12.16b, v8.16b, v9.16b, #1*2 
292*e1eccf28SAndroid Build Coastguard Worker ext v13.16b, v9.16b, v10.16b, #7*2 293*e1eccf28SAndroid Build Coastguard Worker umlal v14.4s, v12.4h, v0.h[7] 294*e1eccf28SAndroid Build Coastguard Worker umlal2 v15.4s, v12.8h, v0.h[7] 295*e1eccf28SAndroid Build Coastguard Worker umlal v14.4s, v13.4h, v0.h[7] 296*e1eccf28SAndroid Build Coastguard Worker umlal2 v15.4s, v13.8h, v0.h[7] 297*e1eccf28SAndroid Build Coastguard Worker 106: ext v12.16b, v8.16b, v9.16b, #2*2 298*e1eccf28SAndroid Build Coastguard Worker ext v13.16b, v9.16b, v10.16b, #6*2 299*e1eccf28SAndroid Build Coastguard Worker umlal v14.4s, v12.4h, v0.h[6] 300*e1eccf28SAndroid Build Coastguard Worker umlal2 v15.4s, v12.8h, v0.h[6] 301*e1eccf28SAndroid Build Coastguard Worker umlal v14.4s, v13.4h, v0.h[6] 302*e1eccf28SAndroid Build Coastguard Worker umlal2 v15.4s, v13.8h, v0.h[6] 303*e1eccf28SAndroid Build Coastguard Worker 105: ext v12.16b, v8.16b, v9.16b, #3*2 304*e1eccf28SAndroid Build Coastguard Worker ext v13.16b, v9.16b, v10.16b, #5*2 305*e1eccf28SAndroid Build Coastguard Worker umlal v14.4s, v12.4h, v0.h[5] 306*e1eccf28SAndroid Build Coastguard Worker umlal2 v15.4s, v12.8h, v0.h[5] 307*e1eccf28SAndroid Build Coastguard Worker umlal v14.4s, v13.4h, v0.h[5] 308*e1eccf28SAndroid Build Coastguard Worker umlal2 v15.4s, v13.8h, v0.h[5] 309*e1eccf28SAndroid Build Coastguard Worker 104: //ext v12.16b, v8.16b, v9.16b, #4*2 310*e1eccf28SAndroid Build Coastguard Worker //ext v13.16b, v9.16b, v10.16b, #4*2 311*e1eccf28SAndroid Build Coastguard Worker umlal2 v14.4s, v8.8h, v0.h[4] 312*e1eccf28SAndroid Build Coastguard Worker umlal v15.4s, v9.4h, v0.h[4] 313*e1eccf28SAndroid Build Coastguard Worker umlal2 v14.4s, v9.8h, v0.h[4] 314*e1eccf28SAndroid Build Coastguard Worker umlal v15.4s, v10.4h, v0.h[4] 315*e1eccf28SAndroid Build Coastguard Worker 103: ext v12.16b, v8.16b, v9.16b, #5*2 316*e1eccf28SAndroid Build Coastguard Worker ext v13.16b, v9.16b, v10.16b, #3*2 317*e1eccf28SAndroid Build Coastguard Worker umlal 
v14.4s, v12.4h, v0.h[3] 318*e1eccf28SAndroid Build Coastguard Worker umlal2 v15.4s, v12.8h, v0.h[3] 319*e1eccf28SAndroid Build Coastguard Worker umlal v14.4s, v13.4h, v0.h[3] 320*e1eccf28SAndroid Build Coastguard Worker umlal2 v15.4s, v13.8h, v0.h[3] 321*e1eccf28SAndroid Build Coastguard Worker 102: ext v12.16b, v8.16b, v9.16b, #6*2 322*e1eccf28SAndroid Build Coastguard Worker ext v13.16b, v9.16b, v10.16b, #2*2 323*e1eccf28SAndroid Build Coastguard Worker umlal v14.4s, v12.4h, v0.h[2] 324*e1eccf28SAndroid Build Coastguard Worker umlal2 v15.4s, v12.8h, v0.h[2] 325*e1eccf28SAndroid Build Coastguard Worker umlal v14.4s, v13.4h, v0.h[2] 326*e1eccf28SAndroid Build Coastguard Worker umlal2 v15.4s, v13.8h, v0.h[2] 327*e1eccf28SAndroid Build Coastguard Worker 101: ext v12.16b, v8.16b, v9.16b, #7*2 328*e1eccf28SAndroid Build Coastguard Worker ext v13.16b, v9.16b, v10.16b, #1*2 329*e1eccf28SAndroid Build Coastguard Worker umlal v14.4s, v12.4h, v0.h[1] 330*e1eccf28SAndroid Build Coastguard Worker umlal2 v15.4s, v12.8h, v0.h[1] 331*e1eccf28SAndroid Build Coastguard Worker umlal v14.4s, v13.4h, v0.h[1] 332*e1eccf28SAndroid Build Coastguard Worker umlal2 v15.4s, v13.8h, v0.h[1] 333*e1eccf28SAndroid Build Coastguard Worker 334*e1eccf28SAndroid Build Coastguard Worker uqrshrn v14.4h, v14.4s, #16 335*e1eccf28SAndroid Build Coastguard Worker uqrshrn2 v14.8h, v15.4s, #16 336*e1eccf28SAndroid Build Coastguard Worker uqrshrn v15.8b, v14.8h, #FRACTION_BITS 337*e1eccf28SAndroid Build Coastguard Worker 338*e1eccf28SAndroid Build Coastguard Worker mov v8.16b, v9.16b 339*e1eccf28SAndroid Build Coastguard Worker mov v9.16b, v10.16b 340*e1eccf28SAndroid Build Coastguard Worker mov v10.16b, v11.16b 341*e1eccf28SAndroid Build Coastguard Worker.endm/*}}}*/ 342*e1eccf28SAndroid Build Coastguard Worker 343*e1eccf28SAndroid Build Coastguard Worker.macro hconv1_16/*{{{*/ 344*e1eccf28SAndroid Build Coastguard Worker.rodata 345*e1eccf28SAndroid Build Coastguard Worker 200: .hword -4 
346*e1eccf28SAndroid Build Coastguard Worker .hword 101f-100f 347*e1eccf28SAndroid Build Coastguard Worker .hword 102f-100f 348*e1eccf28SAndroid Build Coastguard Worker .hword 103f-100f 349*e1eccf28SAndroid Build Coastguard Worker .hword 104f-100f 350*e1eccf28SAndroid Build Coastguard Worker .hword 105f-100f 351*e1eccf28SAndroid Build Coastguard Worker .hword 106f-100f 352*e1eccf28SAndroid Build Coastguard Worker .hword 107f-100f 353*e1eccf28SAndroid Build Coastguard Worker .hword 108f-100f 354*e1eccf28SAndroid Build Coastguard Worker .hword 109f-100f 355*e1eccf28SAndroid Build Coastguard Worker .hword 110f-100f 356*e1eccf28SAndroid Build Coastguard Worker .hword 111f-100f 357*e1eccf28SAndroid Build Coastguard Worker .hword 112f-100f 358*e1eccf28SAndroid Build Coastguard Worker .hword 113f-100f 359*e1eccf28SAndroid Build Coastguard Worker .hword 114f-100f 360*e1eccf28SAndroid Build Coastguard Worker .hword 115f-100f 361*e1eccf28SAndroid Build Coastguard Worker .hword 116f-100f 362*e1eccf28SAndroid Build Coastguard Worker .align 4 363*e1eccf28SAndroid Build Coastguard Worker 364*e1eccf28SAndroid Build Coastguard Worker.text 365*e1eccf28SAndroid Build Coastguard Worker umull v14.4s, v8.4h, v0.h[0] 366*e1eccf28SAndroid Build Coastguard Worker umull2 v15.4s, v8.8h, v0.h[0] 367*e1eccf28SAndroid Build Coastguard Worker 368*e1eccf28SAndroid Build Coastguard Worker adrp x16, 200b 369*e1eccf28SAndroid Build Coastguard Worker add x16, x16, :lo12:200b 370*e1eccf28SAndroid Build Coastguard Worker ldrsh x12, [x16, x5, LSL #1] 371*e1eccf28SAndroid Build Coastguard Worker adr x16, 100f 372*e1eccf28SAndroid Build Coastguard Worker add x12, x12, x16 373*e1eccf28SAndroid Build Coastguard Worker 100: br x12 374*e1eccf28SAndroid Build Coastguard Worker 116: //ext v12.16b, v6.16b, v7.16b, #0*2 375*e1eccf28SAndroid Build Coastguard Worker //ext v13.16b, v10.16b, v11.16b, #0*2 376*e1eccf28SAndroid Build Coastguard Worker umlal v14.4s, v6.4h, v2.h[0] 377*e1eccf28SAndroid Build Coastguard 
Worker umlal2 v15.4s, v6.8h, v2.h[0] 378*e1eccf28SAndroid Build Coastguard Worker umlal v14.4s, v10.4h, v2.h[0] 379*e1eccf28SAndroid Build Coastguard Worker umlal2 v15.4s, v10.8h, v2.h[0] 380*e1eccf28SAndroid Build Coastguard Worker 115: ext v12.16b, v6.16b, v7.16b, #1*2 381*e1eccf28SAndroid Build Coastguard Worker ext v13.16b, v9.16b, v10.16b, #7*2 382*e1eccf28SAndroid Build Coastguard Worker umlal v14.4s, v12.4h, v1.h[7] 383*e1eccf28SAndroid Build Coastguard Worker umlal2 v15.4s, v12.8h, v1.h[7] 384*e1eccf28SAndroid Build Coastguard Worker umlal v14.4s, v13.4h, v1.h[7] 385*e1eccf28SAndroid Build Coastguard Worker umlal2 v15.4s, v13.8h, v1.h[7] 386*e1eccf28SAndroid Build Coastguard Worker 114: ext v12.16b, v6.16b, v7.16b, #2*2 387*e1eccf28SAndroid Build Coastguard Worker ext v13.16b, v9.16b, v10.16b, #6*2 388*e1eccf28SAndroid Build Coastguard Worker umlal v14.4s, v12.4h, v1.h[6] 389*e1eccf28SAndroid Build Coastguard Worker umlal2 v15.4s, v12.8h, v1.h[6] 390*e1eccf28SAndroid Build Coastguard Worker umlal v14.4s, v13.4h, v1.h[6] 391*e1eccf28SAndroid Build Coastguard Worker umlal2 v15.4s, v13.8h, v1.h[6] 392*e1eccf28SAndroid Build Coastguard Worker 113: ext v12.16b, v6.16b, v7.16b, #3*2 393*e1eccf28SAndroid Build Coastguard Worker ext v13.16b, v9.16b, v10.16b, #5*2 394*e1eccf28SAndroid Build Coastguard Worker umlal v14.4s, v12.4h, v1.h[5] 395*e1eccf28SAndroid Build Coastguard Worker umlal2 v15.4s, v12.8h, v1.h[5] 396*e1eccf28SAndroid Build Coastguard Worker umlal v14.4s, v13.4h, v1.h[5] 397*e1eccf28SAndroid Build Coastguard Worker umlal2 v15.4s, v13.8h, v1.h[5] 398*e1eccf28SAndroid Build Coastguard Worker 112: //ext v12.16b, v6.16b, v7.16b, #4*2 399*e1eccf28SAndroid Build Coastguard Worker //ext v13.16b, v9.16b, v10.16b, #4*2 400*e1eccf28SAndroid Build Coastguard Worker umlal2 v14.4s, v6.8h, v1.h[4] 401*e1eccf28SAndroid Build Coastguard Worker umlal v15.4s, v7.4h, v1.h[4] 402*e1eccf28SAndroid Build Coastguard Worker umlal2 v14.4s, v9.8h, v1.h[4] 403*e1eccf28SAndroid 
Build Coastguard Worker umlal v15.4s, v10.4h, v1.h[4] 404*e1eccf28SAndroid Build Coastguard Worker 111: ext v12.16b, v6.16b, v7.16b, #5*2 405*e1eccf28SAndroid Build Coastguard Worker ext v13.16b, v9.16b, v10.16b, #3*2 406*e1eccf28SAndroid Build Coastguard Worker umlal v14.4s, v12.4h, v1.h[3] 407*e1eccf28SAndroid Build Coastguard Worker umlal2 v15.4s, v12.8h, v1.h[3] 408*e1eccf28SAndroid Build Coastguard Worker umlal v14.4s, v13.4h, v1.h[3] 409*e1eccf28SAndroid Build Coastguard Worker umlal2 v15.4s, v13.8h, v1.h[3] 410*e1eccf28SAndroid Build Coastguard Worker 110: ext v12.16b, v6.16b, v7.16b, #6*2 411*e1eccf28SAndroid Build Coastguard Worker ext v13.16b, v9.16b, v10.16b, #2*2 412*e1eccf28SAndroid Build Coastguard Worker umlal v14.4s, v12.4h, v1.h[2] 413*e1eccf28SAndroid Build Coastguard Worker umlal2 v15.4s, v12.8h, v1.h[2] 414*e1eccf28SAndroid Build Coastguard Worker umlal v14.4s, v13.4h, v1.h[2] 415*e1eccf28SAndroid Build Coastguard Worker umlal2 v15.4s, v13.8h, v1.h[2] 416*e1eccf28SAndroid Build Coastguard Worker 109: ext v12.16b, v6.16b, v7.16b, #7*2 417*e1eccf28SAndroid Build Coastguard Worker ext v13.16b, v9.16b, v10.16b, #1*2 418*e1eccf28SAndroid Build Coastguard Worker umlal v14.4s, v12.4h, v1.h[1] 419*e1eccf28SAndroid Build Coastguard Worker umlal2 v15.4s, v12.8h, v1.h[1] 420*e1eccf28SAndroid Build Coastguard Worker umlal v14.4s, v13.4h, v1.h[1] 421*e1eccf28SAndroid Build Coastguard Worker umlal2 v15.4s, v13.8h, v1.h[1] 422*e1eccf28SAndroid Build Coastguard Worker 108: //ext v12.16b, v7.16b, v8.16b, #0*2 423*e1eccf28SAndroid Build Coastguard Worker //ext v13.16b, v9.16b, v10.16b, #0*2 424*e1eccf28SAndroid Build Coastguard Worker umlal v14.4s, v7.4h, v1.h[0] 425*e1eccf28SAndroid Build Coastguard Worker umlal2 v15.4s, v7.8h, v1.h[0] 426*e1eccf28SAndroid Build Coastguard Worker umlal v14.4s, v9.4h, v1.h[0] 427*e1eccf28SAndroid Build Coastguard Worker umlal2 v15.4s, v9.8h, v1.h[0] 428*e1eccf28SAndroid Build Coastguard Worker 107: ext v12.16b, v7.16b, v8.16b, 
#1*2 429*e1eccf28SAndroid Build Coastguard Worker ext v13.16b, v8.16b, v9.16b, #7*2 430*e1eccf28SAndroid Build Coastguard Worker umlal v14.4s, v12.4h, v0.h[7] 431*e1eccf28SAndroid Build Coastguard Worker umlal2 v15.4s, v12.8h, v0.h[7] 432*e1eccf28SAndroid Build Coastguard Worker umlal v14.4s, v13.4h, v0.h[7] 433*e1eccf28SAndroid Build Coastguard Worker umlal2 v15.4s, v13.8h, v0.h[7] 434*e1eccf28SAndroid Build Coastguard Worker 106: ext v12.16b, v7.16b, v8.16b, #2*2 435*e1eccf28SAndroid Build Coastguard Worker ext v13.16b, v8.16b, v9.16b, #6*2 436*e1eccf28SAndroid Build Coastguard Worker umlal v14.4s, v12.4h, v0.h[6] 437*e1eccf28SAndroid Build Coastguard Worker umlal2 v15.4s, v12.8h, v0.h[6] 438*e1eccf28SAndroid Build Coastguard Worker umlal v14.4s, v13.4h, v0.h[6] 439*e1eccf28SAndroid Build Coastguard Worker umlal2 v15.4s, v13.8h, v0.h[6] 440*e1eccf28SAndroid Build Coastguard Worker 105: ext v12.16b, v7.16b, v8.16b, #3*2 441*e1eccf28SAndroid Build Coastguard Worker ext v13.16b, v8.16b, v9.16b, #5*2 442*e1eccf28SAndroid Build Coastguard Worker umlal v14.4s, v12.4h, v0.h[5] 443*e1eccf28SAndroid Build Coastguard Worker umlal2 v15.4s, v12.8h, v0.h[5] 444*e1eccf28SAndroid Build Coastguard Worker umlal v14.4s, v13.4h, v0.h[5] 445*e1eccf28SAndroid Build Coastguard Worker umlal2 v15.4s, v13.8h, v0.h[5] 446*e1eccf28SAndroid Build Coastguard Worker 104: //ext v12.16b, v7.16b, v8.16b, #4*2 447*e1eccf28SAndroid Build Coastguard Worker //ext v13.16b, v8.16b, v9.16b, #4*2 448*e1eccf28SAndroid Build Coastguard Worker umlal2 v14.4s, v7.8h, v0.h[4] 449*e1eccf28SAndroid Build Coastguard Worker umlal v15.4s, v8.4h, v0.h[4] 450*e1eccf28SAndroid Build Coastguard Worker umlal2 v14.4s, v8.8h, v0.h[4] 451*e1eccf28SAndroid Build Coastguard Worker umlal v15.4s, v9.4h, v0.h[4] 452*e1eccf28SAndroid Build Coastguard Worker 103: ext v12.16b, v7.16b, v8.16b, #5*2 453*e1eccf28SAndroid Build Coastguard Worker ext v13.16b, v8.16b, v9.16b, #3*2 454*e1eccf28SAndroid Build Coastguard Worker umlal 
v14.4s, v12.4h, v0.h[3] 455*e1eccf28SAndroid Build Coastguard Worker umlal2 v15.4s, v12.8h, v0.h[3] 456*e1eccf28SAndroid Build Coastguard Worker umlal v14.4s, v13.4h, v0.h[3] 457*e1eccf28SAndroid Build Coastguard Worker umlal2 v15.4s, v13.8h, v0.h[3] 458*e1eccf28SAndroid Build Coastguard Worker 102: ext v12.16b, v7.16b, v8.16b, #6*2 459*e1eccf28SAndroid Build Coastguard Worker ext v13.16b, v8.16b, v9.16b, #2*2 460*e1eccf28SAndroid Build Coastguard Worker umlal v14.4s, v12.4h, v0.h[2] 461*e1eccf28SAndroid Build Coastguard Worker umlal2 v15.4s, v12.8h, v0.h[2] 462*e1eccf28SAndroid Build Coastguard Worker umlal v14.4s, v13.4h, v0.h[2] 463*e1eccf28SAndroid Build Coastguard Worker umlal2 v15.4s, v13.8h, v0.h[2] 464*e1eccf28SAndroid Build Coastguard Worker 101: ext v12.16b, v7.16b, v8.16b, #7*2 465*e1eccf28SAndroid Build Coastguard Worker ext v13.16b, v8.16b, v9.16b, #1*2 466*e1eccf28SAndroid Build Coastguard Worker umlal v14.4s, v12.4h, v0.h[1] 467*e1eccf28SAndroid Build Coastguard Worker umlal2 v15.4s, v12.8h, v0.h[1] 468*e1eccf28SAndroid Build Coastguard Worker umlal v14.4s, v13.4h, v0.h[1] 469*e1eccf28SAndroid Build Coastguard Worker umlal2 v15.4s, v13.8h, v0.h[1] 470*e1eccf28SAndroid Build Coastguard Worker 471*e1eccf28SAndroid Build Coastguard Worker uqrshrn v14.4h, v14.4s, #16 472*e1eccf28SAndroid Build Coastguard Worker uqrshrn2 v14.8h, v15.4s, #16 473*e1eccf28SAndroid Build Coastguard Worker uqrshrn v15.8b, v14.8h, #FRACTION_BITS 474*e1eccf28SAndroid Build Coastguard Worker 475*e1eccf28SAndroid Build Coastguard Worker mov v6.16b, v7.16b 476*e1eccf28SAndroid Build Coastguard Worker mov v7.16b, v8.16b 477*e1eccf28SAndroid Build Coastguard Worker mov v8.16b, v9.16b 478*e1eccf28SAndroid Build Coastguard Worker mov v9.16b, v10.16b 479*e1eccf28SAndroid Build Coastguard Worker mov v10.16b, v11.16b 480*e1eccf28SAndroid Build Coastguard Worker.endm/*}}}*/

/* hconv1_25: one horizontal convolution step with a one-halfword tap stride
 * and up to 25 tap pairs either side of the centre.
 *
 * Reads:
 *   x5           index into the jump table below; selects how many tap pairs
 *                are accumulated (1..25).  Presumably the blur radius --
 *                confirm against the caller.
 *   v0-v3        coefficients as unsigned halfwords.  Tap k is weighted by
 *                lane (k % 8) of register v(k / 8); v0.h[0] is the centre
 *                weight.
 *   v31, v4-v11  sliding window of 16-bit samples, v31 holding the earliest.
 *                The centre of the eight outputs is the eight-halfword window
 *                that starts at v6.h[7].
 * Writes:
 *   v15.8b       eight results: the 32-bit sums are narrowed by 16 bits and
 *                then by FRACTION_BITS, each time with rounding and unsigned
 *                saturation.
 *   v31, v4-v10  window advanced by one register (each takes the value of its
 *                successor; v11 is copied into v10 and not refilled here).
 * Clobbers: x12, x16, v12, v13, v14.
 *
 * Dispatch: entry k of the table at 200 is the distance from 100 to the label
 * for tap k (101..125).  Control enters at the label for tap x5 and falls
 * through to 101, so taps x5, x5-1, ..., 1 are all accumulated.
 *
 * NOTE(review): table entry 0 (-4) resolves to the instruction just before
 * 100, not to a tap label, so x5 == 0 is not a usable index.  It looks like a
 * placeholder; confirm that callers never pass 0.
 */
.macro hconv1_25/*{{{*/
.rodata
    200:    .hword -4
            .hword 101f-100f
            .hword 102f-100f
            .hword 103f-100f
            .hword 104f-100f
            .hword 105f-100f
            .hword 106f-100f
            .hword 107f-100f
            .hword 108f-100f
            .hword 109f-100f
            .hword 110f-100f
            .hword 111f-100f
            .hword 112f-100f
            .hword 113f-100f
            .hword 114f-100f
            .hword 115f-100f
            .hword 116f-100f
            .hword 117f-100f
            .hword 118f-100f
            .hword 119f-100f
            .hword 120f-100f
            .hword 121f-100f
            .hword 122f-100f
            .hword 123f-100f
            .hword 124f-100f
            .hword 125f-100f
            .align 4                // pad .rodata to a 16-byte boundary after the table
.text
            // Centre tap: start the accumulators with window(v6.h[7]..) * v0.h[0].
            ext         v12.16b, v6.16b, v7.16b, #7*2
            umull       v14.4s, v12.4h, v0.h[0]
            umull2      v15.4s, v12.8h, v0.h[0]

            // x12 = address of label 100 + table[x5]; jump into the tap ladder.
            adrp        x16, 200b
            add         x16, x16, :lo12:200b
            ldrsh       x12, [x16, x5, LSL #1]
            adr         x16, 100f
            add         x12, x12, x16
    100:    br          x12
            // Each rung: v12 = window k halfwords before the centre,
            //            v13 = window k halfwords after it, both times coefficient k.
    125:    ext         v12.16b, v31.16b, v4.16b, #6*2
            ext         v13.16b, v10.16b, v11.16b, #0*2
            umlal       v14.4s, v12.4h, v3.h[1]
            umlal2      v15.4s, v12.8h, v3.h[1]
            umlal       v14.4s, v13.4h, v3.h[1]
            umlal2      v15.4s, v13.8h, v3.h[1]
    124:    ext         v12.16b, v31.16b, v4.16b, #7*2
            ext         v13.16b, v9.16b, v10.16b, #7*2
            umlal       v14.4s, v12.4h, v3.h[0]
            umlal2      v15.4s, v12.8h, v3.h[0]
            umlal       v14.4s, v13.4h, v3.h[0]
            umlal2      v15.4s, v13.8h, v3.h[0]
    123:    ext         v12.16b, v4.16b, v5.16b, #0*2
            ext         v13.16b, v9.16b, v10.16b, #6*2
            umlal       v14.4s, v12.4h, v2.h[7]
            umlal2      v15.4s, v12.8h, v2.h[7]
            umlal       v14.4s, v13.4h, v2.h[7]
            umlal2      v15.4s, v13.8h, v2.h[7]
    122:    ext         v12.16b, v4.16b, v5.16b, #1*2
            ext         v13.16b, v9.16b, v10.16b, #5*2
            umlal       v14.4s, v12.4h, v2.h[6]
            umlal2      v15.4s, v12.8h, v2.h[6]
            umlal       v14.4s, v13.4h, v2.h[6]
            umlal2      v15.4s, v13.8h, v2.h[6]
    121:    ext         v12.16b, v4.16b, v5.16b, #2*2
            ext         v13.16b, v9.16b, v10.16b, #4*2
            umlal       v14.4s, v12.4h, v2.h[5]
            umlal2      v15.4s, v12.8h, v2.h[5]
            umlal       v14.4s, v13.4h, v2.h[5]
            umlal2      v15.4s, v13.8h, v2.h[5]
    120:    ext         v12.16b, v4.16b, v5.16b, #3*2
            ext         v13.16b, v9.16b, v10.16b, #3*2
            umlal       v14.4s, v12.4h, v2.h[4]
            umlal2      v15.4s, v12.8h, v2.h[4]
            umlal       v14.4s, v13.4h, v2.h[4]
            umlal2      v15.4s, v13.8h, v2.h[4]
    119:    ext         v12.16b, v4.16b, v5.16b, #4*2
            ext         v13.16b, v9.16b, v10.16b, #2*2
            umlal       v14.4s, v12.4h, v2.h[3]
            umlal2      v15.4s, v12.8h, v2.h[3]
            umlal       v14.4s, v13.4h, v2.h[3]
            umlal2      v15.4s, v13.8h, v2.h[3]
    118:    ext         v12.16b, v4.16b, v5.16b, #5*2
            ext         v13.16b, v9.16b, v10.16b, #1*2
            umlal       v14.4s, v12.4h, v2.h[2]
            umlal2      v15.4s, v12.8h, v2.h[2]
            umlal       v14.4s, v13.4h, v2.h[2]
            umlal2      v15.4s, v13.8h, v2.h[2]
    117:    ext         v12.16b, v4.16b, v5.16b, #6*2
            ext         v13.16b, v9.16b, v10.16b, #0*2
            umlal       v14.4s, v12.4h, v2.h[1]
            umlal2      v15.4s, v12.8h, v2.h[1]
            umlal       v14.4s, v13.4h, v2.h[1]
            umlal2      v15.4s, v13.8h, v2.h[1]
    116:    ext         v12.16b, v4.16b, v5.16b, #7*2
            ext         v13.16b, v8.16b, v9.16b, #7*2
            umlal       v14.4s, v12.4h, v2.h[0]
            umlal2      v15.4s, v12.8h, v2.h[0]
            umlal       v14.4s, v13.4h, v2.h[0]
            umlal2      v15.4s, v13.8h, v2.h[0]
    115:    ext         v12.16b, v5.16b, v6.16b, #0*2
            ext         v13.16b, v8.16b, v9.16b, #6*2
            umlal       v14.4s, v12.4h, v1.h[7]
            umlal2      v15.4s, v12.8h, v1.h[7]
            umlal       v14.4s, v13.4h, v1.h[7]
            umlal2      v15.4s, v13.8h, v1.h[7]
    114:    ext         v12.16b, v5.16b, v6.16b, #1*2
            ext         v13.16b, v8.16b, v9.16b, #5*2
            umlal       v14.4s, v12.4h, v1.h[6]
            umlal2      v15.4s, v12.8h, v1.h[6]
            umlal       v14.4s, v13.4h, v1.h[6]
            umlal2      v15.4s, v13.8h, v1.h[6]
    113:    ext         v12.16b, v5.16b, v6.16b, #2*2
            ext         v13.16b, v8.16b, v9.16b, #4*2
            umlal       v14.4s, v12.4h, v1.h[5]
            umlal2      v15.4s, v12.8h, v1.h[5]
            umlal       v14.4s, v13.4h, v1.h[5]
            umlal2      v15.4s, v13.8h, v1.h[5]
    112:    ext         v12.16b, v5.16b, v6.16b, #3*2
            ext         v13.16b, v8.16b, v9.16b, #3*2
            umlal       v14.4s, v12.4h, v1.h[4]
            umlal2      v15.4s, v12.8h, v1.h[4]
            umlal       v14.4s, v13.4h, v1.h[4]
            umlal2      v15.4s, v13.8h, v1.h[4]
    111:    ext         v12.16b, v5.16b, v6.16b, #4*2
            ext         v13.16b, v8.16b, v9.16b, #2*2
            umlal       v14.4s, v12.4h, v1.h[3]
            umlal2      v15.4s, v12.8h, v1.h[3]
            umlal       v14.4s, v13.4h, v1.h[3]
            umlal2      v15.4s, v13.8h, v1.h[3]
    110:    ext         v12.16b, v5.16b, v6.16b, #5*2
            ext         v13.16b, v8.16b, v9.16b, #1*2
            umlal       v14.4s, v12.4h, v1.h[2]
            umlal2      v15.4s, v12.8h, v1.h[2]
            umlal       v14.4s, v13.4h, v1.h[2]
            umlal2      v15.4s, v13.8h, v1.h[2]
    109:    ext         v12.16b, v5.16b, v6.16b, #6*2
            ext         v13.16b, v8.16b, v9.16b, #0*2
            umlal       v14.4s, v12.4h, v1.h[1]
            umlal2      v15.4s, v12.8h, v1.h[1]
            umlal       v14.4s, v13.4h, v1.h[1]
            umlal2      v15.4s, v13.8h, v1.h[1]
    108:    ext         v12.16b, v5.16b, v6.16b, #7*2
            ext         v13.16b, v7.16b, v8.16b, #7*2
            umlal       v14.4s, v12.4h, v1.h[0]
            umlal2      v15.4s, v12.8h, v1.h[0]
            umlal       v14.4s, v13.4h, v1.h[0]
            umlal2      v15.4s, v13.8h, v1.h[0]
    107:    ext         v12.16b, v6.16b, v7.16b, #0*2
            ext         v13.16b, v7.16b, v8.16b, #6*2
            umlal       v14.4s, v12.4h, v0.h[7]
            umlal2      v15.4s, v12.8h, v0.h[7]
            umlal       v14.4s, v13.4h, v0.h[7]
            umlal2      v15.4s, v13.8h, v0.h[7]
    106:    ext         v12.16b, v6.16b, v7.16b, #1*2
            ext         v13.16b, v7.16b, v8.16b, #5*2
            umlal       v14.4s, v12.4h, v0.h[6]
            umlal2      v15.4s, v12.8h, v0.h[6]
            umlal       v14.4s, v13.4h, v0.h[6]
            umlal2      v15.4s, v13.8h, v0.h[6]
    105:    ext         v12.16b, v6.16b, v7.16b, #2*2
            ext         v13.16b, v7.16b, v8.16b, #4*2
            umlal       v14.4s, v12.4h, v0.h[5]
            umlal2      v15.4s, v12.8h, v0.h[5]
            umlal       v14.4s, v13.4h, v0.h[5]
            umlal2      v15.4s, v13.8h, v0.h[5]
    104:    ext         v12.16b, v6.16b, v7.16b, #3*2
            ext         v13.16b, v7.16b, v8.16b, #3*2
            umlal       v14.4s, v12.4h, v0.h[4]
            umlal2      v15.4s, v12.8h, v0.h[4]
            umlal       v14.4s, v13.4h, v0.h[4]
            umlal2      v15.4s, v13.8h, v0.h[4]
    103:    ext         v12.16b, v6.16b, v7.16b, #4*2
            ext         v13.16b, v7.16b, v8.16b, #2*2
            umlal       v14.4s, v12.4h, v0.h[3]
            umlal2      v15.4s, v12.8h, v0.h[3]
            umlal       v14.4s, v13.4h, v0.h[3]
            umlal2      v15.4s, v13.8h, v0.h[3]
    102:    ext         v12.16b, v6.16b, v7.16b, #5*2
            ext         v13.16b, v7.16b, v8.16b, #1*2
            umlal       v14.4s, v12.4h, v0.h[2]
            umlal2      v15.4s, v12.8h, v0.h[2]
            umlal       v14.4s, v13.4h, v0.h[2]
            umlal2      v15.4s, v13.8h, v0.h[2]
    101:    ext         v12.16b, v6.16b, v7.16b, #6*2
            ext         v13.16b, v7.16b, v8.16b, #0*2
            umlal       v14.4s, v12.4h, v0.h[1]
            umlal2      v15.4s, v12.8h, v0.h[1]
            umlal       v14.4s, v13.4h, v0.h[1]
            umlal2      v15.4s, v13.8h, v0.h[1]

            // Narrow 32-bit sums to 16 bits, then to bytes (rounded, saturated).
            uqrshrn     v14.4h, v14.4s, #16
            uqrshrn2    v14.8h, v15.4s, #16
            uqrshrn     v15.8b, v14.8h, #FRACTION_BITS

            // Slide the window along by one register.
            mov         v31.16b, v4.16b
            mov         v4.16b, v5.16b
            mov         v5.16b, v6.16b
            mov         v6.16b, v7.16b
            mov         v7.16b, v8.16b
            mov         v8.16b, v9.16b
            mov         v9.16b, v10.16b
            mov         v10.16b, v11.16b
.endm/*}}}*/

/* The values match the suffixes of the hconv4_<n> macros that follow
 * (hconv4_6, hconv4_12, hconv4_20).  NOTE(review): the consumer of this list
 * is not visible from here.
 */
#define TUNED_LIST4 6, 12, 20

/* hconv4_6: one horizontal convolution step with a four-halfword tap stride
 * (half a vector register; presumably one four-channel pixel -- confirm) and
 * up to 6 tap pairs either side of the centre.
 *
 * Reads:
 *   x5        index into the jump table below (1..6); selects how many tap
 *             pairs are accumulated.  Presumably the blur radius -- confirm
 *             against the caller.
 *   v0        coefficients as unsigned halfwords; tap k uses v0.h[k], with
 *             v0.h[0] the centre weight.
 *   v4-v11    sliding window of 16-bit samples, v4 holding the earliest.
 *             The centre is v7: its low half feeds v14, its high half v15.
 * Writes:
 *   v15.8b    eight result bytes (v14 lanes first, then v15 lanes), narrowed
 *             by 16 bits and then by FRACTION_BITS with rounding and unsigned
 *             saturation.
 *   v4-v10    window advanced by one register (v11 copied into v10).
 * Clobbers: x12, x16, v14.
 *
 * Dispatch works as a fall-through ladder: control enters at the label for
 * tap x5 and runs down to 101.  Because the stride is half a register, odd
 * taps pair the high half of one register with the low half of the next
 * (umlal2 into v14, umlal into v15), while even taps use both halves of one
 * register.
 *
 * NOTE(review): table entry 0 (-4) resolves to the instruction just before
 * 100, not to a tap label, so x5 == 0 is not a usable index; confirm callers
 * never pass 0.
 */
.macro hconv4_6/*{{{*/
.rodata
    200:    .hword -4
            .hword 101f-100f
            .hword 102f-100f
            .hword 103f-100f
            .hword 104f-100f
            .hword 105f-100f
            .hword 106f-100f
            .align 4                // pad .rodata to a 16-byte boundary after the table
.text
            // Centre tap.
            umull       v14.4s, v7.4h, v0.h[0]
            umull2      v15.4s, v7.8h, v0.h[0]

            // x12 = address of label 100 + table[x5]; jump into the tap ladder.
            adrp        x16, 200b
            add         x16, x16, :lo12:200b
            ldrsh       x12, [x16, x5, LSL #1]
            adr         x16, 100f
            add         x12, x12, x16
    100:    br          x12
    106:    umlal       v14.4s, v4.4h,  v0.h[6]
            umlal2      v15.4s, v4.8h,  v0.h[6]
            umlal       v14.4s, v10.4h, v0.h[6]
            umlal2      v15.4s, v10.8h, v0.h[6]
    105:    umlal2      v14.4s, v4.8h,  v0.h[5]
            umlal       v15.4s, v5.4h, v0.h[5]
            umlal2      v14.4s, v9.8h, v0.h[5]
            umlal       v15.4s, v10.4h, v0.h[5]
    104:    umlal       v14.4s, v5.4h, v0.h[4]
            umlal2      v15.4s, v5.8h, v0.h[4]
            umlal       v14.4s, v9.4h, v0.h[4]
            umlal2      v15.4s, v9.8h, v0.h[4]
    103:    umlal2      v14.4s, v5.8h, v0.h[3]
            umlal       v15.4s, v6.4h, v0.h[3]
            umlal2      v14.4s, v8.8h, v0.h[3]
            umlal       v15.4s, v9.4h, v0.h[3]
    102:    umlal       v14.4s, v6.4h, v0.h[2]
            umlal2      v15.4s, v6.8h, v0.h[2]
            umlal       v14.4s, v8.4h, v0.h[2]
            umlal2      v15.4s, v8.8h, v0.h[2]
    101:    umlal2      v14.4s, v6.8h, v0.h[1]
            umlal       v15.4s, v7.4h, v0.h[1]
            umlal2      v14.4s, v7.8h, v0.h[1]
            umlal       v15.4s, v8.4h, v0.h[1]

            // Narrow 32-bit sums to 16 bits, then to bytes (rounded, saturated).
            uqrshrn     v14.4h, v14.4s, #16
            uqrshrn2    v14.8h, v15.4s, #16
            uqrshrn     v15.8b, v14.8h, #FRACTION_BITS

            // Slide the window along by one register.
            mov         v4.16b, v5.16b
            mov         v5.16b, v6.16b
            mov         v6.16b, v7.16b
            mov         v7.16b, v8.16b
            mov         v8.16b, v9.16b
            mov         v9.16b, v10.16b
            mov         v10.16b, v11.16b
.endm/*}}}*/

/* hconv4_12: same scheme as the four-halfword-stride step above, widened to
 * up to 12 tap pairs.
 *
 * Reads:
 *   x5            index into the jump table below (1..12); presumably the
 *                 blur radius -- confirm against the caller.
 *   v0-v1         coefficients as unsigned halfwords; tap k uses lane (k % 8)
 *                 of register v(k / 8), v0.h[0] being the centre weight.
 *   v26-v31,
 *   v4-v11        sliding window of 16-bit samples, v26 holding the earliest.
 *                 The centre is v4: its low half feeds v14, its high half v15.
 * Writes:
 *   v15.8b        eight result bytes, narrowed by 16 bits and then by
 *                 FRACTION_BITS with rounding and unsigned saturation.
 *   v26-v31,
 *   v4-v10        window advanced by one register (v11 copied into v10).
 * Clobbers: x12, x16, v14.
 *
 * NOTE(review): table entry 0 (-4) resolves to the instruction just before
 * 100, not to a tap label, so x5 == 0 is not a usable index; the original
 * author already flagged that entry as possibly removable.
 */
.macro hconv4_12/*{{{*/
.rodata
    200:    .hword -4 //Might need to remove these...
            .hword 101f-100f
            .hword 102f-100f
            .hword 103f-100f
            .hword 104f-100f
            .hword 105f-100f
            .hword 106f-100f
            .hword 107f-100f
            .hword 108f-100f
            .hword 109f-100f
            .hword 110f-100f
            .hword 111f-100f
            .hword 112f-100f
            .align 4                // pad .rodata to a 16-byte boundary after the table
.text
            // Centre tap.
            umull       v14.4s, v4.4h, v0.h[0]
            umull2      v15.4s, v4.8h, v0.h[0]

            // x12 = address of label 100 + table[x5]; jump into the tap ladder.
            adrp        x16, 200b
            add         x16, x16, :lo12:200b
            ldrsh       x12, [x16, x5, LSL #1]
            adr         x16, 100f
            add         x12, x12, x16
    100:    br          x12
    112:    umlal       v14.4s, v26.4h, v1.h[4]
            umlal2      v15.4s, v26.8h, v1.h[4]
            umlal       v14.4s, v10.4h, v1.h[4]
            umlal2      v15.4s, v10.8h, v1.h[4]
    111:    umlal2      v14.4s, v26.8h, v1.h[3]
            umlal       v15.4s, v27.4h, v1.h[3]
            umlal2      v14.4s, v9.8h, v1.h[3]
            umlal       v15.4s, v10.4h, v1.h[3]
    110:    umlal       v14.4s, v27.4h, v1.h[2]
            umlal2      v15.4s, v27.8h, v1.h[2]
            umlal       v14.4s, v9.4h, v1.h[2]
            umlal2      v15.4s, v9.8h, v1.h[2]
    109:    umlal2      v14.4s, v27.8h, v1.h[1]
            umlal       v15.4s, v28.4h, v1.h[1]
            umlal2      v14.4s, v8.8h, v1.h[1]
            umlal       v15.4s, v9.4h, v1.h[1]
    108:    umlal       v14.4s, v28.4h, v1.h[0]
            umlal2      v15.4s, v28.8h, v1.h[0]
            umlal       v14.4s, v8.4h, v1.h[0]
            umlal2      v15.4s, v8.8h, v1.h[0]
    107:    umlal2      v14.4s, v28.8h, v0.h[7]
            umlal       v15.4s, v29.4h, v0.h[7]
            umlal2      v14.4s, v7.8h, v0.h[7]
            umlal       v15.4s, v8.4h, v0.h[7]
    106:    umlal       v14.4s, v29.4h, v0.h[6]
            umlal2      v15.4s, v29.8h, v0.h[6]
            umlal       v14.4s, v7.4h, v0.h[6]
            umlal2      v15.4s, v7.8h, v0.h[6]
    105:    umlal2      v14.4s, v29.8h, v0.h[5]
            umlal       v15.4s, v30.4h, v0.h[5]
            umlal2      v14.4s, v6.8h, v0.h[5]
            umlal       v15.4s, v7.4h, v0.h[5]
    104:    umlal       v14.4s, v30.4h, v0.h[4]
            umlal2      v15.4s, v30.8h, v0.h[4]
            umlal       v14.4s, v6.4h, v0.h[4]
            umlal2      v15.4s, v6.8h, v0.h[4]
    103:    umlal2      v14.4s, v30.8h, v0.h[3]
            umlal       v15.4s, v31.4h, v0.h[3]
            umlal2      v14.4s, v5.8h, v0.h[3]
            umlal       v15.4s, v6.4h, v0.h[3]
    102:    umlal       v14.4s, v31.4h, v0.h[2]
            umlal2      v15.4s, v31.8h, v0.h[2]
            umlal       v14.4s, v5.4h, v0.h[2]
            umlal2      v15.4s, v5.8h, v0.h[2]
    101:    umlal2      v14.4s, v31.8h, v0.h[1]
            umlal       v15.4s, v4.4h, v0.h[1]
            umlal2      v14.4s, v4.8h, v0.h[1]
            umlal       v15.4s, v5.4h, v0.h[1]

            // Narrow 32-bit sums to 16 bits, then to bytes (rounded, saturated).
            uqrshrn     v14.4h, v14.4s, #16
            uqrshrn2    v14.8h, v15.4s, #16
            uqrshrn     v15.8b, v14.8h, #FRACTION_BITS

            // Slide the window along by one register.
            mov         v26.16b, v27.16b
            mov         v27.16b, v28.16b
            mov         v28.16b, v29.16b
            mov         v29.16b, v30.16b
            mov         v30.16b, v31.16b
            mov         v31.16b, v4.16b
            mov         v4.16b, v5.16b
            mov         v5.16b, v6.16b
            mov         v6.16b, v7.16b
            mov         v7.16b, v8.16b
            mov         v8.16b, v9.16b
            mov         v9.16b, v10.16b
            mov         v10.16b, v11.16b
.endm/*}}}*/

.macro hconv4_20/*{{{*/ 841*e1eccf28SAndroid Build Coastguard Worker.rodata 842*e1eccf28SAndroid Build Coastguard Worker 200: .hword -4 843*e1eccf28SAndroid Build Coastguard Worker .hword 101f-100f 844*e1eccf28SAndroid Build Coastguard Worker .hword 102f-100f 845*e1eccf28SAndroid Build Coastguard Worker .hword 103f-100f 846*e1eccf28SAndroid Build Coastguard Worker .hword 104f-100f 847*e1eccf28SAndroid Build Coastguard Worker .hword 105f-100f 848*e1eccf28SAndroid Build Coastguard Worker .hword 106f-100f 849*e1eccf28SAndroid Build Coastguard Worker .hword 107f-100f 850*e1eccf28SAndroid Build Coastguard Worker .hword 108f-100f 851*e1eccf28SAndroid Build Coastguard Worker .hword 109f-100f 852*e1eccf28SAndroid Build Coastguard Worker .hword 110f-100f 853*e1eccf28SAndroid Build Coastguard Worker .hword 111f-100f 854*e1eccf28SAndroid Build Coastguard Worker .hword 112f-100f 855*e1eccf28SAndroid Build Coastguard Worker .hword 113f-100f 856*e1eccf28SAndroid Build Coastguard Worker .hword 114f-100f 857*e1eccf28SAndroid Build Coastguard Worker .hword 115f-100f 858*e1eccf28SAndroid Build Coastguard Worker .hword 116f-100f 859*e1eccf28SAndroid Build Coastguard Worker .hword 117f-100f 860*e1eccf28SAndroid Build Coastguard Worker .hword 118f-100f 861*e1eccf28SAndroid Build Coastguard Worker .hword 119f-100f 862*e1eccf28SAndroid Build Coastguard Worker .hword 120f-100f 863*e1eccf28SAndroid Build Coastguard Worker .align 4 864*e1eccf28SAndroid Build Coastguard Worker.text 865*e1eccf28SAndroid Build Coastguard Worker umull v14.4s, v28.4h, v0.h[0] 866*e1eccf28SAndroid
Build Coastguard Worker umull2 v15.4s, v28.8h, v0.h[0] 867*e1eccf28SAndroid Build Coastguard Worker 868*e1eccf28SAndroid Build Coastguard Worker adrp x16, 200b 869*e1eccf28SAndroid Build Coastguard Worker add x16, x16, :lo12:200b 870*e1eccf28SAndroid Build Coastguard Worker ldrsh x12, [x16, x5, LSL #1] 871*e1eccf28SAndroid Build Coastguard Worker adr x16, 100f 872*e1eccf28SAndroid Build Coastguard Worker add x12, x12, x16 873*e1eccf28SAndroid Build Coastguard Worker 100: br x12 874*e1eccf28SAndroid Build Coastguard Worker 120: umlal v14.4s, v18.4h, v2.h[4] 875*e1eccf28SAndroid Build Coastguard Worker umlal2 v15.4s, v18.8h, v2.h[4] 876*e1eccf28SAndroid Build Coastguard Worker umlal v14.4s, v10.4h, v2.h[4] 877*e1eccf28SAndroid Build Coastguard Worker umlal2 v15.4s, v10.8h, v2.h[4] 878*e1eccf28SAndroid Build Coastguard Worker 119: umlal2 v14.4s, v18.8h, v2.h[3] 879*e1eccf28SAndroid Build Coastguard Worker umlal v15.4s, v19.4h, v2.h[3] 880*e1eccf28SAndroid Build Coastguard Worker umlal2 v14.4s, v9.8h, v2.h[3] 881*e1eccf28SAndroid Build Coastguard Worker umlal v15.4s, v10.4h, v2.h[3] 882*e1eccf28SAndroid Build Coastguard Worker 118: umlal v14.4s, v19.4h, v2.h[2] 883*e1eccf28SAndroid Build Coastguard Worker umlal2 v15.4s, v19.8h, v2.h[2] 884*e1eccf28SAndroid Build Coastguard Worker umlal v14.4s, v9.4h, v2.h[2] 885*e1eccf28SAndroid Build Coastguard Worker umlal2 v15.4s, v9.8h, v2.h[2] 886*e1eccf28SAndroid Build Coastguard Worker 117: umlal2 v14.4s, v19.8h, v2.h[1] 887*e1eccf28SAndroid Build Coastguard Worker umlal v15.4s, v20.4h, v2.h[1] 888*e1eccf28SAndroid Build Coastguard Worker umlal2 v14.4s, v8.8h, v2.h[1] 889*e1eccf28SAndroid Build Coastguard Worker umlal v15.4s, v9.4h, v2.h[1] 890*e1eccf28SAndroid Build Coastguard Worker 116: umlal v14.4s, v20.4h, v2.h[0] 891*e1eccf28SAndroid Build Coastguard Worker umlal2 v15.4s, v20.8h, v2.h[0] 892*e1eccf28SAndroid Build Coastguard Worker umlal v14.4s, v8.4h, v2.h[0] 893*e1eccf28SAndroid Build Coastguard Worker umlal2 v15.4s, 
v8.8h, v2.h[0] 894*e1eccf28SAndroid Build Coastguard Worker 115: umlal2 v14.4s, v20.8h, v1.h[7] 895*e1eccf28SAndroid Build Coastguard Worker umlal v15.4s, v21.4h, v1.h[7] 896*e1eccf28SAndroid Build Coastguard Worker umlal2 v14.4s, v7.8h, v1.h[7] 897*e1eccf28SAndroid Build Coastguard Worker umlal v15.4s, v8.4h, v1.h[7] 898*e1eccf28SAndroid Build Coastguard Worker 114: umlal v14.4s, v21.4h, v1.h[6] 899*e1eccf28SAndroid Build Coastguard Worker umlal2 v15.4s, v21.8h, v1.h[6] 900*e1eccf28SAndroid Build Coastguard Worker umlal v14.4s, v7.4h, v1.h[6] 901*e1eccf28SAndroid Build Coastguard Worker umlal2 v15.4s, v7.8h, v1.h[6] 902*e1eccf28SAndroid Build Coastguard Worker 113: umlal2 v14.4s, v21.8h, v1.h[5] 903*e1eccf28SAndroid Build Coastguard Worker umlal v15.4s, v22.4h, v1.h[5] 904*e1eccf28SAndroid Build Coastguard Worker umlal2 v14.4s, v6.8h, v1.h[5] 905*e1eccf28SAndroid Build Coastguard Worker umlal v15.4s, v7.4h, v1.h[5] 906*e1eccf28SAndroid Build Coastguard Worker 112: umlal v14.4s, v22.4h, v1.h[4] 907*e1eccf28SAndroid Build Coastguard Worker umlal2 v15.4s, v22.8h, v1.h[4] 908*e1eccf28SAndroid Build Coastguard Worker umlal v14.4s, v6.4h, v1.h[4] 909*e1eccf28SAndroid Build Coastguard Worker umlal2 v15.4s, v6.8h, v1.h[4] 910*e1eccf28SAndroid Build Coastguard Worker 111: umlal2 v14.4s, v22.8h, v1.h[3] 911*e1eccf28SAndroid Build Coastguard Worker umlal v15.4s, v23.4h, v1.h[3] 912*e1eccf28SAndroid Build Coastguard Worker umlal2 v14.4s, v5.8h, v1.h[3] 913*e1eccf28SAndroid Build Coastguard Worker umlal v15.4s, v6.4h, v1.h[3] 914*e1eccf28SAndroid Build Coastguard Worker 110: umlal v14.4s, v23.4h, v1.h[2] 915*e1eccf28SAndroid Build Coastguard Worker umlal2 v15.4s, v23.8h, v1.h[2] 916*e1eccf28SAndroid Build Coastguard Worker umlal v14.4s, v5.4h, v1.h[2] 917*e1eccf28SAndroid Build Coastguard Worker umlal2 v15.4s, v5.8h, v1.h[2] 918*e1eccf28SAndroid Build Coastguard Worker 109: umlal2 v14.4s, v23.8h, v1.h[1] 919*e1eccf28SAndroid Build Coastguard Worker umlal v15.4s, v24.4h, 
v1.h[1] 920*e1eccf28SAndroid Build Coastguard Worker umlal2 v14.4s, v4.8h, v1.h[1] 921*e1eccf28SAndroid Build Coastguard Worker umlal v15.4s, v5.4h, v1.h[1] 922*e1eccf28SAndroid Build Coastguard Worker 108: umlal v14.4s, v24.4h, v1.h[0] 923*e1eccf28SAndroid Build Coastguard Worker umlal2 v15.4s, v24.8h, v1.h[0] 924*e1eccf28SAndroid Build Coastguard Worker umlal v14.4s, v4.4h, v1.h[0] 925*e1eccf28SAndroid Build Coastguard Worker umlal2 v15.4s, v4.8h, v1.h[0] 926*e1eccf28SAndroid Build Coastguard Worker 107: umlal2 v14.4s, v24.8h, v0.h[7] 927*e1eccf28SAndroid Build Coastguard Worker umlal v15.4s, v25.4h, v0.h[7] 928*e1eccf28SAndroid Build Coastguard Worker umlal2 v14.4s, v31.8h, v0.h[7] 929*e1eccf28SAndroid Build Coastguard Worker umlal v15.4s, v4.4h, v0.h[7] 930*e1eccf28SAndroid Build Coastguard Worker 106: umlal v14.4s, v25.4h, v0.h[6] 931*e1eccf28SAndroid Build Coastguard Worker umlal2 v15.4s, v25.8h, v0.h[6] 932*e1eccf28SAndroid Build Coastguard Worker umlal v14.4s, v31.4h, v0.h[6] 933*e1eccf28SAndroid Build Coastguard Worker umlal2 v15.4s, v31.8h, v0.h[6] 934*e1eccf28SAndroid Build Coastguard Worker 105: umlal2 v14.4s, v25.8h, v0.h[5] 935*e1eccf28SAndroid Build Coastguard Worker umlal v15.4s, v26.4h, v0.h[5] 936*e1eccf28SAndroid Build Coastguard Worker umlal2 v14.4s, v30.8h, v0.h[5] 937*e1eccf28SAndroid Build Coastguard Worker umlal v15.4s, v31.4h, v0.h[5] 938*e1eccf28SAndroid Build Coastguard Worker 104: umlal v14.4s, v26.4h, v0.h[4] 939*e1eccf28SAndroid Build Coastguard Worker umlal2 v15.4s, v26.8h, v0.h[4] 940*e1eccf28SAndroid Build Coastguard Worker umlal v14.4s, v30.4h, v0.h[4] 941*e1eccf28SAndroid Build Coastguard Worker umlal2 v15.4s, v30.8h, v0.h[4] 942*e1eccf28SAndroid Build Coastguard Worker 103: umlal2 v14.4s, v26.8h, v0.h[3] 943*e1eccf28SAndroid Build Coastguard Worker umlal v15.4s, v27.4h, v0.h[3] 944*e1eccf28SAndroid Build Coastguard Worker umlal2 v14.4s, v29.8h, v0.h[3] 945*e1eccf28SAndroid Build Coastguard Worker umlal v15.4s, v30.4h, v0.h[3] 
946*e1eccf28SAndroid Build Coastguard Worker 102: umlal v14.4s, v27.4h, v0.h[2] 947*e1eccf28SAndroid Build Coastguard Worker umlal2 v15.4s, v27.8h, v0.h[2] 948*e1eccf28SAndroid Build Coastguard Worker umlal v14.4s, v29.4h, v0.h[2] 949*e1eccf28SAndroid Build Coastguard Worker umlal2 v15.4s, v29.8h, v0.h[2] 950*e1eccf28SAndroid Build Coastguard Worker 101: umlal2 v14.4s, v27.8h, v0.h[1] 951*e1eccf28SAndroid Build Coastguard Worker umlal v15.4s, v28.4h, v0.h[1] 952*e1eccf28SAndroid Build Coastguard Worker umlal2 v14.4s, v28.8h, v0.h[1] 953*e1eccf28SAndroid Build Coastguard Worker umlal v15.4s, v29.4h, v0.h[1] 954*e1eccf28SAndroid Build Coastguard Worker 955*e1eccf28SAndroid Build Coastguard Worker uqrshrn v14.4h, v14.4s, #16 956*e1eccf28SAndroid Build Coastguard Worker uqrshrn2 v14.8h, v15.4s, #16 957*e1eccf28SAndroid Build Coastguard Worker uqrshrn v15.8b, v14.8h, #FRACTION_BITS 958*e1eccf28SAndroid Build Coastguard Worker 959*e1eccf28SAndroid Build Coastguard Worker mov v18.16b, v19.16b 960*e1eccf28SAndroid Build Coastguard Worker mov v19.16b, v20.16b 961*e1eccf28SAndroid Build Coastguard Worker mov v20.16b, v21.16b 962*e1eccf28SAndroid Build Coastguard Worker mov v21.16b, v22.16b 963*e1eccf28SAndroid Build Coastguard Worker mov v22.16b, v23.16b 964*e1eccf28SAndroid Build Coastguard Worker mov v23.16b, v24.16b 965*e1eccf28SAndroid Build Coastguard Worker mov v24.16b, v25.16b 966*e1eccf28SAndroid Build Coastguard Worker mov v25.16b, v26.16b 967*e1eccf28SAndroid Build Coastguard Worker mov v26.16b, v27.16b 968*e1eccf28SAndroid Build Coastguard Worker mov v27.16b, v28.16b 969*e1eccf28SAndroid Build Coastguard Worker mov v28.16b, v29.16b 970*e1eccf28SAndroid Build Coastguard Worker mov v29.16b, v30.16b 971*e1eccf28SAndroid Build Coastguard Worker mov v30.16b, v31.16b 972*e1eccf28SAndroid Build Coastguard Worker mov v31.16b, v4.16b 973*e1eccf28SAndroid Build Coastguard Worker mov v4.16b, v5.16b 974*e1eccf28SAndroid Build Coastguard Worker mov v5.16b, v6.16b 
975*e1eccf28SAndroid Build Coastguard Worker mov v6.16b, v7.16b 976*e1eccf28SAndroid Build Coastguard Worker mov v7.16b, v8.16b 977*e1eccf28SAndroid Build Coastguard Worker mov v8.16b, v9.16b 978*e1eccf28SAndroid Build Coastguard Worker mov v9.16b, v10.16b 979*e1eccf28SAndroid Build Coastguard Worker mov v10.16b, v11.16b 980*e1eccf28SAndroid Build Coastguard Worker.endm/*}}}*/ 981*e1eccf28SAndroid Build Coastguard Worker 982*e1eccf28SAndroid Build Coastguard Worker.macro hconv4_25/*{{{*/ 983*e1eccf28SAndroid Build Coastguard Worker.rodata 984*e1eccf28SAndroid Build Coastguard Worker 200: .hword -4 985*e1eccf28SAndroid Build Coastguard Worker .hword 101f-100f 986*e1eccf28SAndroid Build Coastguard Worker .hword 102f-100f 987*e1eccf28SAndroid Build Coastguard Worker .hword 103f-100f 988*e1eccf28SAndroid Build Coastguard Worker .hword 104f-100f 989*e1eccf28SAndroid Build Coastguard Worker .hword 105f-100f 990*e1eccf28SAndroid Build Coastguard Worker .hword 106f-100f 991*e1eccf28SAndroid Build Coastguard Worker .hword 107f-100f 992*e1eccf28SAndroid Build Coastguard Worker .hword 108f-100f 993*e1eccf28SAndroid Build Coastguard Worker .hword 109f-100f 994*e1eccf28SAndroid Build Coastguard Worker .hword 110f-100f 995*e1eccf28SAndroid Build Coastguard Worker .hword 111f-100f 996*e1eccf28SAndroid Build Coastguard Worker .hword 112f-100f 997*e1eccf28SAndroid Build Coastguard Worker .hword 113f-100f 998*e1eccf28SAndroid Build Coastguard Worker .hword 114f-100f 999*e1eccf28SAndroid Build Coastguard Worker .hword 115f-100f 1000*e1eccf28SAndroid Build Coastguard Worker .hword 116f-100f 1001*e1eccf28SAndroid Build Coastguard Worker .hword 117f-100f 1002*e1eccf28SAndroid Build Coastguard Worker .hword 118f-100f 1003*e1eccf28SAndroid Build Coastguard Worker .hword 119f-100f 1004*e1eccf28SAndroid Build Coastguard Worker .hword 120f-100f 1005*e1eccf28SAndroid Build Coastguard Worker .hword 121f-100f 1006*e1eccf28SAndroid Build Coastguard Worker .hword 122f-100f 1007*e1eccf28SAndroid 
Build Coastguard Worker .hword 123f-100f 1008*e1eccf28SAndroid Build Coastguard Worker .hword 124f-100f 1009*e1eccf28SAndroid Build Coastguard Worker .hword 125f-100f 1010*e1eccf28SAndroid Build Coastguard Worker .align 4 1011*e1eccf28SAndroid Build Coastguard Worker.text 1012*e1eccf28SAndroid Build Coastguard Worker umull2 v14.4s, v25.8h, v0.h[0] 1013*e1eccf28SAndroid Build Coastguard Worker umull v15.4s, v26.4h, v0.h[0] 1014*e1eccf28SAndroid Build Coastguard Worker 1015*e1eccf28SAndroid Build Coastguard Worker adrp x16, 200b 1016*e1eccf28SAndroid Build Coastguard Worker add x16, x16, :lo12:200b 1017*e1eccf28SAndroid Build Coastguard Worker ldrsh x12, [x16, x5, LSL #1] 1018*e1eccf28SAndroid Build Coastguard Worker adr x16, 100f 1019*e1eccf28SAndroid Build Coastguard Worker add x12, x12, x16 1020*e1eccf28SAndroid Build Coastguard Worker 100: br x12 1021*e1eccf28SAndroid Build Coastguard Worker 125: ld1 {v12.8h}, [x9] 1022*e1eccf28SAndroid Build Coastguard Worker umlal v14.4s, v12.4h, v3.h[1] 1023*e1eccf28SAndroid Build Coastguard Worker umlal2 v15.4s, v12.8h, v3.h[1] 1024*e1eccf28SAndroid Build Coastguard Worker umlal v14.4s, v10.4h, v3.h[1] 1025*e1eccf28SAndroid Build Coastguard Worker umlal2 v15.4s, v10.8h, v3.h[1] 1026*e1eccf28SAndroid Build Coastguard Worker 124: add x12, x9, #0x08 1027*e1eccf28SAndroid Build Coastguard Worker bic x12, x12, #0x40 1028*e1eccf28SAndroid Build Coastguard Worker ld1 {v12.4h}, [x12], #8 1029*e1eccf28SAndroid Build Coastguard Worker bic x12, x12, #0x40 1030*e1eccf28SAndroid Build Coastguard Worker ld1 {v13.4h}, [x12] 1031*e1eccf28SAndroid Build Coastguard Worker umlal v14.4s, v12.4h, v3.h[0] 1032*e1eccf28SAndroid Build Coastguard Worker umlal v15.4s, v13.4h, v3.h[0] 1033*e1eccf28SAndroid Build Coastguard Worker umlal2 v14.4s, v9.8h, v3.h[0] 1034*e1eccf28SAndroid Build Coastguard Worker umlal v15.4s, v10.4h, v3.h[0] 1035*e1eccf28SAndroid Build Coastguard Worker 123: add x12, x9, #0x10 1036*e1eccf28SAndroid Build Coastguard Worker bic 
x12, x12, #0x40 1037*e1eccf28SAndroid Build Coastguard Worker ld1 {v12.8h}, [x12] 1038*e1eccf28SAndroid Build Coastguard Worker umlal v14.4s, v12.4h, v2.h[7] 1039*e1eccf28SAndroid Build Coastguard Worker umlal2 v15.4s, v12.8h, v2.h[7] 1040*e1eccf28SAndroid Build Coastguard Worker umlal v14.4s, v9.4h, v2.h[7] 1041*e1eccf28SAndroid Build Coastguard Worker umlal2 v15.4s, v9.8h, v2.h[7] 1042*e1eccf28SAndroid Build Coastguard Worker 122: add x12, x9, #0x18 1043*e1eccf28SAndroid Build Coastguard Worker bic x12, x12, #0x40 1044*e1eccf28SAndroid Build Coastguard Worker ld1 {v12.4h}, [x12], #8 1045*e1eccf28SAndroid Build Coastguard Worker bic x12, x12, #0x40 1046*e1eccf28SAndroid Build Coastguard Worker ld1 {v13.4h}, [x12] 1047*e1eccf28SAndroid Build Coastguard Worker umlal v14.4s, v12.4h, v2.h[6] 1048*e1eccf28SAndroid Build Coastguard Worker umlal v15.4s, v13.4h, v2.h[6] 1049*e1eccf28SAndroid Build Coastguard Worker umlal2 v14.4s, v8.8h, v2.h[6] 1050*e1eccf28SAndroid Build Coastguard Worker umlal v15.4s, v9.4h, v2.h[6] 1051*e1eccf28SAndroid Build Coastguard Worker 121: add x12, x9, #0x20 1052*e1eccf28SAndroid Build Coastguard Worker bic x12, x12, #0x40 1053*e1eccf28SAndroid Build Coastguard Worker ld1 {v12.8h}, [x12] 1054*e1eccf28SAndroid Build Coastguard Worker umlal v14.4s, v12.4h, v2.h[5] 1055*e1eccf28SAndroid Build Coastguard Worker umlal2 v15.4s, v12.8h, v2.h[5] 1056*e1eccf28SAndroid Build Coastguard Worker umlal v14.4s, v8.4h, v2.h[5] 1057*e1eccf28SAndroid Build Coastguard Worker umlal2 v15.4s, v8.8h, v2.h[5] 1058*e1eccf28SAndroid Build Coastguard Worker 120: add x12, x9, #0x28 1059*e1eccf28SAndroid Build Coastguard Worker bic x12, x12, #0x40 1060*e1eccf28SAndroid Build Coastguard Worker ld1 {v12.4h}, [x12], #8 1061*e1eccf28SAndroid Build Coastguard Worker bic x12, x12, #0x40 1062*e1eccf28SAndroid Build Coastguard Worker ld1 {v13.4h}, [x12] 1063*e1eccf28SAndroid Build Coastguard Worker umlal v14.4s, v12.4h, v2.h[4] 1064*e1eccf28SAndroid Build Coastguard Worker umlal 
v15.4s, v13.4h, v2.h[4] 1065*e1eccf28SAndroid Build Coastguard Worker umlal2 v14.4s, v7.8h, v2.h[4] 1066*e1eccf28SAndroid Build Coastguard Worker umlal v15.4s, v8.4h, v2.h[4] 1067*e1eccf28SAndroid Build Coastguard Worker 119: add x12, x9, #0x30 1068*e1eccf28SAndroid Build Coastguard Worker bic x12, x12, #0x40 1069*e1eccf28SAndroid Build Coastguard Worker ld1 {v12.8h}, [x12] 1070*e1eccf28SAndroid Build Coastguard Worker umlal v14.4s, v12.4h, v2.h[3] 1071*e1eccf28SAndroid Build Coastguard Worker umlal2 v15.4s, v12.8h, v2.h[3] 1072*e1eccf28SAndroid Build Coastguard Worker umlal v14.4s, v7.4h, v2.h[3] 1073*e1eccf28SAndroid Build Coastguard Worker umlal2 v15.4s, v7.8h, v2.h[3] 1074*e1eccf28SAndroid Build Coastguard Worker 118: add x12, x9, #0x38 1075*e1eccf28SAndroid Build Coastguard Worker bic x12, x12, #0x40 1076*e1eccf28SAndroid Build Coastguard Worker ld1 {v12.4h}, [x12] 1077*e1eccf28SAndroid Build Coastguard Worker umlal v14.4s, v12.4h, v2.h[2] 1078*e1eccf28SAndroid Build Coastguard Worker umlal v15.4s, v17.4h, v2.h[2] 1079*e1eccf28SAndroid Build Coastguard Worker umlal2 v14.4s, v6.8h, v2.h[2] 1080*e1eccf28SAndroid Build Coastguard Worker umlal v15.4s, v7.4h, v2.h[2] 1081*e1eccf28SAndroid Build Coastguard Worker 117: umlal v14.4s, v17.4h, v2.h[1] 1082*e1eccf28SAndroid Build Coastguard Worker umlal2 v15.4s, v17.8h, v2.h[1] 1083*e1eccf28SAndroid Build Coastguard Worker umlal v14.4s, v6.4h, v2.h[1] 1084*e1eccf28SAndroid Build Coastguard Worker umlal2 v15.4s, v6.8h, v2.h[1] 1085*e1eccf28SAndroid Build Coastguard Worker 116: umlal2 v14.4s, v17.8h, v2.h[0] 1086*e1eccf28SAndroid Build Coastguard Worker umlal v15.4s, v18.4h, v2.h[0] 1087*e1eccf28SAndroid Build Coastguard Worker umlal2 v14.4s, v5.8h, v2.h[0] 1088*e1eccf28SAndroid Build Coastguard Worker umlal v15.4s, v6.4h, v2.h[0] 1089*e1eccf28SAndroid Build Coastguard Worker 115: umlal v14.4s, v18.4h, v1.h[7] 1090*e1eccf28SAndroid Build Coastguard Worker umlal2 v15.4s, v18.8h, v1.h[7] 1091*e1eccf28SAndroid Build 
Coastguard Worker umlal v14.4s, v5.4h, v1.h[7] 1092*e1eccf28SAndroid Build Coastguard Worker umlal2 v15.4s, v5.8h, v1.h[7] 1093*e1eccf28SAndroid Build Coastguard Worker 114: umlal2 v14.4s, v18.8h, v1.h[6] 1094*e1eccf28SAndroid Build Coastguard Worker umlal v15.4s, v19.4h, v1.h[6] 1095*e1eccf28SAndroid Build Coastguard Worker umlal2 v14.4s, v4.8h, v1.h[6] 1096*e1eccf28SAndroid Build Coastguard Worker umlal v15.4s, v5.4h, v1.h[6] 1097*e1eccf28SAndroid Build Coastguard Worker 113: umlal v14.4s, v19.4h, v1.h[5] 1098*e1eccf28SAndroid Build Coastguard Worker umlal2 v15.4s, v19.8h, v1.h[5] 1099*e1eccf28SAndroid Build Coastguard Worker umlal v14.4s, v4.4h, v1.h[5] 1100*e1eccf28SAndroid Build Coastguard Worker umlal2 v15.4s, v4.8h, v1.h[5] 1101*e1eccf28SAndroid Build Coastguard Worker 112: umlal2 v14.4s, v19.8h, v1.h[4] 1102*e1eccf28SAndroid Build Coastguard Worker umlal v15.4s, v20.4h, v1.h[4] 1103*e1eccf28SAndroid Build Coastguard Worker umlal2 v14.4s, v31.8h, v1.h[4] 1104*e1eccf28SAndroid Build Coastguard Worker umlal v15.4s, v4.4h, v1.h[4] 1105*e1eccf28SAndroid Build Coastguard Worker 111: umlal v14.4s, v20.4h, v1.h[3] 1106*e1eccf28SAndroid Build Coastguard Worker umlal2 v15.4s, v20.8h, v1.h[3] 1107*e1eccf28SAndroid Build Coastguard Worker umlal v14.4s, v31.4h, v1.h[3] 1108*e1eccf28SAndroid Build Coastguard Worker umlal2 v15.4s, v31.8h, v1.h[3] 1109*e1eccf28SAndroid Build Coastguard Worker 110: umlal2 v14.4s, v20.8h, v1.h[2] 1110*e1eccf28SAndroid Build Coastguard Worker umlal v15.4s, v21.4h, v1.h[2] 1111*e1eccf28SAndroid Build Coastguard Worker umlal2 v14.4s, v30.8h, v1.h[2] 1112*e1eccf28SAndroid Build Coastguard Worker umlal v15.4s, v31.4h, v1.h[2] 1113*e1eccf28SAndroid Build Coastguard Worker 109: umlal v14.4s, v21.4h, v1.h[1] 1114*e1eccf28SAndroid Build Coastguard Worker umlal2 v15.4s, v21.8h, v1.h[1] 1115*e1eccf28SAndroid Build Coastguard Worker umlal v14.4s, v30.4h, v1.h[1] 1116*e1eccf28SAndroid Build Coastguard Worker umlal2 v15.4s, v30.8h, v1.h[1] 
1117*e1eccf28SAndroid Build Coastguard Worker 108: umlal2 v14.4s, v21.8h, v1.h[0] 1118*e1eccf28SAndroid Build Coastguard Worker umlal v15.4s, v22.4h, v1.h[0] 1119*e1eccf28SAndroid Build Coastguard Worker umlal2 v14.4s, v29.8h, v1.h[0] 1120*e1eccf28SAndroid Build Coastguard Worker umlal v15.4s, v30.4h, v1.h[0] 1121*e1eccf28SAndroid Build Coastguard Worker 107: umlal v14.4s, v22.4h, v0.h[7] 1122*e1eccf28SAndroid Build Coastguard Worker umlal2 v15.4s, v22.8h, v0.h[7] 1123*e1eccf28SAndroid Build Coastguard Worker umlal v14.4s, v29.4h, v0.h[7] 1124*e1eccf28SAndroid Build Coastguard Worker umlal2 v15.4s, v29.8h, v0.h[7] 1125*e1eccf28SAndroid Build Coastguard Worker 106: umlal2 v14.4s, v22.8h, v0.h[6] 1126*e1eccf28SAndroid Build Coastguard Worker umlal v15.4s, v23.4h, v0.h[6] 1127*e1eccf28SAndroid Build Coastguard Worker umlal2 v14.4s, v28.8h, v0.h[6] 1128*e1eccf28SAndroid Build Coastguard Worker umlal v15.4s, v29.4h, v0.h[6] 1129*e1eccf28SAndroid Build Coastguard Worker 105: umlal v14.4s, v23.4h, v0.h[5] 1130*e1eccf28SAndroid Build Coastguard Worker umlal2 v15.4s, v23.8h, v0.h[5] 1131*e1eccf28SAndroid Build Coastguard Worker umlal v14.4s, v28.4h, v0.h[5] 1132*e1eccf28SAndroid Build Coastguard Worker umlal2 v15.4s, v28.8h, v0.h[5] 1133*e1eccf28SAndroid Build Coastguard Worker 104: umlal2 v14.4s, v23.8h, v0.h[4] 1134*e1eccf28SAndroid Build Coastguard Worker umlal v15.4s, v24.4h, v0.h[4] 1135*e1eccf28SAndroid Build Coastguard Worker umlal2 v14.4s, v27.8h, v0.h[4] 1136*e1eccf28SAndroid Build Coastguard Worker umlal v15.4s, v28.4h, v0.h[4] 1137*e1eccf28SAndroid Build Coastguard Worker 103: umlal v14.4s, v24.4h, v0.h[3] 1138*e1eccf28SAndroid Build Coastguard Worker umlal2 v15.4s, v24.8h, v0.h[3] 1139*e1eccf28SAndroid Build Coastguard Worker umlal v14.4s, v27.4h, v0.h[3] 1140*e1eccf28SAndroid Build Coastguard Worker umlal2 v15.4s, v27.8h, v0.h[3] 1141*e1eccf28SAndroid Build Coastguard Worker 102: umlal2 v14.4s, v24.8h, v0.h[2] 1142*e1eccf28SAndroid Build Coastguard Worker umlal 
v15.4s, v25.4h, v0.h[2] 1143*e1eccf28SAndroid Build Coastguard Worker umlal2 v14.4s, v26.8h, v0.h[2] 1144*e1eccf28SAndroid Build Coastguard Worker umlal v15.4s, v27.4h, v0.h[2] 1145*e1eccf28SAndroid Build Coastguard Worker 101: umlal v14.4s, v25.4h, v0.h[1] 1146*e1eccf28SAndroid Build Coastguard Worker umlal2 v15.4s, v25.8h, v0.h[1] 1147*e1eccf28SAndroid Build Coastguard Worker umlal v14.4s, v26.4h, v0.h[1] 1148*e1eccf28SAndroid Build Coastguard Worker umlal2 v15.4s, v26.8h, v0.h[1] 1149*e1eccf28SAndroid Build Coastguard Worker 1150*e1eccf28SAndroid Build Coastguard Worker uqrshrn v14.4h, v14.4s, #16 1151*e1eccf28SAndroid Build Coastguard Worker uqrshrn2 v14.8h, v15.4s, #16 1152*e1eccf28SAndroid Build Coastguard Worker uqrshrn v15.8b, v14.8h, #FRACTION_BITS 1153*e1eccf28SAndroid Build Coastguard Worker 1154*e1eccf28SAndroid Build Coastguard Worker st1 {v17.16b}, [x9], #16 1155*e1eccf28SAndroid Build Coastguard Worker bic x9, x9, #0x40 1156*e1eccf28SAndroid Build Coastguard Worker mov v17.16b, v18.16b 1157*e1eccf28SAndroid Build Coastguard Worker mov v18.16b, v19.16b 1158*e1eccf28SAndroid Build Coastguard Worker mov v19.16b, v20.16b 1159*e1eccf28SAndroid Build Coastguard Worker mov v20.16b, v21.16b 1160*e1eccf28SAndroid Build Coastguard Worker mov v21.16b, v22.16b 1161*e1eccf28SAndroid Build Coastguard Worker mov v22.16b, v23.16b 1162*e1eccf28SAndroid Build Coastguard Worker mov v23.16b, v24.16b 1163*e1eccf28SAndroid Build Coastguard Worker mov v24.16b, v25.16b 1164*e1eccf28SAndroid Build Coastguard Worker mov v25.16b, v26.16b 1165*e1eccf28SAndroid Build Coastguard Worker mov v26.16b, v27.16b 1166*e1eccf28SAndroid Build Coastguard Worker mov v27.16b, v28.16b 1167*e1eccf28SAndroid Build Coastguard Worker mov v28.16b, v29.16b 1168*e1eccf28SAndroid Build Coastguard Worker mov v29.16b, v30.16b 1169*e1eccf28SAndroid Build Coastguard Worker mov v30.16b, v31.16b 1170*e1eccf28SAndroid Build Coastguard Worker mov v31.16b, v4.16b 1171*e1eccf28SAndroid Build Coastguard Worker 
mov v4.16b, v5.16b 1172*e1eccf28SAndroid Build Coastguard Worker mov v5.16b, v6.16b 1173*e1eccf28SAndroid Build Coastguard Worker mov v6.16b, v7.16b 1174*e1eccf28SAndroid Build Coastguard Worker mov v7.16b, v8.16b 1175*e1eccf28SAndroid Build Coastguard Worker mov v8.16b, v9.16b 1176*e1eccf28SAndroid Build Coastguard Worker mov v9.16b, v10.16b 1177*e1eccf28SAndroid Build Coastguard Worker mov v10.16b, v11.16b 1178*e1eccf28SAndroid Build Coastguard Worker.endm/*}}}*/ 1179*e1eccf28SAndroid Build Coastguard Worker 1180*e1eccf28SAndroid Build Coastguard Worker/* Dedicated function wrapper for the fetch macro, for the cases where 1181*e1eccf28SAndroid Build Coastguard Worker * performance isn't that important, to keep code size down. 1182*e1eccf28SAndroid Build Coastguard Worker */ 1183*e1eccf28SAndroid Build Coastguard WorkerPRIVATE(fetch_generic_asm) 1184*e1eccf28SAndroid Build Coastguard Worker stp x10, x11, [sp, #-16]! 1185*e1eccf28SAndroid Build Coastguard Worker fetch 1186*e1eccf28SAndroid Build Coastguard Worker ldp x10, x11, [sp], #16 1187*e1eccf28SAndroid Build Coastguard Worker ret 1188*e1eccf28SAndroid Build Coastguard WorkerEND(fetch_generic_asm) 1189*e1eccf28SAndroid Build Coastguard Worker 1190*e1eccf28SAndroid Build Coastguard Worker 1191*e1eccf28SAndroid Build Coastguard Worker/* Fetch the next (16 - (x10 & 15)) columns of data, avoiding reading memory 1192*e1eccf28SAndroid Build Coastguard Worker * beyond that limit, and filling the rest of the vector with the last legal 1193*e1eccf28SAndroid Build Coastguard Worker * pixel. 1194*e1eccf28SAndroid Build Coastguard Worker * Result is in v10 and v11. v8 and v9 are filled with the first legal pixel. 1195*e1eccf28SAndroid Build Coastguard Worker * Note: This function can read beyond the right edge of input if the image is 1196*e1eccf28SAndroid Build Coastguard Worker * narrower than 16 bytes. 
*/
PRIVATE(fetch_clampleft1)
            stp         x29, x30, [sp, #-16]!
            bl          fetch_generic_asm

            /* v8/v9 = first fetched halfword replicated (left-hand padding). */
            dup         v8.8h, v10.h[0]
            dup         v9.8h, v10.h[0]

            ands        x12, x10, #15       /* x12 = columns to pad; 0 -> done */
            beq         1f

            /* Step the counter and the pointers back by the pad amount.
             * NOTE(review): x1, x15 and x19 are presumably the source
             * pointers advanced by the fetch macro -- confirm.
             */
            sub         x10, x10, x12
            sub         x1, x1, x12
            sub         x15, x15, x12
            sub         x19, x19, x12

            /* Lay out {v8, v9, v10, v11} in a 64-byte stack scratch area and
             * reload v10/v11 from x12 halfwords before the start of v10, so
             * the padding slides in from the left.
             */
            sub         sp, sp, #64
            sub         x12, sp, x12, LSL #1
            add         x12, x12, #32
            st1         {v8.8h, v9.8h, v10.8h, v11.8h}, [sp]
            ld1         {v10.8h, v11.8h}, [x12]
            add         sp, sp, #64
1:          ldp         x29, x30, [sp], #16
            ret
END(fetch_clampleft1)

/* Four-channel variant of fetch_clampleft1: the padding element is the first
 * 64-bit group of v10 (four halfwords) rather than a single halfword.
 */
PRIVATE(fetch_clampleft4)
            stp         x29, x30, [sp, #-16]!
            bl          fetch_generic_asm

            /* v8/v9 = first fetched 64-bit group replicated. */
            dup         v8.2d, v10.d[0]
            dup         v9.2d, v10.d[0]

            ands        x12, x10, #15       /* x12 = columns to pad; 0 -> done */
            beq         1f

            sub         x10, x10, x12
            sub         x1, x1, x12
            sub         x15, x15, x12
            sub         x19, x19, x12

            /* Same stack shuffle as fetch_clampleft1. */
            sub         sp, sp, #64
            sub         x12, sp, x12, LSL #1
            add         x12, x12, #32
            st1         {v8.8h, v9.8h, v10.8h, v11.8h}, [sp]
            ld1         {v10.8h, v11.8h}, [x12]
            add         sp, sp, #64
1:          ldp         x29, x30, [sp], #16
            ret
END(fetch_clampleft4)

/* Fetch only the next (x11 & 15) (where 0 means 16) columns of data, avoiding
 * reading memory beyond that limit, and filling the rest of the vector with
 * the last legal pixel.
 * Result is in v10 and v11.  v12 and v13 are filled with the last legal pixel.
 * Note: This function can read beyond the left edge of input if the image is
 * narrower than 16 bytes.
*/
PRIVATE(fetch_clampright1)
            stp         x29, x30, [sp, #-16]!

            /* x12 = (-x11) & 15: how far short of a full 16 columns we are. */
            neg         x12, x11
            ands        x12, x12, #15
            beq         1f                  /* full vector: no shuffle needed */

            /* Back the pointers up so the fetch ends at the last legal column.
             * NOTE(review): x1, x15 and x19 are presumably the source
             * pointers used by the fetch macro -- confirm.
             */
            sub         x1, x1, x12
            sub         x15, x15, x12
            sub         x19, x19, x12
            bl          fetch_generic_asm

            /* v12/v13 = last fetched halfword replicated (right-hand padding). */
            dup         v12.8h, v11.h[7]
            dup         v13.8h, v11.h[7]

            /* x12 is recomputed here rather than carried across the call. */
            neg         x12, x11
            and         x12, x12, #15

            /* Lay out {v10, v11, v12, v13} in a 64-byte stack scratch area and
             * reload v10/v11 starting x12 halfwords in, so the padding slides
             * in from the right.
             */
            sub         sp, sp, #64
            add         x12, sp, x12, LSL #1
            st1         {v10.8h, v11.8h, v12.8h, v13.8h}, [sp]
            ld1         {v10.8h, v11.8h}, [x12]
            add         sp, sp, #64
            ldp         x29, x30, [sp], #16
            ret

            /* Fast-out path: plain fetch, but still provide v12/v13. */
1:          bl          fetch_generic_asm
            dup         v12.8h, v11.h[7]
            dup         v13.8h, v11.h[7]
            ldp         x29, x30, [sp], #16
            ret
END(fetch_clampright1)

/* Four-channel variant of fetch_clampright1: the padding element is the last
 * 64-bit group of v11 (four halfwords) rather than a single halfword.
 */
PRIVATE(fetch_clampright4)
            stp         x29, x30, [sp, #-16]!

            neg         x12, x11
            ands        x12, x12, #15
            beq         1f                  /* full vector: no shuffle needed */

            sub         x1, x1, x12
            sub         x15, x15, x12
            sub         x19, x19, x12
            bl          fetch_generic_asm

            /* v12/v13 = last fetched 64-bit group replicated. */
            dup         v12.2d, v11.d[1]
            dup         v13.2d, v11.d[1]

            neg         x12, x11
            and         x12, x12, #15

            /* Same stack shuffle as fetch_clampright1. */
            sub         sp, sp, #64
            add         x12, sp, x12, LSL #1
            st1         {v10.8h, v11.8h, v12.8h, v13.8h}, [sp]
            ld1         {v10.8h, v11.8h}, [x12]
            add         sp, sp, #64
            ldp         x29, x30, [sp], #16
            ret

            /* Fast-out path: plain fetch, but still provide v12/v13. */
1:          bl          fetch_generic_asm
            dup         v12.2d, v11.d[1]
            dup         v13.2d, v11.d[1]
            ldp         x29, x30, [sp], #16
            ret
END(fetch_clampright4)

/* Given values in v10 and v11, and an index in x11, sweep the (x11 & 15)th
 * value across to fill the rest of the register pair.
Used for filling the
 * right hand edge of the window when reading too close to the right hand edge
 * of the image.
 * Also returns a dup-ed copy of the last element in v12 for the tail-fill
 * case (this happens incidentally in the common path, but must be done
 * deliberately in the fast-out path).
 */
PRIVATE(prefill_sweepright1)
            ands        x12, x11, #15       /* 0 means all 16 are valid */
            beq         1f
            sub         x12, x12, #1        /* index of the last valid halfword */

            /* Spill v10/v11 to a 64-byte stack scratch area, overwrite
             * everything from the last valid element onwards with copies of
             * that element, and reload.  The 32-byte store starts at most 28
             * bytes in, so it stays inside the scratch area.
             */
            sub         sp, sp, #64
            st1         {v10.8h,v11.8h}, [sp]
            add         x12, sp, x12, LSL #1
            ld1r        {v12.8h}, [x12]
            ld1r        {v13.8h}, [x12]
            st1         {v12.8h,v13.8h}, [x12]
            ld1         {v10.8h,v11.8h}, [sp]
            add         sp, sp, #64
            ret

            /* Fast-out path: nothing to sweep, just provide v12/v13. */
1:          dup         v12.8h, v11.h[7]
            dup         v13.8h, v11.h[7]
            ret
END(prefill_sweepright1)

/* Four-channel variant of prefill_sweepright1: the swept element is a 64-bit
 * group of four halfwords, and x11 & 15 is expected to be a multiple of 4.
 *
 * The previous version of the slow path stored only v13 (16 bytes), and v13
 * was never written on that path.  The data being swept in was therefore
 * whatever the caller had left in v13, and for an index of 4 or 8 the store
 * did not reach the end of v11.  The slow path now mirrors
 * prefill_sweepright1: both v12 and v13 hold the replicated element and both
 * are stored, which covers the remainder of the v10/v11 pair for every
 * index.  On return v12 and v13 hold the replicated element on both paths.
 */
PRIVATE(prefill_sweepright4)
            ands        x12, x11, #15       /* 0 means all 16 are valid */
            beq         1f
            sub         x12, x12, #4        /* index of the last valid group */

            sub         sp, sp, #64
            st1         {v10.8h,v11.8h}, [sp]
            add         x12, sp, x12, LSL #1
            ld1r        {v12.2d}, [x12]     /* replicate the last valid group */
            mov         v13.16b, v12.16b
            /* The 32-byte store starts at most 16 bytes in (index 12), so it
             * stays inside the 64-byte scratch area.
             */
            st1         {v12.8h,v13.8h}, [x12]
            ld1         {v10.8h,v11.8h}, [sp]
            add         sp, sp, #64
            ret

            /* Fast-out path: nothing to sweep, just provide v12/v13. */
1:          dup         v12.2d, v11.d[1]
            dup         v13.2d, v11.d[1]
            ret
END(prefill_sweepright4)

/* The main loop keeps a sliding window of data that has already been convolved
 * in the vertical axis for the current line.  This usually stays in the
 * register file, but spills to memory for large windows.  The first thing that
 * needs to be done at start-up is to fill this window with image data, taking
 * into account the padding needed if the left or right edges of the image fall
 * within this window.
 */

/* Because the window is in the register file writes to it cannot be indexed
 * by another register.  Consequently the fill loops are unrolled to address
 * the registers directly.
This macro distinguishes between writes to the
 * register file and writes to the spill buffer (indicated by a destination
 * register named xx).
 *
 *      ra, rb   -- destination registers, or the literal xx to store via x9
 *      sra, srb -- source registers holding the data to place in the window
 *
 * Stores through x9 post-increment it by the number of bytes written.
 */
.macro prefill_out ra, rb, sra, srb
  .ifnc \ra,xx
    /* Register destinations: a move is only emitted when the destination is
     * not already the source.
     */
    .ifnc \ra,\sra
            mov         \ra, \sra
    .endif
    .ifnc \rb,\srb
            mov         \rb, \srb
    .endif
  .else
    .ifnc \rb,xx
    /* First half goes to the buffer (bit 6 of x9 is cleared first), second
     * half goes to a register.
     */
            bic         x9, x9, #0x40
            st1         {\sra}, [x9], #16
            mov         \rb, \srb
    .else
    /* Both halves go to the buffer. */
            st1         {\sra,\srb}, [x9], #32
    .endif
  .endif
.endm

/* This macro provides the list of registers representing the window, and the
 * cases where the register file is too small and a spill buffer is used
 * instead.
* Since several specialisations of each function are generated, this also
 * culls superfluous iterations, and sets the variable `i` for subsequent
 * macros indicating the current index into the window.
 *
 *      macro     -- stage name; prefill_\macro is expanded once per window line
 *      nextmacro -- stage name used to build the \next and \after labels
 *      max_r     -- accepted but not referenced in this macro body
 *      step      -- forwarded unchanged to prefill_\macro
 *      label     -- prefix for every label generated here
 */
.macro prefill_list, macro, nextmacro, max_r, step, label
  /* One unrolled instance.  It is only emitted when the window is at least
   * \line chunks of 16 long; `i` is the window index at which it starts.
   * The instance is labelled \label\macro\line and is handed the labels of
   * \nextmacro at the same line (\next) and at the following line (\after).
   */
  .macro ifneeded macro, nextmacro, line, nextline, ra, rb, step, label
    .if windowsize >= (\line * 16)
      .set i, windowsize - (\line * 16)
\label\macro\line:
            prefill_\macro \label\nextmacro\line, \label\nextmacro\nextline, \ra, \rb, \step
    .endif
  .endm
            /* Lines 13 and 12 go entirely to the spill buffer (xx, xx); line
             * 11 is split between the buffer and v17; the rest are registers.
             */
            ifneeded \macro \nextmacro, 13, 12, xx,      xx,      \step, \label
            ifneeded \macro \nextmacro, 12, 11, xx,      xx,      \step, \label
            ifneeded \macro \nextmacro, 11, 10, xx,      v17.16b, \step, \label
            ifneeded \macro \nextmacro, 10,  9, v18.16b, v19.16b, \step, \label
            ifneeded \macro \nextmacro,  9,  8, v20.16b, v21.16b, \step, \label
            ifneeded \macro \nextmacro,  8,  7, v22.16b, v23.16b, \step, \label
            ifneeded \macro \nextmacro,  7,  6, v24.16b, v25.16b, \step, \label
            ifneeded \macro \nextmacro,  6,  5, v26.16b, v27.16b, \step, \label
            ifneeded \macro \nextmacro,  5,  4, v28.16b, v29.16b, \step, \label
            ifneeded \macro \nextmacro,  4,  3, v30.16b, v31.16b, \step, \label
            ifneeded \macro \nextmacro,  3,  2, v4.16b,  v5.16b,  \step, \label
            ifneeded \macro \nextmacro,  2,  1, v6.16b,  v7.16b,  \step, \label
            ifneeded \macro \nextmacro,  1,  0, v8.16b,  v9.16b,  \step, \label
            /* Line 0 is the position just past the end of the window: both a
             * fall-through from line 1 and a jump to this label leave the
             * fill code.
             */
\label\macro\()0:
            b           \label\()_end
  .purgem ifneeded
.endm

/* These macros represent the possible stages of filling the window.
 * Each macro is unrolled enough times that it can fill the entire window
 * itself, but normally it will have to hand control to subsequent macros
 * part-way through and this is done using labels named \next and \after, where
 * \next is the next macro starting at the same window position and \after is
 * the next macro starting after the current window position.
 */

/* leftfill: v8 and v9 contain the left padding value.  While the window
 * extends outside of the image on the left-hand side, and at least 16 more
 * padding values are needed in the window, store v8 and v9 into the window.
* Otherwise skip forward to storing image data.
 */
.macro prefill_leftfill, next, after, ra, rb, step
            cmp         x10, #i+16
            blo         \next                   // x10 < i+16: hand over to \next at this window position
            prefill_out \ra, \rb, v8.16b, v9.16b
            // no branch here: execution falls through to whatever is emitted next
.endm

/* leftedge: The very first non-fill or partial-fill chunk from the image is
 * already loaded (as it was used to calculate the left padding value), so
 * store it here, and then drop into the regular load/store cycle in the next
 * macro.
 */
.macro prefill_leftedge, next, after, ra, rb, step
1:          prefill_out \ra, \rb, v10.16b, v11.16b
            b           \after                  // continue with the following stage at the next window position
.endm

/* dofetch: Copy chunks of the image into the window without any complications
 * from edge conditions.
*/
.macro prefill_dofetch, next, after, ra, rb, step
            cmp         x11, #i+16
            bls         \next                   // x11 <= i+16: hand over to \next at this window position
            bl          fetch_generic_asm       // the result is taken from v10/v11 below
            prefill_out \ra, \rb, v10.16b, v11.16b
            // no branch here: execution falls through to whatever is emitted next
.endm

/* rightedge: The last fetch (currently in v10 and v11) may have gone beyond
 * the right-hand edge of the image.  In that case sweep the last valid pixel
 * across the rest of the chunk, and in either case prepare padding data in v12
 * and v13 for the next macro.  This is done in fetch_clampright.
 * This only happens once before going on to the next macro.
 * Sometimes leftedge also covers the rightedge case, in which case this has
 * to be skipped altogether.
*/
.macro prefill_rightedge, next, after, ra, rb, step
            cmp         x11, #i
            bls         \next                   // x11 <= i: skip this stage, go to \next at this position
            bl          fetch_clampright\step
            prefill_out \ra, \rb, v10.16b, v11.16b
            b           \after                  // done once; continue with the following stage
.endm

/* rightfill: The rest of the window is simply filled with right padding from
 * v12 and v13.
 */
.macro prefill_rightfill, next, after, ra, rb, step
            // \next and \after are not used: each instance falls through to the next one
            prefill_out \ra, \rb, v12.16b, v13.16b
.endm

/* Here all of the macros above are unrolled and laid out in the proper order.
*/
.macro prefill_body, max_r, step, label
            /* Each list names the stage to expand and the stage whose labels
             * it may branch to.  prefill_rightfill never uses its label
             * arguments, so no labels are needed for the `oops` stage named
             * in the last list.
             */
            prefill_list leftfill,  leftedge,   \max_r, \step, \label
            prefill_list leftedge,  dofetch,    \max_r, \step, \label
            prefill_list dofetch,   rightedge,  \max_r, \step, \label
            prefill_list rightedge, rightfill,  \max_r, \step, \label
            prefill_list rightfill, oops,       \max_r, \step, \label
            /* Common exit: every list ends with a branch to this label. */
\label\()_end:
.endm


/* Fill the convolution window with context data.  The aim here is to load
 * exactly 2*r columns, and in the main loop to read as many columns as will be
 * written.  This is complicated by the window being divided into chunks at
 * register boundaries, and the need to handle cases when the input starts very
 * close to the left or right (or both) edges of the image and the need to fill
 * the spaces that leaves with left and right edge padding values.
*
 * Input:
 *      x1 -- src
 *      x2 -- pitch
 *      x3 -- count
 *      x4 -- available image data right of src pointer
 *      x5 -- r
 *      x6 -- rup
 *      x7 -- rdn
 *      x8 -- available image data left of src pointer
 *      x9 -- buffer (if needed)
 *      x13 = -pitch
 *      x15 = top-row in
 *      x19 = bottom-row in
 * Output:
 *      x4 -= min(inlen, count + windowsize - centertap)
 *      x1 += min(inlen, count + windowsize - centertap)
 *      x15 += min(inlen, count + windowsize - centertap)
 *      x19 += min(inlen, count + windowsize - centertap)
 * Modifies:
 *      x10 -- fill start index in the window
 *      x11 -- fill stop index in the window
 *      x12 -- scratch
 */
.macro prefill step=1, max_r=25, label=xx
/* windowsize is 2 * max_r * step rounded up to a multiple of 16; centertap is
 * the window index that lies max_r * step before the end of the window.
 */
.set windowsize, (((\max_r + \max_r) * \step + 15) & ~15)
.set centertap, (windowsize - \max_r * \step)
            mov         x10, #centertap
            subs        x10, x10, x8
            csel        x10, xzr, x10, lo       // x10 = (centertap < x8) ? 0 : centertap - x8

            subs        x11, x4, #windowsize - centertap
            csel        x11, xzr, x11, hs       // keep the (negative) shortfall, or 0 if there is none
            add         x11, x11, #windowsize   // x11 = min(windowsize, x4 + centertap)

            /* x10 indicates where in the window legal image data begins.
             * x11 indicates where in the window legal image data ends.
             * When starting near the centre of a large image these would be
             * zero and windowsize respectively, but when starting near the
             * edges this can change.
             * When starting on the leftmost pixel, x10 will be centertap.
             * When starting on the rightmost pixel, x11 will be centertap+1.
             */

            /* x4 indicates how much data there is between the current pointers
             * and the right edge of the image.  The pointers currently point
             * to the data needed at centertap.  The subsequent code will
             * consume (windowsize - x10) data, but only the data from
             * centertap to windowsize comes out of x4's budget.
             */
1:          subs        x4, x4, #windowsize - centertap
            csel        x4, xzr, x4, lo         // clamp at zero rather than wrapping

            /* And the pointers need to rewind to the start of the window.
             */
            sub         x1, x1, #centertap
            sub         x15, x15, #centertap
            sub         x19, x19, #centertap

            /* Unless x8 indicated that there wasn't that much data available.
             */
            add         x1, x1, x10
            add         x15, x15, x10
            add         x19, x19, x10

            /* Get the first chunk, and add padding to align it to the window
             * if necessary.
             */
            bl          fetch_clampleft\step

            /* Sometimes the start and the end of the window are in the same
             * chunk.  In that case both ends need filler at the outset.
             */
            sub         x12, x11, #1
            eor         x12, x10, x12
            cmp         x12, #16                // below 16 <=> x10 and x11-1 differ only in their low four bits
            bhs         1f
            bl          prefill_sweepright\step

            /* Iterate through all the points in the window and fill them in
             * with padding or image data as needed.
             */
1:          prefill_body \max_r, \step, \label
.endm

/* The main body of the convolve functions.  Having already pre-filled the
 * convolution window with 2*r input values, the logic settles into a regular
 * pattern of reading and writing at a 1:1 rate until either input or output
 * expires.  The input leads the output by r values, so when processing all the
 * way to the right-hand edge, or within r pixels of that edge, the input will
 * run out first.  In the case of very narrow images, or sub-windows starting
 * near the right edge, the input may already have run out while the
 * convolution window was being filled and this loop will start with a
 * zero-length input.
*
 * Once the input runs out, the rest of the output must be processed by padding
 * the remainder of the window with pad value from the last valid pixel from
 * the source.
 *
 * Input:
 *      x0 = dst
 *      x1 = src
 *      x2 = pitch
 *      x3 = count
 *      x4 = inlen
 *      x5 = r
 *      x6 = rup
 *      x7 = rdn
 *      x9 = buffer
 *      x13 = -pitch
 *      x15 = top-row in
 *      x19 = bottom-row in
 * Modifies
 *      x8 = fetch code pointer
 */
.macro conv_body core, step=1, max_r=25, labelc="", labelnc=""

            /* If x4 >= x3 then there's no need for clipping.  The main loop
             * needs to exit when either x3 or x4 runs out, so clamp x4 to be
             * no greater than x3 and use x4 for the loop.
             * However, if x4 comes out of the loop with less than 16 bytes
             * left, a partial read would be necessary to avoid reading beyond
             * the end of the image.  To avoid this, clamp x4 to the next
             * multiple of 16, which is still sufficient to force it out of the
             * loop but doesn't imply a rewind.
             */
            add         x12, x3, #15
            bic         x12, x12, #15           // x12 = x3 rounded up to a multiple of 16
            cmp         x4, x12
            csel        x4, x12, x4, hi         // x4 = min(x4, x12)

            /* First calculate the entry-point into the internal fetch logic.
             * This is done so the same function can service several kernel
             * sizes.
             */
            adrp        x8, \labelnc
            add         x8, x8, #:lo12:\labelnc
            sub         x8, x8, x5, LSL #5
            sub         x8, x8, x5, LSL #3      // x8 = \labelnc - 40 * r
            cmp         x5, x6
            ccmp        x5, x7, #0, eq
            beq         5f                      // r == rup && r == rdn: keep the entry point just computed

            /* if (r != rup || r != rdn) then the address-clamping table should
             * be used rather than the short-cut version.
             */
            adrp        x8, \labelc
            add         x8, x8, #:lo12:\labelc
            sub         x8, x8, x5, LSL #6
            add         x8, x8, x5, LSL #3      // x8 = \labelc - 56 * r
            b           5f

            /* Main loop: ... */
            .align 4
3:          /* first perform a vertical convolution from memory to get the next
             * 16 taps of the horizontal window into the register file...
             */
            fetch max_r=\max_r, labelc=\labelc, labelnc=\labelnc, reg=x8

            /* ...then perform a horizontal convolution on that window to
             * produce eight output bytes, and slide the window along.
             * This has to be done twice to match the 16-way vertical pass.
             * It would be preferable to have twice the work done in \core, but
             * that would demand yet another variant on those macros and would
             * perturb the register allocation severely.
             */
            \core
            st1         {v15.8b}, [x0], #8
            \core
            st1         {v15.8b}, [x0], #8

            sub         x3, x3, #16
5:          subs        x4, x4, #16
            bhi         3b
            /* Here there's 16 or fewer bytes available before the edge of the
             * source image.  x4 holds that count minus 16 (because it was
             * decremented before the first iteration ran).  The last read may
             * not be a whole chunk, and beyond that a fill value must be used.
             *
             * Of course, none of that matters if there's no more output to
             * produce...
             */
            cbz         x3, 5f

            /* Oh well. */
            adds        x4, x4, #16             // undo the final decrement
            bne         1f                      // some input remains: fetch it with clamping
            /* No input remains: pad v10/v11 from the top of v9. */
  .if \step==1
            dup         v10.8h, v9.h[7]
            dup         v11.8h, v9.h[7]
  .else
            dup         v10.2d, v9.d[1]
            dup         v11.2d, v9.d[1]
  .endif
            b           3f

            /* To avoid reading past end of input, rewind pointers by (16-x4)
             * to ensure that they're exactly 16 bytes from the edge.
             */
1:          mov         x11, x4
            bl          fetch_clampright\step
            /* Now to put this padding to use, perform any remaining
             * iterations.  This is done at half the rate of the main loop,
             * because there's no longer pressure from a 16-lane window filler.
             */
3:          \core
  .if \step==1
            dup         v11.8h, v11.h[7]
  .else
            dup         v11.2d, v11.d[1]
  .endif
            subs        x3, x3, #8
            blo         4f                      // fewer than 8 outputs were wanted: partial store
            st1         {v15.8b}, [x0], #8
            bne         3b                      // more output wanted
            b           5f

            /* If the final iteration contained 0 < l < 8 values, then perform
             * a piecewise store of the final vector.
             */
            // x3 = l - 8 here, so its low three bits are the binary digits of l
4:          tbz         x3, #2, 1f
            st1         {v15.s}[0], [x0], #4
            ext         v15.8b, v15.8b, v15.8b, #4
1:          tbz         x3, #1, 1f
            st1         {v15.h}[0], [x0], #2
            ext         v15.8b, v15.8b, v15.8b, #2
1:          tbz         x3, #0, 5f
            st1         {v15.b}[0], [x0], #1
            ext         v15.8b, v15.8b, v15.8b, #1
5:          mov         x0, #0                  // x0 is zero on every exit path
.endm


/* One single-channel (step=1) convolve routine per radius in TUNED_LIST1,
 * plus one for radius 25.  Each saves x29/x30 around the macro bodies because
 * those bodies contain bl instructions.
 */
.irp r, TUNED_LIST1, 25
PRIVATE(convolve1_\r)
            stp         x29,x30, [sp, #-16]!

            prefill     step=1, max_r=\r, label=.Lcnv1_\r

            conv_body   core=hconv1_\r, step=1, max_r=\r, labelc=.Lcnv1_\r, labelnc=.Lcnvnc1_\r

            ldp         x29,x30, [sp], #16
            ret
END(convolve1_\r)
.endr

/* One step=4 convolve routine per radius in TUNED_LIST4, plus one for radius
 * 25.  These reserve stack space in which x9 is pointed at a spill buffer.
 */
.irp r, TUNED_LIST4, 25
PRIVATE(convolve4_\r)
            sub         x9, sp, #0x40
            stp         x29,x30, [sp, #-(16 + 0x40 + 0x80)]!
            bic         x9, x9, #0x7f

            /* x9 now points to a 0x40 byte buffer on the stack whose address
             * has the low 7 bits clear.  This allows easy address calculation
             * in the wrap-around cases.
*/

            prefill     step=4, max_r=\r, label=.Lcnv4_\r

            conv_body   core=hconv4_\r, step=4, max_r=\r, labelc=.Lcnv4_\r, labelnc=.Lcnvnc4_\r

            ldp         x29,x30, [sp], #(16 + 0x40 + 0x80)
            ret
END(convolve4_\r)
.endr

/* void rsdIntrinsicBlurU1_K(
 *                  void *out,      // x0
 *                  void *in,       // x1
 *                  size_t w,       // x2
 *                  size_t h,       // x3
 *                  size_t p,       // x4
 *                  size_t x,       // x5
 *                  size_t y,       // x6
 *                  size_t count,   // x7
 *                  size_t r,       // [sp]
 *                  uint16_t *tab); // [sp,#8]
 *
 * Rearranges the C arguments into the register layout documented for
 * prefill/conv_body, loads 64 bytes from tab into v0-v3, and dispatches to
 * the convolve1_* routine for the smallest tuned radius that is >= r
 * (convolve1_25 otherwise).  The low halves of v8-v15 and x19/x30 are saved
 * on entry and restored before returning.
 */
ENTRY(rsdIntrinsicBlurU1_K)
            stp         x19,x30, [sp, #-16]!
            sub         x8, sp, #32             // x8 = address of the upper half of the 64-byte save area
            sub         sp, sp, #64
            st1         {v8.1d - v11.1d}, [sp]
            st1         {v12.1d - v15.1d}, [x8]
            mov         x8, x5          // x
            ldr         w5, [sp,#80]    // r
            sub         x9, x2, x8      // w - x
            sub         x10, x3, x6     // h - y
            mov         x2, x4          // pitch
            mov         x3, x7          // count
            sub         x7, x10, #1     // h - y - 1
            mov         x4, x9          // inlen = (w - x)

            ldr         x12, [sp, #88]  // tab

            add         x1, x1, x8      // src += x

            cmp         x6, x5
            csel        x6, x5, x6, hs  // rup = min(r, y)
            cmp         x7, x5
            csel        x7, x5, x7, hs  // rdn = min(r, h - y - 1)

            sub         x13, xzr, x2    // -pitch
            msub        x15, x2, x6, x1 // x15 = src - pitch * rup
            madd        x19, x2, x7, x1 // x19 = src + pitch * rdn

            ld1         {v0.8h,v1.8h}, [x12], #32
            ld1         {v2.8h,v3.8h}, [x12], #32

            /* The convolve routines are entered with plain branches, so the
             * link register is pointed at the epilogue by hand.
             */
            adr         x30, 1f
  .irp r, TUNED_LIST1
            cmp         x5, #\r
            bls         convolve1_\r
  .endr
            b           convolve1_25

1:          ld1         {v8.1d - v11.1d}, [sp], #32
            ld1         {v12.1d - v15.1d}, [sp], #32
            ldp         x19,x30, [sp], #16
            ret
END(rsdIntrinsicBlurU1_K)

/* void rsdIntrinsicBlurU4_K(
 *                  void *out,      // x0
 *                  void *in,       // x1
 *                  size_t w,       // x2
 *                  size_t h,       // x3
 *                  size_t p,       // x4
 *                  size_t x,       // x5
 *                  size_t y,       // x6
 *                  size_t count,   // x7
 *                  size_t r,       // [sp]
 *                  uint16_t *tab); // [sp,#8]
 */
ENTRY(rsdIntrinsicBlurU4_K)
            stp         x19,x30, [sp, #-16]!
1826*e1eccf28SAndroid Build Coastguard Worker sub x8, sp, #32 1827*e1eccf28SAndroid Build Coastguard Worker sub sp, sp, #64 1828*e1eccf28SAndroid Build Coastguard Worker st1 {v8.1d - v11.1d}, [sp] 1829*e1eccf28SAndroid Build Coastguard Worker st1 {v12.1d - v15.1d}, [x8] 1830*e1eccf28SAndroid Build Coastguard Worker lsl x8, x5, #2 // x 1831*e1eccf28SAndroid Build Coastguard Worker lsl x2, x2, #2 1832*e1eccf28SAndroid Build Coastguard Worker ldr w5, [sp,#80] // r 1833*e1eccf28SAndroid Build Coastguard Worker sub x9, x2, x8 // w - x 1834*e1eccf28SAndroid Build Coastguard Worker sub x10, x3, x6 // h - y 1835*e1eccf28SAndroid Build Coastguard Worker mov x2, x4 // pitch 1836*e1eccf28SAndroid Build Coastguard Worker lsl x3, x7, #2 // count 1837*e1eccf28SAndroid Build Coastguard Worker sub x7, x10, #1 // h - y - 1 1838*e1eccf28SAndroid Build Coastguard Worker mov x4, x9 // inlen = (w - x) 1839*e1eccf28SAndroid Build Coastguard Worker 1840*e1eccf28SAndroid Build Coastguard Worker ldr x12, [sp, #88] 1841*e1eccf28SAndroid Build Coastguard Worker 1842*e1eccf28SAndroid Build Coastguard Worker add x1, x1, x8 // in += x 1843*e1eccf28SAndroid Build Coastguard Worker 1844*e1eccf28SAndroid Build Coastguard Worker cmp x6, x5 1845*e1eccf28SAndroid Build Coastguard Worker csel x6, x5, x6, hs // rup = min(r, y) 1846*e1eccf28SAndroid Build Coastguard Worker cmp x7, x5 1847*e1eccf28SAndroid Build Coastguard Worker csel x7, x5, x7, hs // rdn = min(r, h - y - 1) 1848*e1eccf28SAndroid Build Coastguard Worker 1849*e1eccf28SAndroid Build Coastguard Worker 1850*e1eccf28SAndroid Build Coastguard Worker sub x13, xzr, x2 1851*e1eccf28SAndroid Build Coastguard Worker msub x15, x2, x6, x1 1852*e1eccf28SAndroid Build Coastguard Worker madd x19, x2, x7, x1 1853*e1eccf28SAndroid Build Coastguard Worker 1854*e1eccf28SAndroid Build Coastguard Worker ld1 {v0.8h,v1.8h}, [x12], #32 1855*e1eccf28SAndroid Build Coastguard Worker ld1 {v2.8h,v3.8h}, [x12], #32 1856*e1eccf28SAndroid Build Coastguard Worker 
1857*e1eccf28SAndroid Build Coastguard Worker adr x30, 1f 1858*e1eccf28SAndroid Build Coastguard Worker .irp r, TUNED_LIST4 1859*e1eccf28SAndroid Build Coastguard Worker cmp x5, #\r 1860*e1eccf28SAndroid Build Coastguard Worker bls convolve4_\r 1861*e1eccf28SAndroid Build Coastguard Worker .endr 1862*e1eccf28SAndroid Build Coastguard Worker b convolve4_25 1863*e1eccf28SAndroid Build Coastguard Worker 1864*e1eccf28SAndroid Build Coastguard Worker1: ld1 {v8.1d - v11.1d}, [sp], #32 1865*e1eccf28SAndroid Build Coastguard Worker ld1 {v12.1d - v15.1d}, [sp], #32 1866*e1eccf28SAndroid Build Coastguard Worker ldp x19,x30, [sp], #16 1867*e1eccf28SAndroid Build Coastguard Worker ret 1868*e1eccf28SAndroid Build Coastguard WorkerEND(rsdIntrinsicBlurU4_K) 1869