1*e1eccf28SAndroid Build Coastguard Worker/* 2*e1eccf28SAndroid Build Coastguard Worker * Copyright (C) 2014 The Android Open Source Project 3*e1eccf28SAndroid Build Coastguard Worker * 4*e1eccf28SAndroid Build Coastguard Worker * Licensed under the Apache License, Version 2.0 (the "License"); 5*e1eccf28SAndroid Build Coastguard Worker * you may not use this file except in compliance with the License. 6*e1eccf28SAndroid Build Coastguard Worker * You may obtain a copy of the License at 7*e1eccf28SAndroid Build Coastguard Worker * 8*e1eccf28SAndroid Build Coastguard Worker * http://www.apache.org/licenses/LICENSE-2.0 9*e1eccf28SAndroid Build Coastguard Worker * 10*e1eccf28SAndroid Build Coastguard Worker * Unless required by applicable law or agreed to in writing, software 11*e1eccf28SAndroid Build Coastguard Worker * distributed under the License is distributed on an "AS IS" BASIS, 12*e1eccf28SAndroid Build Coastguard Worker * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13*e1eccf28SAndroid Build Coastguard Worker * See the License for the specific language governing permissions and 14*e1eccf28SAndroid Build Coastguard Worker * limitations under the License. 
15*e1eccf28SAndroid Build Coastguard Worker */ 16*e1eccf28SAndroid Build Coastguard Worker 17*e1eccf28SAndroid Build Coastguard Worker#define ENTRY(f) .text; .align 4; .globl f; .type f,#function; f: .fnstart 18*e1eccf28SAndroid Build Coastguard Worker#define PRIVATE(f) .text; .align 4; .type f,#function; f: .fnstart 19*e1eccf28SAndroid Build Coastguard Worker#define END(f) .fnend; .size f, .-f; 20*e1eccf28SAndroid Build Coastguard Worker 21*e1eccf28SAndroid Build Coastguard Worker#define ARCH_ARM_USE_BLUR_PRELOAD 22*e1eccf28SAndroid Build Coastguard Worker 23*e1eccf28SAndroid Build Coastguard Worker.eabi_attribute 25,1 @Tag_ABI_align8_preserved 24*e1eccf28SAndroid Build Coastguard Worker.arm 25*e1eccf28SAndroid Build Coastguard Worker 26*e1eccf28SAndroid Build Coastguard Worker/* Number of fractional bits to preserve in intermediate results. The 27*e1eccf28SAndroid Build Coastguard Worker * intermediate storage is 16-bit, and we started with 8 bit data (the integer 28*e1eccf28SAndroid Build Coastguard Worker * part), so this should be between 0 and 8. 29*e1eccf28SAndroid Build Coastguard Worker */ 30*e1eccf28SAndroid Build Coastguard Worker.set FRACTION_BITS, 7 31*e1eccf28SAndroid Build Coastguard Worker 32*e1eccf28SAndroid Build Coastguard Worker.set MAX_R, 25 33*e1eccf28SAndroid Build Coastguard Worker 34*e1eccf28SAndroid Build Coastguard Worker 35*e1eccf28SAndroid Build Coastguard Worker/* A quick way of making a line of code conditional on some other condition. 
36*e1eccf28SAndroid Build Coastguard Worker * Use `.set cc, 1` or `.set cc, 0` to enable or disable lines prefixed with 37*e1eccf28SAndroid Build Coastguard Worker * `ifcc`: 38*e1eccf28SAndroid Build Coastguard Worker */ 39*e1eccf28SAndroid Build Coastguard Worker.macro ifcc zzz:vararg 40*e1eccf28SAndroid Build Coastguard Worker.if cc 41*e1eccf28SAndroid Build Coastguard Worker \zzz 42*e1eccf28SAndroid Build Coastguard Worker.endif 43*e1eccf28SAndroid Build Coastguard Worker.endm 44*e1eccf28SAndroid Build Coastguard Worker 45*e1eccf28SAndroid Build Coastguard Worker/* It's not always clear that prefetching is beneficial and this needs further 46*e1eccf28SAndroid Build Coastguard Worker * testing on different cores, so it's made switchable here. 47*e1eccf28SAndroid Build Coastguard Worker */ 48*e1eccf28SAndroid Build Coastguard Worker#if defined(ARCH_ARM_USE_BLUR_PRELOAD) 49*e1eccf28SAndroid Build Coastguard Worker#define VERTPLD(...) pld [__VA_ARGS__] 50*e1eccf28SAndroid Build Coastguard Worker#else 51*e1eccf28SAndroid Build Coastguard Worker#define VERTPLD(...) nop 52*e1eccf28SAndroid Build Coastguard Worker#endif 53*e1eccf28SAndroid Build Coastguard Worker 54*e1eccf28SAndroid Build Coastguard Worker/* Fetch 16 columns of bytes (regardless of image format), convolve these 55*e1eccf28SAndroid Build Coastguard Worker * vertically, and leave them in the register file. If working near the top or 56*e1eccf28SAndroid Build Coastguard Worker * bottom of an image then clamp the addressing while loading the data in. 57*e1eccf28SAndroid Build Coastguard Worker * 58*e1eccf28SAndroid Build Coastguard Worker * The convolution is fully unrolled for windows up to max_r, with the 59*e1eccf28SAndroid Build Coastguard Worker * outermost edges calculated first. This way it's possible to branch directly 60*e1eccf28SAndroid Build Coastguard Worker * into the relevant part of the code for an arbitrary convolution radius. 
Two 61*e1eccf28SAndroid Build Coastguard Worker * variants of the loop are produced; one eliminates the clamping code for a 62*e1eccf28SAndroid Build Coastguard Worker * slight speed advantage. 63*e1eccf28SAndroid Build Coastguard Worker * 64*e1eccf28SAndroid Build Coastguard Worker * Where the macro is called with reg=x, the specified register is taken to 65*e1eccf28SAndroid Build Coastguard Worker * contain a pre-calculated pointer into one of the two loops. 66*e1eccf28SAndroid Build Coastguard Worker * 67*e1eccf28SAndroid Build Coastguard Worker * Input: 68*e1eccf28SAndroid Build Coastguard Worker * r1 -- src 69*e1eccf28SAndroid Build Coastguard Worker * r2 -- pitch 70*e1eccf28SAndroid Build Coastguard Worker * r5 -- r 71*e1eccf28SAndroid Build Coastguard Worker * r6 -- rup (r, unless clipped to top of source image) 72*e1eccf28SAndroid Build Coastguard Worker * r7 -- rdn (r, unless clipped to bottom of source image) 73*e1eccf28SAndroid Build Coastguard Worker * r12 -- switch index 74*e1eccf28SAndroid Build Coastguard Worker * q0-q3 -- coefficient table 75*e1eccf28SAndroid Build Coastguard Worker * Output: 76*e1eccf28SAndroid Build Coastguard Worker * r1 += 16 77*e1eccf28SAndroid Build Coastguard Worker * q10,q11 -- 16 convolved columns 78*e1eccf28SAndroid Build Coastguard Worker * Modifies: 79*e1eccf28SAndroid Build Coastguard Worker * r10 = upper row pointer 80*e1eccf28SAndroid Build Coastguard Worker * r11 = lower row pointer 81*e1eccf28SAndroid Build Coastguard Worker * q12-q15 = temporary sums 82*e1eccf28SAndroid Build Coastguard Worker */ 83*e1eccf28SAndroid Build Coastguard Worker.macro fetch, max_r=MAX_R, labelc=1, labelnc=2, reg=r12 /*{{{*/ 84*e1eccf28SAndroid Build Coastguard Worker .ifc \reg,r12 ; .set cc, 1 ; .else ; .set cc, 0 ; .endif 85*e1eccf28SAndroid Build Coastguard Worker 86*e1eccf28SAndroid Build Coastguard Worker vld1.8 {d30,d31}, [r1] 87*e1eccf28SAndroid Build Coastguard Worker mls r10, r2, r6, r1 88*e1eccf28SAndroid Build Coastguard Worker 
89*e1eccf28SAndroid Build Coastguard Worker vmovl.u8 q14, d30 90*e1eccf28SAndroid Build Coastguard Worker VERTPLD(r1, #32) 91*e1eccf28SAndroid Build Coastguard Worker vmovl.u8 q15, d31 92*e1eccf28SAndroid Build Coastguard Worker .if \max_r < 16 // approximate 93*e1eccf28SAndroid Build Coastguard Worker ifcc adr \reg, 1f 94*e1eccf28SAndroid Build Coastguard Worker .else 95*e1eccf28SAndroid Build Coastguard Worker ifcc ldr \reg, 2f 96*e1eccf28SAndroid Build Coastguard Worker1: ifcc add \reg, \reg, pc 97*e1eccf28SAndroid Build Coastguard Worker .endif 98*e1eccf28SAndroid Build Coastguard Worker 99*e1eccf28SAndroid Build Coastguard Worker vmull.u16 q12, d28, d0[0] 100*e1eccf28SAndroid Build Coastguard Worker ifcc sub \reg, r5, LSL #6 101*e1eccf28SAndroid Build Coastguard Worker vmull.u16 q13, d29, d0[0] 102*e1eccf28SAndroid Build Coastguard Worker mla r11, r2, r7, r1 103*e1eccf28SAndroid Build Coastguard Worker vmull.u16 q14, d30, d0[0] 104*e1eccf28SAndroid Build Coastguard Worker add r1, r1, #16 105*e1eccf28SAndroid Build Coastguard Worker vmull.u16 q15, d31, d0[0] 106*e1eccf28SAndroid Build Coastguard Worker bx \reg 107*e1eccf28SAndroid Build Coastguard Worker 108*e1eccf28SAndroid Build Coastguard Worker ifcc .align 2 109*e1eccf28SAndroid Build Coastguard Worker 2: ifcc .word 1f-1b-8 110*e1eccf28SAndroid Build Coastguard Worker 111*e1eccf28SAndroid Build Coastguard Worker /* This version of the vertical fetch loop body is used away from the edges 112*e1eccf28SAndroid Build Coastguard Worker * of the source image. The pointers start at the top and bottom source rows 113*e1eccf28SAndroid Build Coastguard Worker * and work their way towards the centre on each iteration. This way the 114*e1eccf28SAndroid Build Coastguard Worker * number of taps used can be controlled by jumping directly into the middle 115*e1eccf28SAndroid Build Coastguard Worker * of the loop and running to completion. 
116*e1eccf28SAndroid Build Coastguard Worker * If the loop body changes size then the code which calculates the address of 117*e1eccf28SAndroid Build Coastguard Worker * the initial iteration must be updated to accordingly. 118*e1eccf28SAndroid Build Coastguard Worker */ 119*e1eccf28SAndroid Build Coastguard Worker .macro vertfetch_noclamp i, dreg 120*e1eccf28SAndroid Build Coastguard Worker .if 0 < \i && \i <= \max_r 121*e1eccf28SAndroid Build Coastguard Worker vld1.8 {d20,d21}, [r10], r2 122*e1eccf28SAndroid Build Coastguard Worker vld1.8 {d22,d23}, [r11] 123*e1eccf28SAndroid Build Coastguard Worker sub r11, r11, r2 124*e1eccf28SAndroid Build Coastguard Worker vswp d21, d22 125*e1eccf28SAndroid Build Coastguard Worker VERTPLD(r10, #32) 126*e1eccf28SAndroid Build Coastguard Worker vaddl.u8 q10, d20, d21 127*e1eccf28SAndroid Build Coastguard Worker vaddl.u8 q11, d22, d23 128*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q12, d20, \dreg 129*e1eccf28SAndroid Build Coastguard Worker VERTPLD(r11, #32) 130*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q13, d21, \dreg 131*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q14, d22, \dreg 132*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q15, d23, \dreg 133*e1eccf28SAndroid Build Coastguard Worker .endif 134*e1eccf28SAndroid Build Coastguard Worker .endm 135*e1eccf28SAndroid Build Coastguard Worker 136*e1eccf28SAndroid Build Coastguard Worker /* This version of the vertical fetch loop body is used near the edges of the 137*e1eccf28SAndroid Build Coastguard Worker * source image, where one or both of the accesses may start with a clamped 138*e1eccf28SAndroid Build Coastguard Worker * value, and the row addresses only begin to change after some number of 139*e1eccf28SAndroid Build Coastguard Worker * iterations before the end. 
140*e1eccf28SAndroid Build Coastguard Worker * If the loop body changes size then the code which calculates the address of 141*e1eccf28SAndroid Build Coastguard Worker * the initial iteration must be updated to accordingly. 142*e1eccf28SAndroid Build Coastguard Worker */ 143*e1eccf28SAndroid Build Coastguard Worker .macro vertfetch_clamped i, dreg 144*e1eccf28SAndroid Build Coastguard Worker .if 0 < \i && \i <= \max_r 145*e1eccf28SAndroid Build Coastguard Worker vld1.8 {d20,d21}, [r10] 146*e1eccf28SAndroid Build Coastguard Worker vld1.8 {d22,d23}, [r11] 147*e1eccf28SAndroid Build Coastguard Worker cmp r6, #\i 148*e1eccf28SAndroid Build Coastguard Worker vswp d21, d22 149*e1eccf28SAndroid Build Coastguard Worker VERTPLD(r10, #32) 150*e1eccf28SAndroid Build Coastguard Worker vaddl.u8 q10, d20, d21 151*e1eccf28SAndroid Build Coastguard Worker addhs r10, r10, r2 152*e1eccf28SAndroid Build Coastguard Worker vaddl.u8 q11, d22, d23 153*e1eccf28SAndroid Build Coastguard Worker cmp r7, #\i 154*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q12, d20, \dreg 155*e1eccf28SAndroid Build Coastguard Worker VERTPLD(r11, #32) 156*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q13, d21, \dreg 157*e1eccf28SAndroid Build Coastguard Worker subhs r11, r11, r2 158*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q14, d22, \dreg 159*e1eccf28SAndroid Build Coastguard Worker nop 160*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q15, d23, \dreg 161*e1eccf28SAndroid Build Coastguard Worker .endif 162*e1eccf28SAndroid Build Coastguard Worker .endm 163*e1eccf28SAndroid Build Coastguard Worker 164*e1eccf28SAndroid Build Coastguard Worker /* Entry into this unrolled loop is computed as a negative index from 165*e1eccf28SAndroid Build Coastguard Worker * \labelc at the end of the block. 
166*e1eccf28SAndroid Build Coastguard Worker */ 167*e1eccf28SAndroid Build Coastguard Worker .align 4 168*e1eccf28SAndroid Build Coastguard Worker vertfetch_clamped 27, d6[3] 169*e1eccf28SAndroid Build Coastguard Worker vertfetch_clamped 26, d6[2] 170*e1eccf28SAndroid Build Coastguard Worker vertfetch_clamped 25, d6[1] 171*e1eccf28SAndroid Build Coastguard Worker vertfetch_clamped 24, d6[0] 172*e1eccf28SAndroid Build Coastguard Worker vertfetch_clamped 23, d5[3] 173*e1eccf28SAndroid Build Coastguard Worker vertfetch_clamped 22, d5[2] 174*e1eccf28SAndroid Build Coastguard Worker vertfetch_clamped 21, d5[1] 175*e1eccf28SAndroid Build Coastguard Worker vertfetch_clamped 20, d5[0] 176*e1eccf28SAndroid Build Coastguard Worker vertfetch_clamped 19, d4[3] 177*e1eccf28SAndroid Build Coastguard Worker vertfetch_clamped 18, d4[2] 178*e1eccf28SAndroid Build Coastguard Worker vertfetch_clamped 17, d4[1] 179*e1eccf28SAndroid Build Coastguard Worker vertfetch_clamped 16, d4[0] 180*e1eccf28SAndroid Build Coastguard Worker vertfetch_clamped 15, d3[3] 181*e1eccf28SAndroid Build Coastguard Worker vertfetch_clamped 14, d3[2] 182*e1eccf28SAndroid Build Coastguard Worker vertfetch_clamped 13, d3[1] 183*e1eccf28SAndroid Build Coastguard Worker vertfetch_clamped 12, d3[0] 184*e1eccf28SAndroid Build Coastguard Worker vertfetch_clamped 11, d2[3] 185*e1eccf28SAndroid Build Coastguard Worker vertfetch_clamped 10, d2[2] 186*e1eccf28SAndroid Build Coastguard Worker vertfetch_clamped 9, d2[1] 187*e1eccf28SAndroid Build Coastguard Worker vertfetch_clamped 8, d2[0] 188*e1eccf28SAndroid Build Coastguard Worker vertfetch_clamped 7, d1[3] 189*e1eccf28SAndroid Build Coastguard Worker vertfetch_clamped 6, d1[2] 190*e1eccf28SAndroid Build Coastguard Worker vertfetch_clamped 5, d1[1] 191*e1eccf28SAndroid Build Coastguard Worker vertfetch_clamped 4, d1[0] 192*e1eccf28SAndroid Build Coastguard Worker vertfetch_clamped 3, d0[3] 193*e1eccf28SAndroid Build Coastguard Worker vertfetch_clamped 2, d0[2] 
194*e1eccf28SAndroid Build Coastguard Worker vertfetch_clamped 1, d0[1] 195*e1eccf28SAndroid Build Coastguard Worker vertfetch_clamped 0, d0[0] 196*e1eccf28SAndroid Build Coastguard Worker 1: 197*e1eccf28SAndroid Build Coastguard Worker \labelc : b 2f /* done with clamped loop, skip over non-clamped loop */ 198*e1eccf28SAndroid Build Coastguard Worker 199*e1eccf28SAndroid Build Coastguard Worker /* Entry into this unrolled loop is computed as a negative index from 200*e1eccf28SAndroid Build Coastguard Worker * \labelnc at the end of the block. 201*e1eccf28SAndroid Build Coastguard Worker */ 202*e1eccf28SAndroid Build Coastguard Worker .align 4 203*e1eccf28SAndroid Build Coastguard Worker vertfetch_noclamp 27, d6[3] 204*e1eccf28SAndroid Build Coastguard Worker vertfetch_noclamp 26, d6[2] 205*e1eccf28SAndroid Build Coastguard Worker vertfetch_noclamp 25, d6[1] 206*e1eccf28SAndroid Build Coastguard Worker vertfetch_noclamp 24, d6[0] 207*e1eccf28SAndroid Build Coastguard Worker vertfetch_noclamp 23, d5[3] 208*e1eccf28SAndroid Build Coastguard Worker vertfetch_noclamp 22, d5[2] 209*e1eccf28SAndroid Build Coastguard Worker vertfetch_noclamp 21, d5[1] 210*e1eccf28SAndroid Build Coastguard Worker vertfetch_noclamp 20, d5[0] 211*e1eccf28SAndroid Build Coastguard Worker vertfetch_noclamp 19, d4[3] 212*e1eccf28SAndroid Build Coastguard Worker vertfetch_noclamp 18, d4[2] 213*e1eccf28SAndroid Build Coastguard Worker vertfetch_noclamp 17, d4[1] 214*e1eccf28SAndroid Build Coastguard Worker vertfetch_noclamp 16, d4[0] 215*e1eccf28SAndroid Build Coastguard Worker vertfetch_noclamp 15, d3[3] 216*e1eccf28SAndroid Build Coastguard Worker vertfetch_noclamp 14, d3[2] 217*e1eccf28SAndroid Build Coastguard Worker vertfetch_noclamp 13, d3[1] 218*e1eccf28SAndroid Build Coastguard Worker vertfetch_noclamp 12, d3[0] 219*e1eccf28SAndroid Build Coastguard Worker vertfetch_noclamp 11, d2[3] 220*e1eccf28SAndroid Build Coastguard Worker vertfetch_noclamp 10, d2[2] 221*e1eccf28SAndroid Build 
Coastguard Worker vertfetch_noclamp 9, d2[1] 222*e1eccf28SAndroid Build Coastguard Worker vertfetch_noclamp 8, d2[0] 223*e1eccf28SAndroid Build Coastguard Worker vertfetch_noclamp 7, d1[3] 224*e1eccf28SAndroid Build Coastguard Worker vertfetch_noclamp 6, d1[2] 225*e1eccf28SAndroid Build Coastguard Worker vertfetch_noclamp 5, d1[1] 226*e1eccf28SAndroid Build Coastguard Worker vertfetch_noclamp 4, d1[0] 227*e1eccf28SAndroid Build Coastguard Worker vertfetch_noclamp 3, d0[3] 228*e1eccf28SAndroid Build Coastguard Worker vertfetch_noclamp 2, d0[2] 229*e1eccf28SAndroid Build Coastguard Worker vertfetch_noclamp 1, d0[1] 230*e1eccf28SAndroid Build Coastguard Worker vertfetch_noclamp 0, d0[0] 231*e1eccf28SAndroid Build Coastguard Worker \labelnc : 232*e1eccf28SAndroid Build Coastguard Worker 233*e1eccf28SAndroid Build Coastguard Worker .purgem vertfetch_clamped 234*e1eccf28SAndroid Build Coastguard Worker .purgem vertfetch_noclamp 235*e1eccf28SAndroid Build Coastguard Worker 236*e1eccf28SAndroid Build Coastguard Worker 2: vqrshrn.u32 d20, q12, #16 - FRACTION_BITS 237*e1eccf28SAndroid Build Coastguard Worker vqrshrn.u32 d21, q13, #16 - FRACTION_BITS 238*e1eccf28SAndroid Build Coastguard Worker vqrshrn.u32 d22, q14, #16 - FRACTION_BITS 239*e1eccf28SAndroid Build Coastguard Worker vqrshrn.u32 d23, q15, #16 - FRACTION_BITS 240*e1eccf28SAndroid Build Coastguard Worker.endm /*}}}*/ 241*e1eccf28SAndroid Build Coastguard Worker 242*e1eccf28SAndroid Build Coastguard Worker/* Some portion of the convolution window (as much as will fit, and all of it 243*e1eccf28SAndroid Build Coastguard Worker * for the uchar1 cases) is kept in the register file to avoid unnecessary 244*e1eccf28SAndroid Build Coastguard Worker * memory accesses. This forces the horizontal loops to be unrolled because 245*e1eccf28SAndroid Build Coastguard Worker * there's no indexed addressing into the register file. 
246*e1eccf28SAndroid Build Coastguard Worker * 247*e1eccf28SAndroid Build Coastguard Worker * As in the fetch macro, the operations are ordered from outside to inside, so 248*e1eccf28SAndroid Build Coastguard Worker * that jumping into the middle of the block bypasses the unwanted window taps. 249*e1eccf28SAndroid Build Coastguard Worker * 250*e1eccf28SAndroid Build Coastguard Worker * There are several variants of the macro because of the fixed offets of the 251*e1eccf28SAndroid Build Coastguard Worker * taps -- the wider the maximum radius the further the centre tap is from the 252*e1eccf28SAndroid Build Coastguard Worker * most recently fetched data. This means that pre-filling the window requires 253*e1eccf28SAndroid Build Coastguard Worker * more data that won't be used and it means that rotating the window involves 254*e1eccf28SAndroid Build Coastguard Worker * more mov operations. 255*e1eccf28SAndroid Build Coastguard Worker * 256*e1eccf28SAndroid Build Coastguard Worker * When the buffer gets too big the buffer at [r9] is used. 257*e1eccf28SAndroid Build Coastguard Worker * 258*e1eccf28SAndroid Build Coastguard Worker * Input: 259*e1eccf28SAndroid Build Coastguard Worker * q4-q11 -- convoltion window 260*e1eccf28SAndroid Build Coastguard Worker * r9 -- pointer to additional convolution window data 261*e1eccf28SAndroid Build Coastguard Worker * Output: 262*e1eccf28SAndroid Build Coastguard Worker * r9 -- updated buffer pointer (if used) 263*e1eccf28SAndroid Build Coastguard Worker * d31 -- result to be stored 264*e1eccf28SAndroid Build Coastguard Worker * Modifies: 265*e1eccf28SAndroid Build Coastguard Worker * r12 -- temp buffer pointer 266*e1eccf28SAndroid Build Coastguard Worker * q12-q13 -- temporaries for load and vext operations. 
267*e1eccf28SAndroid Build Coastguard Worker * q14-q15 -- intermediate sums 268*e1eccf28SAndroid Build Coastguard Worker */ 269*e1eccf28SAndroid Build Coastguard Worker#define TUNED_LIST1 8, 16 270*e1eccf28SAndroid Build Coastguard Worker.macro hconv1_8/*{{{*/ 271*e1eccf28SAndroid Build Coastguard Worker vmull.u16 q14, d18, d0[0] 272*e1eccf28SAndroid Build Coastguard Worker vmull.u16 q15, d19, d0[0] 273*e1eccf28SAndroid Build Coastguard Worker 274*e1eccf28SAndroid Build Coastguard Worker ldr r12, [pc, r5, LSL #2] 275*e1eccf28SAndroid Build Coastguard Worker add pc, pc, r12 276*e1eccf28SAndroid Build Coastguard Worker bkpt 277*e1eccf28SAndroid Build Coastguard Worker 100: .word 101f-100b 278*e1eccf28SAndroid Build Coastguard Worker .word 102f-100b 279*e1eccf28SAndroid Build Coastguard Worker .word 103f-100b 280*e1eccf28SAndroid Build Coastguard Worker .word 104f-100b 281*e1eccf28SAndroid Build Coastguard Worker .word 105f-100b 282*e1eccf28SAndroid Build Coastguard Worker .word 106f-100b 283*e1eccf28SAndroid Build Coastguard Worker .word 107f-100b 284*e1eccf28SAndroid Build Coastguard Worker .word 108f-100b 285*e1eccf28SAndroid Build Coastguard Worker 108: vmlal.u16 q14, d16, d2[0] 286*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q15, d17, d2[0] 287*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q14, d20, d2[0] 288*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q15, d21, d2[0] 289*e1eccf28SAndroid Build Coastguard Worker 107: vext.u16 q12, q8, q9, #1 290*e1eccf28SAndroid Build Coastguard Worker vext.u16 q13, q9, q10, #7 291*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q14, d24, d1[3] 292*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q15, d25, d1[3] 293*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q14, d26, d1[3] 294*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q15, d27, d1[3] 295*e1eccf28SAndroid Build Coastguard Worker 106: vext.u16 q12, q8, q9, #2 296*e1eccf28SAndroid Build Coastguard Worker vext.u16 q13, q9, q10, #6 
297*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q14, d24, d1[2] 298*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q15, d25, d1[2] 299*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q14, d26, d1[2] 300*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q15, d27, d1[2] 301*e1eccf28SAndroid Build Coastguard Worker 105: vext.u16 q12, q8, q9, #3 302*e1eccf28SAndroid Build Coastguard Worker vext.u16 q13, q9, q10, #5 303*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q14, d24, d1[1] 304*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q15, d25, d1[1] 305*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q14, d26, d1[1] 306*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q15, d27, d1[1] 307*e1eccf28SAndroid Build Coastguard Worker 104: //vext.u16 q12, q8, q9, #4 308*e1eccf28SAndroid Build Coastguard Worker //vext.u16 q13, q9, q10, #4 309*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q14, d17, d1[0] 310*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q15, d18, d1[0] 311*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q14, d19, d1[0] 312*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q15, d20, d1[0] 313*e1eccf28SAndroid Build Coastguard Worker 103: vext.u16 q12, q8, q9, #5 314*e1eccf28SAndroid Build Coastguard Worker vext.u16 q13, q9, q10, #3 315*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q14, d24, d0[3] 316*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q15, d25, d0[3] 317*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q14, d26, d0[3] 318*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q15, d27, d0[3] 319*e1eccf28SAndroid Build Coastguard Worker 102: vext.u16 q12, q8, q9, #6 320*e1eccf28SAndroid Build Coastguard Worker vext.u16 q13, q9, q10, #2 321*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q14, d24, d0[2] 322*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q15, d25, d0[2] 323*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q14, d26, d0[2] 324*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q15, 
d27, d0[2] 325*e1eccf28SAndroid Build Coastguard Worker 101: vext.u16 q12, q8, q9, #7 326*e1eccf28SAndroid Build Coastguard Worker vext.u16 q13, q9, q10, #1 327*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q14, d24, d0[1] 328*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q15, d25, d0[1] 329*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q14, d26, d0[1] 330*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q15, d27, d0[1] 331*e1eccf28SAndroid Build Coastguard Worker 332*e1eccf28SAndroid Build Coastguard Worker vqrshrn.u32 d28, q14, #16 333*e1eccf28SAndroid Build Coastguard Worker vqrshrn.u32 d29, q15, #16 334*e1eccf28SAndroid Build Coastguard Worker vqrshrn.u16 d31, q14, #FRACTION_BITS 335*e1eccf28SAndroid Build Coastguard Worker 336*e1eccf28SAndroid Build Coastguard Worker vmov q8, q9 337*e1eccf28SAndroid Build Coastguard Worker vmov q9, q10 338*e1eccf28SAndroid Build Coastguard Worker vmov q10, q11 339*e1eccf28SAndroid Build Coastguard Worker.endm/*}}}*/ 340*e1eccf28SAndroid Build Coastguard Worker 341*e1eccf28SAndroid Build Coastguard Worker.macro hconv1_16/*{{{*/ 342*e1eccf28SAndroid Build Coastguard Worker vmull.u16 q14, d16, d0[0] 343*e1eccf28SAndroid Build Coastguard Worker vmull.u16 q15, d17, d0[0] 344*e1eccf28SAndroid Build Coastguard Worker 345*e1eccf28SAndroid Build Coastguard Worker ldr r12, [pc, r5, LSL #2] 346*e1eccf28SAndroid Build Coastguard Worker add pc, pc, r12 347*e1eccf28SAndroid Build Coastguard Worker bkpt 348*e1eccf28SAndroid Build Coastguard Worker 100: .word 101f-100b 349*e1eccf28SAndroid Build Coastguard Worker .word 102f-100b 350*e1eccf28SAndroid Build Coastguard Worker .word 103f-100b 351*e1eccf28SAndroid Build Coastguard Worker .word 104f-100b 352*e1eccf28SAndroid Build Coastguard Worker .word 105f-100b 353*e1eccf28SAndroid Build Coastguard Worker .word 106f-100b 354*e1eccf28SAndroid Build Coastguard Worker .word 107f-100b 355*e1eccf28SAndroid Build Coastguard Worker .word 108f-100b 356*e1eccf28SAndroid Build Coastguard 
Worker .word 109f-100b 357*e1eccf28SAndroid Build Coastguard Worker .word 110f-100b 358*e1eccf28SAndroid Build Coastguard Worker .word 111f-100b 359*e1eccf28SAndroid Build Coastguard Worker .word 112f-100b 360*e1eccf28SAndroid Build Coastguard Worker .word 113f-100b 361*e1eccf28SAndroid Build Coastguard Worker .word 114f-100b 362*e1eccf28SAndroid Build Coastguard Worker .word 115f-100b 363*e1eccf28SAndroid Build Coastguard Worker .word 116f-100b 364*e1eccf28SAndroid Build Coastguard Worker 116: //vext.u16 q12, q6, q7, #0 365*e1eccf28SAndroid Build Coastguard Worker //vext.u16 q13, q10, q11, #0 366*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q14, d12, d4[0] 367*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q15, d13, d4[0] 368*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q14, d20, d4[0] 369*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q15, d21, d4[0] 370*e1eccf28SAndroid Build Coastguard Worker 115: vext.u16 q12, q6, q7, #1 371*e1eccf28SAndroid Build Coastguard Worker vext.u16 q13, q9, q10, #7 372*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q14, d24, d3[3] 373*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q15, d25, d3[3] 374*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q14, d26, d3[3] 375*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q15, d27, d3[3] 376*e1eccf28SAndroid Build Coastguard Worker 114: vext.u16 q12, q6, q7, #2 377*e1eccf28SAndroid Build Coastguard Worker vext.u16 q13, q9, q10, #6 378*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q14, d24, d3[2] 379*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q15, d25, d3[2] 380*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q14, d26, d3[2] 381*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q15, d27, d3[2] 382*e1eccf28SAndroid Build Coastguard Worker 113: vext.u16 q12, q6, q7, #3 383*e1eccf28SAndroid Build Coastguard Worker vext.u16 q13, q9, q10, #5 384*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q14, d24, d3[1] 385*e1eccf28SAndroid Build Coastguard 
Worker vmlal.u16 q15, d25, d3[1] 386*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q14, d26, d3[1] 387*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q15, d27, d3[1] 388*e1eccf28SAndroid Build Coastguard Worker 112: //vext.u16 q12, q6, q7, #4 389*e1eccf28SAndroid Build Coastguard Worker //vext.u16 q13, q9, q10, #4 390*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q14, d13, d3[0] 391*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q15, d14, d3[0] 392*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q14, d19, d3[0] 393*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q15, d20, d3[0] 394*e1eccf28SAndroid Build Coastguard Worker 111: vext.u16 q12, q6, q7, #5 395*e1eccf28SAndroid Build Coastguard Worker vext.u16 q13, q9, q10, #3 396*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q14, d24, d2[3] 397*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q15, d25, d2[3] 398*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q14, d26, d2[3] 399*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q15, d27, d2[3] 400*e1eccf28SAndroid Build Coastguard Worker 110: vext.u16 q12, q6, q7, #6 401*e1eccf28SAndroid Build Coastguard Worker vext.u16 q13, q9, q10, #2 402*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q14, d24, d2[2] 403*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q15, d25, d2[2] 404*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q14, d26, d2[2] 405*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q15, d27, d2[2] 406*e1eccf28SAndroid Build Coastguard Worker 109: vext.u16 q12, q6, q7, #7 407*e1eccf28SAndroid Build Coastguard Worker vext.u16 q13, q9, q10, #1 408*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q14, d24, d2[1] 409*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q15, d25, d2[1] 410*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q14, d26, d2[1] 411*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q15, d27, d2[1] 412*e1eccf28SAndroid Build Coastguard Worker 108: //vext.u16 q12, q7, q8, #0 413*e1eccf28SAndroid 
Build Coastguard Worker //vext.u16 q13, q9, q10, #0 414*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q14, d14, d2[0] 415*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q15, d15, d2[0] 416*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q14, d18, d2[0] 417*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q15, d19, d2[0] 418*e1eccf28SAndroid Build Coastguard Worker 107: vext.u16 q12, q7, q8, #1 419*e1eccf28SAndroid Build Coastguard Worker vext.u16 q13, q8, q9, #7 420*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q14, d24, d1[3] 421*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q15, d25, d1[3] 422*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q14, d26, d1[3] 423*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q15, d27, d1[3] 424*e1eccf28SAndroid Build Coastguard Worker 106: vext.u16 q12, q7, q8, #2 425*e1eccf28SAndroid Build Coastguard Worker vext.u16 q13, q8, q9, #6 426*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q14, d24, d1[2] 427*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q15, d25, d1[2] 428*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q14, d26, d1[2] 429*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q15, d27, d1[2] 430*e1eccf28SAndroid Build Coastguard Worker 105: vext.u16 q12, q7, q8, #3 431*e1eccf28SAndroid Build Coastguard Worker vext.u16 q13, q8, q9, #5 432*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q14, d24, d1[1] 433*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q15, d25, d1[1] 434*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q14, d26, d1[1] 435*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q15, d27, d1[1] 436*e1eccf28SAndroid Build Coastguard Worker 104: //vext.u16 q12, q7, q8, #4 437*e1eccf28SAndroid Build Coastguard Worker //vext.u16 q13, q8, q9, #4 438*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q14, d15, d1[0] 439*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q15, d16, d1[0] 440*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q14, d17, d1[0] 
441*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q15, d18, d1[0] 442*e1eccf28SAndroid Build Coastguard Worker 103: vext.u16 q12, q7, q8, #5 443*e1eccf28SAndroid Build Coastguard Worker vext.u16 q13, q8, q9, #3 444*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q14, d24, d0[3] 445*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q15, d25, d0[3] 446*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q14, d26, d0[3] 447*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q15, d27, d0[3] 448*e1eccf28SAndroid Build Coastguard Worker 102: vext.u16 q12, q7, q8, #6 449*e1eccf28SAndroid Build Coastguard Worker vext.u16 q13, q8, q9, #2 450*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q14, d24, d0[2] 451*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q15, d25, d0[2] 452*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q14, d26, d0[2] 453*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q15, d27, d0[2] 454*e1eccf28SAndroid Build Coastguard Worker 101: vext.u16 q12, q7, q8, #7 455*e1eccf28SAndroid Build Coastguard Worker vext.u16 q13, q8, q9, #1 456*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q14, d24, d0[1] 457*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q15, d25, d0[1] 458*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q14, d26, d0[1] 459*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q15, d27, d0[1] 460*e1eccf28SAndroid Build Coastguard Worker 461*e1eccf28SAndroid Build Coastguard Worker vqrshrn.u32 d28, q14, #16 462*e1eccf28SAndroid Build Coastguard Worker vqrshrn.u32 d29, q15, #16 463*e1eccf28SAndroid Build Coastguard Worker vqrshrn.u16 d31, q14, #FRACTION_BITS 464*e1eccf28SAndroid Build Coastguard Worker 465*e1eccf28SAndroid Build Coastguard Worker vmov q6, q7 466*e1eccf28SAndroid Build Coastguard Worker vmov q7, q8 467*e1eccf28SAndroid Build Coastguard Worker vmov q8, q9 468*e1eccf28SAndroid Build Coastguard Worker vmov q9, q10 469*e1eccf28SAndroid Build Coastguard Worker vmov q10, q11 470*e1eccf28SAndroid Build Coastguard 
.endm/*}}}*/

/* hconv1_25: one step of the horizontal convolution using the q3(d7)..q11
 * register window, with up to 25 taps on each side of the centre.
 *
 * The accumulators q14/q15 are started with the centre window
 * (vext of q6:q7 at #7) multiplied by d0[0].  Control then jumps into the
 * tap chain through the offset table at label 100:
 *
 *   - `.arm` code reads pc as the instruction address + 8, so
 *     `ldr r12, [pc, r5, LSL #2]` indexes from the `bkpt` word and r5 = N
 *     fetches the N-th table entry (r5 is 1-based; r5 = 0 would fetch the
 *     bkpt encoding itself).
 *   - `add pc, pc, r12` is evaluated relative to label 100, and each entry
 *     is `10Nf-100b`, so execution resumes at label 10N.
 *   - The labels fall through from 125 down to 101, so entering at 10N
 *     accumulates taps N..1.  Each tap adds one window taken before the
 *     centre and one taken after it, both scaled by the same coefficient
 *     lane (d0[1] for tap 1 up to d6[1] for tap 25).
 *
 * NOTE(review): r5 is presumably the blur radius and d0..d6[1] the
 * precomputed coefficient table -- confirm against the caller.
 *
 * Result: the 32-bit sums are narrowed with rounding and saturation to
 * 16 bits (d28, d29) and then by FRACTION_BITS to 8 bits in d31.  Finally
 * the data window is shifted along by one q register (d7 <- d9,
 * q4 <- q5, ..., q10 <- q11); q11 is left for the caller to refill.
 *
 * Clobbers r12, q12, q13, q14, q15.  The instruction layout between the
 * `ldr` and label 100 must not change: the dispatch is pc-relative.
 */
.macro hconv1_25/*{{{*/
        vext.u16    q12, q6, q7, #7
        vmull.u16   q14, d24, d0[0]
        vmull.u16   q15, d25, d0[0]

        ldr         r12, [pc, r5, LSL #2]
        add         pc, pc, r12
        bkpt
    100:    .word 101f-100b
            .word 102f-100b
            .word 103f-100b
            .word 104f-100b
            .word 105f-100b
            .word 106f-100b
            .word 107f-100b
            .word 108f-100b
            .word 109f-100b
            .word 110f-100b
            .word 111f-100b
            .word 112f-100b
            .word 113f-100b
            .word 114f-100b
            .word 115f-100b
            .word 116f-100b
            .word 117f-100b
            .word 118f-100b
            .word 119f-100b
            .word 120f-100b
            .word 121f-100b
            .word 122f-100b
            .word 123f-100b
            .word 124f-100b
            .word 125f-100b
    125:    vext.u16    q12, q3, q4, #6
            vext.u16    q13, q10, q11, #0
            vmlal.u16   q14, d24, d6[1]
            vmlal.u16   q15, d25, d6[1]
            vmlal.u16   q14, d26, d6[1]
            vmlal.u16   q15, d27, d6[1]
    124:    vext.u16    q12, q3, q4, #7
            vext.u16    q13, q9, q10, #7
            vmlal.u16   q14, d24, d6[0]
            vmlal.u16   q15, d25, d6[0]
            vmlal.u16   q14, d26, d6[0]
            vmlal.u16   q15, d27, d6[0]
    123:    vext.u16    q12, q4, q5, #0
            vext.u16    q13, q9, q10, #6
            vmlal.u16   q14, d24, d5[3]
            vmlal.u16   q15, d25, d5[3]
            vmlal.u16   q14, d26, d5[3]
            vmlal.u16   q15, d27, d5[3]
    122:    vext.u16    q12, q4, q5, #1
            vext.u16    q13, q9, q10, #5
            vmlal.u16   q14, d24, d5[2]
            vmlal.u16   q15, d25, d5[2]
            vmlal.u16   q14, d26, d5[2]
            vmlal.u16   q15, d27, d5[2]
    121:    vext.u16    q12, q4, q5, #2
            vext.u16    q13, q9, q10, #4
            vmlal.u16   q14, d24, d5[1]
            vmlal.u16   q15, d25, d5[1]
            vmlal.u16   q14, d26, d5[1]
            vmlal.u16   q15, d27, d5[1]
    120:    vext.u16    q12, q4, q5, #3
            vext.u16    q13, q9, q10, #3
            vmlal.u16   q14, d24, d5[0]
            vmlal.u16   q15, d25, d5[0]
            vmlal.u16   q14, d26, d5[0]
            vmlal.u16   q15, d27, d5[0]
    119:    vext.u16    q12, q4, q5, #4
            vext.u16    q13, q9, q10, #2
            vmlal.u16   q14, d24, d4[3]
            vmlal.u16   q15, d25, d4[3]
            vmlal.u16   q14, d26, d4[3]
            vmlal.u16   q15, d27, d4[3]
    118:    vext.u16    q12, q4, q5, #5
            vext.u16    q13, q9, q10, #1
            vmlal.u16   q14, d24, d4[2]
            vmlal.u16   q15, d25, d4[2]
            vmlal.u16   q14, d26, d4[2]
            vmlal.u16   q15, d27, d4[2]
    117:    vext.u16    q12, q4, q5, #6
            vext.u16    q13, q9, q10, #0
            vmlal.u16   q14, d24, d4[1]
            vmlal.u16   q15, d25, d4[1]
            vmlal.u16   q14, d26, d4[1]
            vmlal.u16   q15, d27, d4[1]
    116:    vext.u16    q12, q4, q5, #7
            vext.u16    q13, q8, q9, #7
            vmlal.u16   q14, d24, d4[0]
            vmlal.u16   q15, d25, d4[0]
            vmlal.u16   q14, d26, d4[0]
            vmlal.u16   q15, d27, d4[0]
    115:    vext.u16    q12, q5, q6, #0
            vext.u16    q13, q8, q9, #6
            vmlal.u16   q14, d24, d3[3]
            vmlal.u16   q15, d25, d3[3]
            vmlal.u16   q14, d26, d3[3]
            vmlal.u16   q15, d27, d3[3]
    114:    vext.u16    q12, q5, q6, #1
            vext.u16    q13, q8, q9, #5
            vmlal.u16   q14, d24, d3[2]
            vmlal.u16   q15, d25, d3[2]
            vmlal.u16   q14, d26, d3[2]
            vmlal.u16   q15, d27, d3[2]
    113:    vext.u16    q12, q5, q6, #2
            vext.u16    q13, q8, q9, #4
            vmlal.u16   q14, d24, d3[1]
            vmlal.u16   q15, d25, d3[1]
            vmlal.u16   q14, d26, d3[1]
            vmlal.u16   q15, d27, d3[1]
    112:    vext.u16    q12, q5, q6, #3
            vext.u16    q13, q8, q9, #3
            vmlal.u16   q14, d24, d3[0]
            vmlal.u16   q15, d25, d3[0]
            vmlal.u16   q14, d26, d3[0]
            vmlal.u16   q15, d27, d3[0]
    111:    vext.u16    q12, q5, q6, #4
            vext.u16    q13, q8, q9, #2
            vmlal.u16   q14, d24, d2[3]
            vmlal.u16   q15, d25, d2[3]
            vmlal.u16   q14, d26, d2[3]
            vmlal.u16   q15, d27, d2[3]
    110:    vext.u16    q12, q5, q6, #5
            vext.u16    q13, q8, q9, #1
            vmlal.u16   q14, d24, d2[2]
            vmlal.u16   q15, d25, d2[2]
            vmlal.u16   q14, d26, d2[2]
            vmlal.u16   q15, d27, d2[2]
    109:    vext.u16    q12, q5, q6, #6
            vext.u16    q13, q8, q9, #0
            vmlal.u16   q14, d24, d2[1]
            vmlal.u16   q15, d25, d2[1]
            vmlal.u16   q14, d26, d2[1]
            vmlal.u16   q15, d27, d2[1]
    108:    vext.u16    q12, q5, q6, #7
            vext.u16    q13, q7, q8, #7
            vmlal.u16   q14, d24, d2[0]
            vmlal.u16   q15, d25, d2[0]
            vmlal.u16   q14, d26, d2[0]
            vmlal.u16   q15, d27, d2[0]
    107:    vext.u16    q12, q6, q7, #0
            vext.u16    q13, q7, q8, #6
            vmlal.u16   q14, d24, d1[3]
            vmlal.u16   q15, d25, d1[3]
            vmlal.u16   q14, d26, d1[3]
            vmlal.u16   q15, d27, d1[3]
    106:    vext.u16    q12, q6, q7, #1
            vext.u16    q13, q7, q8, #5
            vmlal.u16   q14, d24, d1[2]
            vmlal.u16   q15, d25, d1[2]
            vmlal.u16   q14, d26, d1[2]
            vmlal.u16   q15, d27, d1[2]
    105:    vext.u16    q12, q6, q7, #2
            vext.u16    q13, q7, q8, #4
            vmlal.u16   q14, d24, d1[1]
            vmlal.u16   q15, d25, d1[1]
            vmlal.u16   q14, d26, d1[1]
            vmlal.u16   q15, d27, d1[1]
    104:    vext.u16    q12, q6, q7, #3
            vext.u16    q13, q7, q8, #3
            vmlal.u16   q14, d24, d1[0]
            vmlal.u16   q15, d25, d1[0]
            vmlal.u16   q14, d26, d1[0]
            vmlal.u16   q15, d27, d1[0]
    103:    vext.u16    q12, q6, q7, #4
            vext.u16    q13, q7, q8, #2
            vmlal.u16   q14, d24, d0[3]
            vmlal.u16   q15, d25, d0[3]
            vmlal.u16   q14, d26, d0[3]
            vmlal.u16   q15, d27, d0[3]
    102:    vext.u16    q12, q6, q7, #5
            vext.u16    q13, q7, q8, #1
            vmlal.u16   q14, d24, d0[2]
            vmlal.u16   q15, d25, d0[2]
            vmlal.u16   q14, d26, d0[2]
            vmlal.u16   q15, d27, d0[2]
    101:    vext.u16    q12, q6, q7, #6
            vext.u16    q13, q7, q8, #0
            vmlal.u16   q14, d24, d0[1]
            vmlal.u16   q15, d25, d0[1]
            vmlal.u16   q14, d26, d0[1]
            vmlal.u16   q15, d27, d0[1]

        vqrshrn.u32 d28, q14, #16
        vqrshrn.u32 d29, q15, #16
        vqrshrn.u16 d31, q14, #FRACTION_BITS

        vmov        d7, d9
        vmov        q4, q5
        vmov        q5, q6
        vmov        q6, q7
        vmov        q7, q8
        vmov        q8, q9
        vmov        q9, q10
        vmov        q10, q11
.endm/*}}}*/

#define TUNED_LIST4 6, 12
/* hconv4_6: one step of the horizontal convolution where neighbouring taps
 * are one d register (four u16 lanes) apart, with up to 6 taps per side.
 * NOTE(review): the four-lane stride presumably corresponds to 4-channel
 * pixels -- confirm against the caller.
 *
 * The centre is d14:d15, scaled by d0[0].  Dispatch uses the same
 * pc-relative table scheme as the other hconv macros: r5 = N (1-based)
 * enters at label 10N and falls through to 101, so taps N..1 are
 * accumulated.  No vext is needed here because every tap lands on a whole
 * d register: tap k uses d(14-k):d(15-k) on one side and d(14+k):d(15+k)
 * on the other, both scaled by the same coefficient lane.
 *
 * Result: d31 holds the 8-bit output (32 -> 16 bits via #16, then 16 -> 8
 * via FRACTION_BITS, both rounding and saturating).  The window is then
 * shifted by one q register (q4 <- q5, ..., q10 <- q11).
 *
 * Clobbers r12, q14, q15.
 */
.macro hconv4_6/*{{{*/
        vmull.u16   q14, d14, d0[0]
        vmull.u16   q15, d15, d0[0]

        ldr         r12, [pc, r5, LSL #2]
        add         pc, pc, r12
        bkpt
    100:    .word 101f-100b
            .word 102f-100b
            .word 103f-100b
            .word 104f-100b
            .word 105f-100b
            .word 106f-100b
    106:    vmlal.u16   q14, d8, d1[2]
            vmlal.u16   q15, d9, d1[2]
            vmlal.u16   q14, d20, d1[2]
            vmlal.u16   q15, d21, d1[2]
    105:    vmlal.u16   q14, d9, d1[1]
            vmlal.u16   q15, d10, d1[1]
            vmlal.u16   q14, d19, d1[1]
            vmlal.u16   q15, d20, d1[1]
    104:    vmlal.u16   q14, d10, d1[0]
            vmlal.u16   q15, d11, d1[0]
            vmlal.u16   q14, d18, d1[0]
            vmlal.u16   q15, d19, d1[0]
    103:    vmlal.u16   q14, d11, d0[3]
            vmlal.u16   q15, d12, d0[3]
            vmlal.u16   q14, d17, d0[3]
            vmlal.u16   q15, d18, d0[3]
    102:    vmlal.u16   q14, d12, d0[2]
            vmlal.u16   q15, d13, d0[2]
            vmlal.u16   q14, d16, d0[2]
            vmlal.u16   q15, d17, d0[2]
    101:    vmlal.u16   q14, d13, d0[1]
            vmlal.u16   q15, d14, d0[1]
            vmlal.u16   q14, d15, d0[1]
            vmlal.u16   q15, d16, d0[1]

        vqrshrn.u32 d28, q14, #16
        vqrshrn.u32 d29, q15, #16
        vqrshrn.u16 d31, q14, #FRACTION_BITS

        vmov        q4, q5
        vmov        q5, q6
        vmov        q6, q7
        vmov        q7, q8
        vmov        q8, q9
        vmov        q9, q10
        vmov        q10, q11
.endm/*}}}*/

/* hconv4_12: as hconv4_6 (taps one d register apart) but with up to 12
 * taps per side.  The centre is d8:d9; the taps on the later side come
 * straight from registers d9..d21, while the taps on the earlier side are
 * reloaded from memory addressed through r9.
 *
 * Each reload computes r9 + offset and clears bit 9 (`bic #0x200`) before
 * the load; split 64-bit loads clear bit 9 again after the post-increment.
 * At the end of the macro q4 is stored at [r9], r9 advances by 16 bytes
 * and bit 9 is cleared once more.
 * NOTE(review): this looks like a 0x200-byte circular history buffer of
 * previously processed q4 values, placed so that clearing bit 9 wraps the
 * address -- confirm the buffer alignment at the allocation site.
 *
 * Tap 1 (label 101) loads only d24 from memory; the other half of that
 * tap is d8, which is still in the register window.
 *
 * Dispatch, narrowing to d31 and the q4..q10 window shift are the same as
 * in hconv4_6.  Clobbers r12, q12, q14, q15 and updates r9.
 */
.macro hconv4_12/*{{{*/
        vmull.u16   q14, d8, d0[0]
        vmull.u16   q15, d9, d0[0]

        ldr         r12, [pc, r5, LSL #2]
        add         pc, pc, r12
        bkpt
    100:    .word 101f-100b
            .word 102f-100b
            .word 103f-100b
            .word 104f-100b
            .word 105f-100b
            .word 106f-100b
            .word 107f-100b
            .word 108f-100b
            .word 109f-100b
            .word 110f-100b
            .word 111f-100b
            .word 112f-100b
    112:    add         r12, r9, #0x1a0
            bic         r12, r12, #0x200
            vld1.u16    {d24,d25}, [r12:128]
            vmlal.u16   q14, d24, d3[0]
            vmlal.u16   q15, d25, d3[0]
            vmlal.u16   q14, d20, d3[0]
            vmlal.u16   q15, d21, d3[0]
    111:    add         r12, r9, #0x1a8
            bic         r12, r12, #0x200
            vld1.u16    {d24}, [r12:64]!
            bic         r12, r12, #0x200
            vld1.u16    {d25}, [r12:64]
            vmlal.u16   q14, d24, d2[3]
            vmlal.u16   q15, d25, d2[3]
            vmlal.u16   q14, d19, d2[3]
            vmlal.u16   q15, d20, d2[3]
    110:    add         r12, r9, #0x1b0
            bic         r12, r12, #0x200
            vld1.u16    {d24,d25}, [r12:128]
            vmlal.u16   q14, d24, d2[2]
            vmlal.u16   q15, d25, d2[2]
            vmlal.u16   q14, d18, d2[2]
            vmlal.u16   q15, d19, d2[2]
    109:    add         r12, r9, #0x1b8
            bic         r12, r12, #0x200
            vld1.u16    {d24}, [r12:64]!
            bic         r12, r12, #0x200
            vld1.u16    {d25}, [r12:64]
            vmlal.u16   q14, d24, d2[1]
            vmlal.u16   q15, d25, d2[1]
            vmlal.u16   q14, d17, d2[1]
            vmlal.u16   q15, d18, d2[1]
    108:    add         r12, r9, #0x1c0
            bic         r12, r12, #0x200
            vld1.u16    {d24,d25}, [r12:128]
            vmlal.u16   q14, d24, d2[0]
            vmlal.u16   q15, d25, d2[0]
            vmlal.u16   q14, d16, d2[0]
            vmlal.u16   q15, d17, d2[0]
    107:    add         r12, r9, #0x1c8
            bic         r12, r12, #0x200
            vld1.u16    {d24}, [r12:64]!
            bic         r12, r12, #0x200
            vld1.u16    {d25}, [r12:64]
            vmlal.u16   q14, d24, d1[3]
            vmlal.u16   q15, d25, d1[3]
            vmlal.u16   q14, d15, d1[3]
            vmlal.u16   q15, d16, d1[3]
    106:    add         r12, r9, #0x1d0
            bic         r12, r12, #0x200
            vld1.u16    {d24,d25}, [r12:128]
            vmlal.u16   q14, d24, d1[2]
            vmlal.u16   q15, d25, d1[2]
            vmlal.u16   q14, d14, d1[2]
            vmlal.u16   q15, d15, d1[2]
    105:    add         r12, r9, #0x1d8
            bic         r12, r12, #0x200
            vld1.u16    {d24}, [r12:64]!
            bic         r12, r12, #0x200
            vld1.u16    {d25}, [r12:64]
            vmlal.u16   q14, d24, d1[1]
            vmlal.u16   q15, d25, d1[1]
            vmlal.u16   q14, d13, d1[1]
            vmlal.u16   q15, d14, d1[1]
    104:    add         r12, r9, #0x1e0
            bic         r12, r12, #0x200
            vld1.u16    {d24,d25}, [r12:128]
            vmlal.u16   q14, d24, d1[0]
            vmlal.u16   q15, d25, d1[0]
            vmlal.u16   q14, d12, d1[0]
            vmlal.u16   q15, d13, d1[0]
    103:    add         r12, r9, #0x1e8
            bic         r12, r12, #0x200
            vld1.u16    {d24}, [r12:64]!
            bic         r12, r12, #0x200
            vld1.u16    {d25}, [r12:64]
            vmlal.u16   q14, d24, d0[3]
            vmlal.u16   q15, d25, d0[3]
            vmlal.u16   q14, d11, d0[3]
            vmlal.u16   q15, d12, d0[3]
    102:    add         r12, r9, #0x1f0
            bic         r12, r12, #0x200
            vld1.u16    {d24,d25}, [r12:128]
            vmlal.u16   q14, d24, d0[2]
            vmlal.u16   q15, d25, d0[2]
            vmlal.u16   q14, d10, d0[2]
            vmlal.u16   q15, d11, d0[2]
    101:    add         r12, r9, #0x1f8
            bic         r12, r12, #0x200
            vld1.u16    {d24}, [r12:64]
            vmlal.u16   q14, d24, d0[1]
            vmlal.u16   q15, d8, d0[1]
            vmlal.u16   q14, d9, d0[1]
            vmlal.u16   q15, d10, d0[1]

        vqrshrn.u32 d28, q14, #16
        vqrshrn.u32 d29, q15, #16
        vqrshrn.u16 d31, q14, #FRACTION_BITS

        vst1.u8     {q4}, [r9:128]!
        bic         r9, r9, #0x200
        vmov        q4, q5
        vmov        q5, q6
        vmov        q6, q7
        vmov        q7, q8
        vmov        q8, q9
        vmov        q9, q10
        vmov        q10, q11
.endm/*}}}*/

.macro hconv4_25/*{{{*/
        add         r12, r9, #0x198
        bic         r12, r12, #0x200
        vld1.u16    {d24}, [r12:64]!
        bic         r12, r12, #0x200
        vld1.u16    {d25}, [r12:64]
        vmull.u16   q14, d24, d0[0]
        vmull.u16   q15, d25, d0[0]

        ldr         r12, [pc, r5, LSL #2]
        add         pc, pc, r12
        bkpt
    100:    .word 101f-100b
            .word 102f-100b
            .word 103f-100b
            .word 104f-100b
            .word 105f-100b
            .word 106f-100b
            .word 107f-100b
            .word 108f-100b
            .word 109f-100b
            .word 110f-100b
Worker .word 111f-100b 874*e1eccf28SAndroid Build Coastguard Worker .word 112f-100b 875*e1eccf28SAndroid Build Coastguard Worker .word 113f-100b 876*e1eccf28SAndroid Build Coastguard Worker .word 114f-100b 877*e1eccf28SAndroid Build Coastguard Worker .word 115f-100b 878*e1eccf28SAndroid Build Coastguard Worker .word 116f-100b 879*e1eccf28SAndroid Build Coastguard Worker .word 117f-100b 880*e1eccf28SAndroid Build Coastguard Worker .word 118f-100b 881*e1eccf28SAndroid Build Coastguard Worker .word 119f-100b 882*e1eccf28SAndroid Build Coastguard Worker .word 120f-100b 883*e1eccf28SAndroid Build Coastguard Worker .word 121f-100b 884*e1eccf28SAndroid Build Coastguard Worker .word 122f-100b 885*e1eccf28SAndroid Build Coastguard Worker .word 123f-100b 886*e1eccf28SAndroid Build Coastguard Worker .word 124f-100b 887*e1eccf28SAndroid Build Coastguard Worker .word 125f-100b 888*e1eccf28SAndroid Build Coastguard Worker 125: add r12, r9, #0x0d0 889*e1eccf28SAndroid Build Coastguard Worker bic r12, r12, #0x200 890*e1eccf28SAndroid Build Coastguard Worker vld1.u16 {d24,d25}, [r12:128] 891*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q14, d24, d6[1] 892*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q15, d25, d6[1] 893*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q14, d20, d6[1] 894*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q15, d21, d6[1] 895*e1eccf28SAndroid Build Coastguard Worker 124: add r12, r9, #0x0d8 896*e1eccf28SAndroid Build Coastguard Worker bic r12, r12, #0x200 897*e1eccf28SAndroid Build Coastguard Worker vld1.u16 {d24}, [r12:64]! 
898*e1eccf28SAndroid Build Coastguard Worker bic r12, r12, #0x200 899*e1eccf28SAndroid Build Coastguard Worker vld1.u16 {d25}, [r12] 900*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q14, d24, d6[0] 901*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q15, d25, d6[0] 902*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q14, d19, d6[0] 903*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q15, d20, d6[0] 904*e1eccf28SAndroid Build Coastguard Worker 123: add r12, r9, #0x0e0 905*e1eccf28SAndroid Build Coastguard Worker bic r12, r12, #0x200 906*e1eccf28SAndroid Build Coastguard Worker vld1.u16 {d24,d25}, [r12:128] 907*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q14, d24, d5[3] 908*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q15, d25, d5[3] 909*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q14, d18, d5[3] 910*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q15, d19, d5[3] 911*e1eccf28SAndroid Build Coastguard Worker 122: add r12, r9, #0x0e8 912*e1eccf28SAndroid Build Coastguard Worker bic r12, r12, #0x200 913*e1eccf28SAndroid Build Coastguard Worker vld1.u16 {d24}, [r12:64]! 
914*e1eccf28SAndroid Build Coastguard Worker bic r12, r12, #0x200 915*e1eccf28SAndroid Build Coastguard Worker vld1.u16 {d25}, [r12] 916*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q14, d24, d5[2] 917*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q15, d25, d5[2] 918*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q14, d17, d5[2] 919*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q15, d18, d5[2] 920*e1eccf28SAndroid Build Coastguard Worker 121: add r12, r9, #0x0f0 921*e1eccf28SAndroid Build Coastguard Worker bic r12, r12, #0x200 922*e1eccf28SAndroid Build Coastguard Worker vld1.u16 {d24,d25}, [r12:128] 923*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q14, d24, d5[1] 924*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q15, d25, d5[1] 925*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q14, d16, d5[1] 926*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q15, d17, d5[1] 927*e1eccf28SAndroid Build Coastguard Worker 120: add r12, r9, #0x0f8 928*e1eccf28SAndroid Build Coastguard Worker bic r12, r12, #0x200 929*e1eccf28SAndroid Build Coastguard Worker vld1.u16 {d24}, [r12:64]! 
930*e1eccf28SAndroid Build Coastguard Worker bic r12, r12, #0x200 931*e1eccf28SAndroid Build Coastguard Worker vld1.u16 {d25}, [r12] 932*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q14, d24, d5[0] 933*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q15, d25, d5[0] 934*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q14, d15, d5[0] 935*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q15, d16, d5[0] 936*e1eccf28SAndroid Build Coastguard Worker 119: add r12, r9, #0x100 937*e1eccf28SAndroid Build Coastguard Worker bic r12, r12, #0x200 938*e1eccf28SAndroid Build Coastguard Worker vld1.u16 {d24,d25}, [r12:128] 939*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q14, d24, d4[3] 940*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q15, d25, d4[3] 941*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q14, d14, d4[3] 942*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q15, d15, d4[3] 943*e1eccf28SAndroid Build Coastguard Worker 118: add r12, r9, #0x108 944*e1eccf28SAndroid Build Coastguard Worker bic r12, r12, #0x200 945*e1eccf28SAndroid Build Coastguard Worker vld1.u16 {d24}, [r12:64]! 
946*e1eccf28SAndroid Build Coastguard Worker bic r12, r12, #0x200 947*e1eccf28SAndroid Build Coastguard Worker vld1.u16 {d25}, [r12] 948*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q14, d24, d4[2] 949*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q15, d25, d4[2] 950*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q14, d13, d4[2] 951*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q15, d14, d4[2] 952*e1eccf28SAndroid Build Coastguard Worker 117: add r12, r9, #0x110 953*e1eccf28SAndroid Build Coastguard Worker bic r12, r12, #0x200 954*e1eccf28SAndroid Build Coastguard Worker vld1.u16 {d24,d25}, [r12:128] 955*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q14, d24, d4[1] 956*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q15, d25, d4[1] 957*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q14, d12, d4[1] 958*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q15, d13, d4[1] 959*e1eccf28SAndroid Build Coastguard Worker 116: add r12, r9, #0x118 960*e1eccf28SAndroid Build Coastguard Worker bic r12, r12, #0x200 961*e1eccf28SAndroid Build Coastguard Worker vld1.u16 {d24}, [r12:64]! 
962*e1eccf28SAndroid Build Coastguard Worker bic r12, r12, #0x200 963*e1eccf28SAndroid Build Coastguard Worker vld1.u16 {d25}, [r12] 964*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q14, d24, d4[0] 965*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q15, d25, d4[0] 966*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q14, d11, d4[0] 967*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q15, d12, d4[0] 968*e1eccf28SAndroid Build Coastguard Worker 115: add r12, r9, #0x120 969*e1eccf28SAndroid Build Coastguard Worker bic r12, r12, #0x200 970*e1eccf28SAndroid Build Coastguard Worker vld1.u16 {d24,d25}, [r12:128] 971*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q14, d24, d3[3] 972*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q15, d25, d3[3] 973*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q14, d10, d3[3] 974*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q15, d11, d3[3] 975*e1eccf28SAndroid Build Coastguard Worker 114: add r12, r9, #0x128 976*e1eccf28SAndroid Build Coastguard Worker bic r12, r12, #0x200 977*e1eccf28SAndroid Build Coastguard Worker vld1.u16 {d24}, [r12:64]! 
978*e1eccf28SAndroid Build Coastguard Worker bic r12, r12, #0x200 979*e1eccf28SAndroid Build Coastguard Worker vld1.u16 {d25}, [r12] 980*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q14, d24, d3[2] 981*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q15, d25, d3[2] 982*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q14, d9, d3[2] 983*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q15, d10, d3[2] 984*e1eccf28SAndroid Build Coastguard Worker 113: add r12, r9, #0x130 985*e1eccf28SAndroid Build Coastguard Worker bic r12, r12, #0x200 986*e1eccf28SAndroid Build Coastguard Worker vld1.u16 {d24,d25}, [r12:128] 987*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q14, d24, d3[1] 988*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q15, d25, d3[1] 989*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q14, d8, d3[1] 990*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q15, d9, d3[1] 991*e1eccf28SAndroid Build Coastguard Worker 112: add r12, r9, #0x138 992*e1eccf28SAndroid Build Coastguard Worker bic r12, r12, #0x200 993*e1eccf28SAndroid Build Coastguard Worker vld1.u16 {d24}, [r12:64]! 994*e1eccf28SAndroid Build Coastguard Worker bic r12, r12, #0x200 995*e1eccf28SAndroid Build Coastguard Worker vld1.u16 {d25}, [r12] 996*e1eccf28SAndroid Build Coastguard Worker add r12, r9, #0x1f8 997*e1eccf28SAndroid Build Coastguard Worker bic r12, r12, #0x200 998*e1eccf28SAndroid Build Coastguard Worker vld1.u16 {d26}, [r12:64] 999*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q14, d24, d3[0] 1000*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q15, d25, d3[0] 1001*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q14, d26, d3[0] @ Could be d7, without the load, right? 
1002*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q15, d8, d3[0] 1003*e1eccf28SAndroid Build Coastguard Worker 111: add r12, r9, #0x140 1004*e1eccf28SAndroid Build Coastguard Worker bic r12, r12, #0x200 1005*e1eccf28SAndroid Build Coastguard Worker vld1.u16 {d24,d25}, [r12:128] 1006*e1eccf28SAndroid Build Coastguard Worker add r12, r9, #0x1f0 1007*e1eccf28SAndroid Build Coastguard Worker bic r12, r12, #0x200 1008*e1eccf28SAndroid Build Coastguard Worker vld1.u16 {d26,d27}, [r12:128] 1009*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q14, d24, d2[3] 1010*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q15, d25, d2[3] 1011*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q14, d26, d2[3] 1012*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q15, d27, d2[3] 1013*e1eccf28SAndroid Build Coastguard Worker 110: add r12, r9, #0x148 1014*e1eccf28SAndroid Build Coastguard Worker bic r12, r12, #0x200 1015*e1eccf28SAndroid Build Coastguard Worker vld1.u16 {d24}, [r12:64]! 1016*e1eccf28SAndroid Build Coastguard Worker bic r12, r12, #0x200 1017*e1eccf28SAndroid Build Coastguard Worker vld1.u16 {d25}, [r12] 1018*e1eccf28SAndroid Build Coastguard Worker add r12, r9, #0x1e8 1019*e1eccf28SAndroid Build Coastguard Worker bic r12, r12, #0x200 1020*e1eccf28SAndroid Build Coastguard Worker vld1.u16 {d26}, [r12:64]! 
1021*e1eccf28SAndroid Build Coastguard Worker bic r12, r12, #0x200 1022*e1eccf28SAndroid Build Coastguard Worker vld1.u16 {d27}, [r12:64] 1023*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q14, d24, d2[2] 1024*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q15, d25, d2[2] 1025*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q14, d26, d2[2] 1026*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q15, d27, d2[2] 1027*e1eccf28SAndroid Build Coastguard Worker 109: add r12, r9, #0x150 1028*e1eccf28SAndroid Build Coastguard Worker bic r12, r12, #0x200 1029*e1eccf28SAndroid Build Coastguard Worker vld1.u16 {d24,d25}, [r12:128] 1030*e1eccf28SAndroid Build Coastguard Worker add r12, r9, #0x1e0 1031*e1eccf28SAndroid Build Coastguard Worker bic r12, r12, #0x200 1032*e1eccf28SAndroid Build Coastguard Worker vld1.u16 {d26,d27}, [r12:128] 1033*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q14, d24, d2[1] 1034*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q15, d25, d2[1] 1035*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q14, d26, d2[1] 1036*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q15, d27, d2[1] 1037*e1eccf28SAndroid Build Coastguard Worker 108: add r12, r9, #0x158 1038*e1eccf28SAndroid Build Coastguard Worker bic r12, r12, #0x200 1039*e1eccf28SAndroid Build Coastguard Worker vld1.u16 {d24}, [r12:64]! 1040*e1eccf28SAndroid Build Coastguard Worker bic r12, r12, #0x200 1041*e1eccf28SAndroid Build Coastguard Worker vld1.u16 {d25}, [r12] 1042*e1eccf28SAndroid Build Coastguard Worker add r12, r9, #0x1d8 1043*e1eccf28SAndroid Build Coastguard Worker bic r12, r12, #0x200 1044*e1eccf28SAndroid Build Coastguard Worker vld1.u16 {d26}, [r12:64]! 
1045*e1eccf28SAndroid Build Coastguard Worker bic r12, r12, #0x200 1046*e1eccf28SAndroid Build Coastguard Worker vld1.u16 {d27}, [r12:64] 1047*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q14, d24, d2[0] 1048*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q15, d25, d2[0] 1049*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q14, d26, d2[0] 1050*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q15, d27, d2[0] 1051*e1eccf28SAndroid Build Coastguard Worker 107: add r12, r9, #0x160 1052*e1eccf28SAndroid Build Coastguard Worker bic r12, r12, #0x200 1053*e1eccf28SAndroid Build Coastguard Worker vld1.u16 {d24,d25}, [r12:128] 1054*e1eccf28SAndroid Build Coastguard Worker add r12, r9, #0x1d0 1055*e1eccf28SAndroid Build Coastguard Worker bic r12, r12, #0x200 1056*e1eccf28SAndroid Build Coastguard Worker vld1.u16 {d26,d27}, [r12:128] 1057*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q14, d24, d1[3] 1058*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q15, d25, d1[3] 1059*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q14, d26, d1[3] 1060*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q15, d27, d1[3] 1061*e1eccf28SAndroid Build Coastguard Worker 106: add r12, r9, #0x168 1062*e1eccf28SAndroid Build Coastguard Worker bic r12, r12, #0x200 1063*e1eccf28SAndroid Build Coastguard Worker vld1.u16 {d24}, [r12:64]! 1064*e1eccf28SAndroid Build Coastguard Worker bic r12, r12, #0x200 1065*e1eccf28SAndroid Build Coastguard Worker vld1.u16 {d25}, [r12] 1066*e1eccf28SAndroid Build Coastguard Worker add r12, r9, #0x1c8 1067*e1eccf28SAndroid Build Coastguard Worker bic r12, r12, #0x200 1068*e1eccf28SAndroid Build Coastguard Worker vld1.u16 {d26}, [r12:64]! 
1069*e1eccf28SAndroid Build Coastguard Worker bic r12, r12, #0x200 1070*e1eccf28SAndroid Build Coastguard Worker vld1.u16 {d27}, [r12:64] 1071*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q14, d24, d1[2] 1072*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q15, d25, d1[2] 1073*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q14, d26, d1[2] 1074*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q15, d27, d1[2] 1075*e1eccf28SAndroid Build Coastguard Worker 105: add r12, r9, #0x170 1076*e1eccf28SAndroid Build Coastguard Worker bic r12, r12, #0x200 1077*e1eccf28SAndroid Build Coastguard Worker vld1.u16 {d24,d25}, [r12:128] 1078*e1eccf28SAndroid Build Coastguard Worker add r12, r9, #0x1c0 1079*e1eccf28SAndroid Build Coastguard Worker bic r12, r12, #0x200 1080*e1eccf28SAndroid Build Coastguard Worker vld1.u16 {d26,d27}, [r12:128] 1081*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q14, d24, d1[1] 1082*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q15, d25, d1[1] 1083*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q14, d26, d1[1] 1084*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q15, d27, d1[1] 1085*e1eccf28SAndroid Build Coastguard Worker 104: add r12, r9, #0x178 1086*e1eccf28SAndroid Build Coastguard Worker bic r12, r12, #0x200 1087*e1eccf28SAndroid Build Coastguard Worker vld1.u16 {d24}, [r12:64]! 1088*e1eccf28SAndroid Build Coastguard Worker bic r12, r12, #0x200 1089*e1eccf28SAndroid Build Coastguard Worker vld1.u16 {d25}, [r12] 1090*e1eccf28SAndroid Build Coastguard Worker add r12, r9, #0x1b8 1091*e1eccf28SAndroid Build Coastguard Worker bic r12, r12, #0x200 1092*e1eccf28SAndroid Build Coastguard Worker vld1.u16 {d26}, [r12:64]! 
1093*e1eccf28SAndroid Build Coastguard Worker bic r12, r12, #0x200 1094*e1eccf28SAndroid Build Coastguard Worker vld1.u16 {d27}, [r12:64] 1095*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q14, d24, d1[0] 1096*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q15, d25, d1[0] 1097*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q14, d26, d1[0] 1098*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q15, d27, d1[0] 1099*e1eccf28SAndroid Build Coastguard Worker 103: add r12, r9, #0x180 1100*e1eccf28SAndroid Build Coastguard Worker bic r12, r12, #0x200 1101*e1eccf28SAndroid Build Coastguard Worker vld1.u16 {d24,d25}, [r12:128] 1102*e1eccf28SAndroid Build Coastguard Worker add r12, r9, #0x1b0 1103*e1eccf28SAndroid Build Coastguard Worker bic r12, r12, #0x200 1104*e1eccf28SAndroid Build Coastguard Worker vld1.u16 {d26,d27}, [r12:128] 1105*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q14, d24, d0[3] 1106*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q15, d25, d0[3] 1107*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q14, d26, d0[3] 1108*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q15, d27, d0[3] 1109*e1eccf28SAndroid Build Coastguard Worker 102: add r12, r9, #0x188 1110*e1eccf28SAndroid Build Coastguard Worker bic r12, r12, #0x200 1111*e1eccf28SAndroid Build Coastguard Worker vld1.u16 {d24}, [r12:64]! 1112*e1eccf28SAndroid Build Coastguard Worker bic r12, r12, #0x200 1113*e1eccf28SAndroid Build Coastguard Worker vld1.u16 {d25}, [r12] 1114*e1eccf28SAndroid Build Coastguard Worker add r12, r9, #0x1a8 1115*e1eccf28SAndroid Build Coastguard Worker bic r12, r12, #0x200 1116*e1eccf28SAndroid Build Coastguard Worker vld1.u16 {d26}, [r12:64]! 
1117*e1eccf28SAndroid Build Coastguard Worker bic r12, r12, #0x200 1118*e1eccf28SAndroid Build Coastguard Worker vld1.u16 {d27}, [r12:64] 1119*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q14, d24, d0[2] 1120*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q15, d25, d0[2] 1121*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q14, d26, d0[2] 1122*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q15, d27, d0[2] 1123*e1eccf28SAndroid Build Coastguard Worker 101: add r12, r9, #0x190 1124*e1eccf28SAndroid Build Coastguard Worker bic r12, r12, #0x200 1125*e1eccf28SAndroid Build Coastguard Worker vld1.u16 {d24,d25}, [r12:128]! 1126*e1eccf28SAndroid Build Coastguard Worker bic r12, r12, #0x200 1127*e1eccf28SAndroid Build Coastguard Worker vld1.u16 {d26,d27}, [r12:128] 1128*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q14, d24, d0[1] 1129*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q15, d25, d0[1] 1130*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q14, d26, d0[1] 1131*e1eccf28SAndroid Build Coastguard Worker vmlal.u16 q15, d27, d0[1] 1132*e1eccf28SAndroid Build Coastguard Worker 1133*e1eccf28SAndroid Build Coastguard Worker vqrshrn.u32 d28, q14, #16 1134*e1eccf28SAndroid Build Coastguard Worker vqrshrn.u32 d29, q15, #16 1135*e1eccf28SAndroid Build Coastguard Worker vqrshrn.u16 d31, q14, #FRACTION_BITS 1136*e1eccf28SAndroid Build Coastguard Worker 1137*e1eccf28SAndroid Build Coastguard Worker vst1.u8 {q4}, [r9:128]! 
/* Tail of the convolution macro: mask bit 9 out of r9 after the
 * post-incrementing store just above (the same #0x200 mask is applied to
 * every window address computed in this macro; presumably it wraps the pointer
 * inside the spill buffer -- TODO confirm against the buffer setup), then
 * slide the register-held window down by one q register (q4 <- q5 ...
 * q10 <- q11).
 */
            bic         r9, r9, #0x200
            vmov        q4, q5
            vmov        q5, q6
            vmov        q6, q7
            vmov        q7, q8
            vmov        q8, q9
            vmov        q9, q10
            vmov        q10, q11
.endm/*}}}*/

/* Dedicated function wrapper for the fetch macro, for the cases where
 * performance isn't that important, to keep code size down.
 * r10 and r11 are pushed before the macro expansion and popped after it, so
 * both are preserved across a call to this function.
 */
PRIVATE(fetch_generic_asm)
            push        {r10,r11}
            fetch
            pop         {r10,r11}
            bx          lr
END(fetch_generic_asm)


/* Fetch the next (16 - (r10 & 15)) columns of data, avoiding reading memory
 * beyond that limit, and filling the vacated low end of the vector with the
 * first legal pixel (the fetched columns are moved up by (r10 & 15) places).
 * Result is in q10 and q11.  q8 and q9 are filled with the first legal pixel.
 * When (r10 & 15) is non-zero, r1 and r10 are both reduced by that amount.
 * Note: This function can read beyond the right edge of input if the image is
 * narrower than 16 bytes.
*/
PRIVATE(fetch_clampleft1)
            push        {r12,lr}
            bl          fetch_generic_asm
            /* Padding value: lane 0 of the fetched data, replicated. */
            vdup.u16    q8, d20[0]
            vdup.u16    q9, d20[0]
            /* Nothing to shift when r10 is a multiple of 16. */
            ands        r12, r10, #15
            beq         1f
            sub         r1, r1, r12
            sub         r10, r10, r12
            /* Lay out [q8,q9][q10,q11] contiguously on the stack and reload 32
             * bytes starting r12 halfwords before the fetched data, so the low
             * r12 lanes come from the padding.
             */
            sub         sp, sp, #32
            vst1.u16    {q10,q11}, [sp]
            sub         r12, sp, r12, LSL #1
            sub         sp, sp, #32
            vst1.u16    {q8,q9}, [sp]
            vld1.u16    {q10,q11}, [r12]
            add         sp, sp, #64
1:          pop         {r12,pc}
END(fetch_clampleft1)

/* Same as fetch_clampleft1, except that the padding in q8 and q9 is the first
 * four 16-bit lanes of the fetched data (d20) repeated, rather than a single
 * lane (presumably one four-channel pixel -- TODO confirm against callers).
 */
PRIVATE(fetch_clampleft4)
            push        {r12,lr}
            bl          fetch_generic_asm
            vmov.u16    d16, d20
            vmov.u16    d17, d20
            vmov.u16    d18, d20
            vmov.u16    d19, d20
            /* Nothing to shift when r10 is a multiple of 16. */
            ands        r12, r10, #15
            beq         1f
            sub         r1, r1, r12
            sub         r10, r10, r12
            /* Stack layout and reload are identical to fetch_clampleft1. */
            sub         sp, sp, #32
            vst1.u16    {q10-q11}, [sp]
            sub         r12, sp, r12, LSL #1
            sub         sp, sp, #32
            vst1.u16    {q8,q9}, [sp]
            vld1.u16    {q10,q11}, [r12]
            add         sp, sp, #64
1:          pop         {r12,pc}
END(fetch_clampleft4)

/* Fetch only the next (r11 & 15) (where 0 means 16) columns of data, avoiding
 * reading memory beyond that limit, and filling the rest of the vector with
 * the last legal pixel.
 * Result is in q10 and q11.  q12 and q13 are filled with the last legal pixel.
 * Note: This function can read beyond the left edge of input if the image is
 * narrower than 16 bytes.
*/
PRIVATE(fetch_clampright1)
            push        {r12, lr}
            /* r12 = (-r11) & 15; zero means no clamping is required. */
            rsb         r12, r11, #0
            ands        r12, r12, #15
            beq         1f
            sub         r1, r1, r12
            bl          fetch_generic_asm
            /* Padding value: the last lane of the fetched data, replicated. */
            vdup.u16    q12, d23[3]
            vdup.u16    q13, d23[3]
            /* r12 is recomputed after the call (presumably because the fetch
             * does not preserve it -- TODO confirm against the fetch macro).
             */
            rsb         r12, r11, #0
            and         r12, r12, #15
            /* Lay out [q10,q11][q12,q13] contiguously on the stack and reload
             * 32 bytes starting r12 halfwords into the fetched data, so the
             * top r12 lanes come from the padding.
             */
            sub         sp, sp, #32
            vst1.u16    {q12,q13}, [sp]
            sub         sp, sp, #32
            add         r12, sp, r12, LSL #1
            vst1.u16    {q10,q11}, [sp]
            vld1.u16    {q10,q11}, [r12]
            add         sp, sp, #64
            pop         {r12,pc}
            /* Fast path: plain fetch, then prepare the padding registers. */
1:          bl          fetch_generic_asm
            vdup.u16    q12, d23[3]
            vdup.u16    q13, d23[3]
            pop         {r12,pc}
END(fetch_clampright1)

/* Same as fetch_clampright1, except that the padding in q12 and q13 is the
 * last four 16-bit lanes of the fetched data (d23) repeated, rather than a
 * single lane (presumably one four-channel pixel -- TODO confirm against
 * callers).
 */
PRIVATE(fetch_clampright4)
            push        {r12, lr}
            /* r12 = (-r11) & 15; zero means no clamping is required. */
            rsb         r12, r11, #0
            ands        r12, r12, #15
            beq         1f
            sub         r1, r1, r12
            bl          fetch_generic_asm
            vmov.u16    d24, d23
            vmov.u16    d25, d23
            vmov.u16    d26, d23
            vmov.u16    d27, d23
            /* r12 is recomputed after the call, as in fetch_clampright1. */
            rsb         r12, r11, #0
            and         r12, r12, #15
            /* Stack layout and reload are identical to fetch_clampright1. */
            sub         sp, sp, #32
            vst1.u16    {q12-q13}, [sp]
            sub         sp, sp, #32
            add         r12, sp, r12, LSL #1
            vst1.u16    {q10,q11}, [sp]
            vld1.u16    {q10,q11}, [r12]
            add         sp, sp, #64
            pop         {r12,pc}
            /* Fast path: plain fetch, then prepare the padding registers. */
1:          bl          fetch_generic_asm
            vmov.u16    d24, d23
            vmov.u16    d25, d23
            vmov.u16    d26, d23
            vmov.u16    d27, d23
            pop         {r12,pc}
END(fetch_clampright4)

/* Given values in q10 and q11, and an index in r11, sweep the (r11 & 15)th
 * value across to fill the rest of the register pair.  Used for filling the
 * right hand edge of the window when reading too close to the right hand edge
 * of the image.
*/
/* The sweepright functions also return a dup-ed copy of the last element in
 * q12 and q13 for the tail-fill case (this happens incidentally in common
 * path, but must be done deliberately in the fast-out path).
 * r12 is used as scratch and is not preserved.
 */
PRIVATE(prefill_sweepright1)
            /* (r11 & 15) == 0: nothing to sweep, take the fast-out path. */
            ands        r12, r11, #15
            beq         1f
            /* Zero-based index of the element to replicate. */
            sub         r12, r12, #1
            /* 64 bytes of scratch: q10,q11 occupy the first 32, and the
             * 32-byte store below may run past them into the remainder.
             */
            sub         sp, sp, #64
            vst1.u16    {q10,q11}, [sp]
            add         r12, sp, r12, LSL #1
            /* Load that one halfword into every lane of q12 and q13. */
            vld1.u16    {d24[],d25[]}, [r12]
            vld1.u16    {d26[],d27[]}, [r12]
            /* Overwrite everything from that element onwards, then reload. */
            vst1.u16    {q12,q13}, [r12]
            vld1.u16    {q10,q11}, [sp]
            add         sp, sp, #64
            bx          lr
            /* Fast-out path: q10,q11 are left alone; only build the padding. */
1:          vdup.u16    q12, d23[3]
            vdup.u16    q13, d23[3]
            bx          lr
END(prefill_sweepright1)

/* Four-lane variant of prefill_sweepright1: the group of four 16-bit lanes
 * ending at index (r11 & 15) is replicated instead of a single lane.
 */
PRIVATE(prefill_sweepright4)
            /* (r11 & 15) == 0: nothing to sweep, take the fast-out path. */
            ands        r12, r11, #15
            beq         1f
            /* Index (in halfwords) of the first lane of the group to copy. */
            sub         r12, r12, #4
            sub         sp, sp, #64
            vst1.u16    {q10,q11}, [sp]
            add         r12, sp, r12, LSL #1
            /* Load the same eight bytes into each half of q12 and q13. */
            vld1.u64    {d24}, [r12]
            vld1.u64    {d25}, [r12]
            vld1.u64    {d26}, [r12]
            vld1.u64    {d27}, [r12]
            /* Overwrite everything from that group onwards, then reload. */
            vst1.u16    {q12,q13}, [r12]
            vld1.u16    {q10,q11}, [sp]
            add         sp, sp, #64
            bx          lr
            /* Fast-out path: q10,q11 are left alone; only build the padding. */
1:          vmov.u16    d24, d23
            vmov.u16    d25, d23
            vmov.u16    d26, d23
            vmov.u16    d27, d23
            bx          lr
END(prefill_sweepright4)

/* The main loop keeps a sliding window of data that has already been convolved
 * in the vertical axis for the current line.  This usually stays in the
 * register file, but spills to memory for large windows.  The first thing that
 * needs to be done at start-up is to fill this window with image data, taking
 * into account the padding needed if the left or right edges of the image fall
 * within this window.
 */

/* Because the window is in the register file writes to it cannot be indexed
 * by another register.
*/
/* Consequently the fill loops are unrolled to address the registers directly.
 * This macro distinguishes between writes to the register file and writes to
 * the spill buffer (indicated by a destination register named xx).
 *
 *   ra, rb     destination registers, or xx for the spill buffer
 *   sra, srb   source registers holding the 16 values to write
 *   srb_hi     d-register name for the upper half of srb, used only when ra
 *              is xx and rb is a single d register
 *
 * Behaviour by destination:
 *   ra == xx, rb == xx   store sra,srb through r9 (post-incremented)
 *   ra == xx, rb != xx   copy srb_hi to rb; sra is discarded
 *   otherwise            copy sra to ra and srb to rb, skipping any copy whose
 *                        source and destination names are identical
 */
.macro prefill_out ra, rb, sra, srb, srb_hi
  .ifc \ra,xx
    .ifc \rb,xx
            vst1.u16    {\sra,\srb}, [r9:128]!
    .else
            /* this case is used only for the last tap of uchar1 r=25 */
            /* discard \sra */
            vmov.u16    \rb, \srb_hi
    .endif
  .else
    .ifnc \ra,\sra
            vmov.u16    \ra, \sra
    .endif
    .ifnc \rb,\srb
            vmov.u16    \rb, \srb
    .endif
  .endif
.endm

/* This macro provides the list of registers representing the window, and the
 * cases where the register file is too small and a spill buffer is used
 * instead.
*/
/* Since several specialisations of each function are generated, prefill_list
 * also culls superfluous iterations, and sets the variable `i` for subsequent
 * macros indicating the current index into the window.
 *
 * For each window line N that fits in `windowsize` (N * 16 <= windowsize) it
 * emits the label <label><macro>N followed by an expansion of prefill_<macro>,
 * passing <label><nextmacro>N and <label><nextmacro>(N-1) as that stage's two
 * branch targets, the destination register pair for line N, and `step`.
 * The list finishes with the label <label><macro>0, which branches to
 * <label>_end.
 * The max_r argument is not referenced in this macro body.
 */
.macro prefill_list, macro, nextmacro, max_r, step, label
  .macro ifneeded macro, nextmacro, line, nextline, ra, rb, step, label
    .if windowsize >= (\line * 16)
      .set i, windowsize - (\line * 16)
\label\macro\line:
            prefill_\macro \label\nextmacro\line, \label\nextmacro\nextline, \ra, \rb, \step
    .endif
  .endm
  .if \step > 1
            ifneeded \macro \nextmacro, 13, 12, xx, xx,  \step, \label
            ifneeded \macro \nextmacro, 12, 11, xx, xx,  \step, \label
            ifneeded \macro \nextmacro, 11, 10, xx, xx,  \step, \label
            ifneeded \macro \nextmacro, 10,  9, xx, xx,  \step, \label
            ifneeded \macro \nextmacro,  9,  8, xx, xx,  \step, \label
            ifneeded \macro \nextmacro,  8,  7, xx, xx,  \step, \label
            ifneeded \macro \nextmacro,  7,  6, xx, xx,  \step, \label
            ifneeded \macro \nextmacro,  6,  5, xx, xx,  \step, \label
            ifneeded \macro \nextmacro,  5,  4, xx, xx,  \step, \label
            ifneeded \macro \nextmacro,  4,  3, xx, xx,  \step, \label
  .else
            /* q3 normally contains the coefficient table, but it's not fully
             * used.  In the uchar1, r=25 case the other half of q3 is used for
             * the last two window taps to avoid falling out to memory.
             */
            ifneeded \macro \nextmacro,  4,  3, xx, d7,  \step, \label
  .endif
            ifneeded \macro \nextmacro,  3,  2, q4, q5,  \step, \label
            ifneeded \macro \nextmacro,  2,  1, q6, q7,  \step, \label
            ifneeded \macro \nextmacro,  1,  0, q8, q9,  \step, \label

\label\macro\()0:
            b           \label\()_end
  .purgem ifneeded
.endm

/* These macros represent the possible stages of filling the window.
 * Each macro is unrolled enough times that it can fill the entire window
 * itself, but normally it will have to hand control to subsequent macros
 * part-way through and this is done using labels named \next and \after, where
 * \next is the next macro starting at the same window position and \after is
 * the next macro starting after the current window position.
 */

/* leftfill: q8 and q9 contain the left padding value.  While the window
 * extends outside of the image on the left-hand side, and at least 16 more
 * padding values are needed in the window, store q8 and q9 into the window.
 * Otherwise skip forward to storing image data.
1402*e1eccf28SAndroid Build Coastguard Worker */ 1403*e1eccf28SAndroid Build Coastguard Worker.macro prefill_leftfill, next, after, ra, rb, step 1404*e1eccf28SAndroid Build Coastguard Worker cmp r10, #i+16 1405*e1eccf28SAndroid Build Coastguard Worker blo \next 1406*e1eccf28SAndroid Build Coastguard Worker prefill_out \ra, \rb, q8, q9, d19 1407*e1eccf28SAndroid Build Coastguard Worker.endm 1408*e1eccf28SAndroid Build Coastguard Worker 1409*e1eccf28SAndroid Build Coastguard Worker/* leftedge: The very first non-fill or partial-fill chunk from the image is 1410*e1eccf28SAndroid Build Coastguard Worker * already loaded (as it was used to calculate the left padding value), so 1411*e1eccf28SAndroid Build Coastguard Worker * store it here, and then drop into the regular load/store cycle in the next 1412*e1eccf28SAndroid Build Coastguard Worker * macro. 1413*e1eccf28SAndroid Build Coastguard Worker */ 1414*e1eccf28SAndroid Build Coastguard Worker.macro prefill_leftedge, next, after, ra, rb, step 1415*e1eccf28SAndroid Build Coastguard Worker1: prefill_out \ra, \rb, q10, q11, d23 1416*e1eccf28SAndroid Build Coastguard Worker b \after 1417*e1eccf28SAndroid Build Coastguard Worker.endm 1418*e1eccf28SAndroid Build Coastguard Worker 1419*e1eccf28SAndroid Build Coastguard Worker/* dofetch: Copy chunks of the image into the window without any complications 1420*e1eccf28SAndroid Build Coastguard Worker * from edge conditions. 
*/
.macro prefill_dofetch, next, after, ra, rb, step
            /* r11 is the window index at which image data ends.  Hand over to
             * the following stage unless r11 is greater than i + 16 (i is set
             * outside this macro).
             */
            cmp         r11, #(i + 16)
            bls         \next
            bl          fetch_generic_asm
            prefill_out \ra, \rb, q10, q11, d23
.endm

/* rightedge: The most recent fetch (held in q10 and q11) may have run past the
 * right-hand edge of the image.  If it did, the last valid pixel is swept
 * across the remainder of the chunk; either way, padding data for the
 * following stage is prepared in q12 and q13.  fetch_clampright does this
 * work.
 * This stage runs at most once before moving on to the following stage.
 * In some cases leftedge has already dealt with the right edge as well, and
 * then this stage has to be skipped altogether.
*/
.macro prefill_rightedge, next, after, ra, rb, step
            /* Nothing to do here when the image data ends at or before window
             * index i (r11 <= i); i is set outside this macro.
             */
            cmp         r11, #i
            bls         \next
            bl          fetch_clampright\step
            prefill_out \ra, \rb, q10, q11, d23
            b           \after
.endm

/* rightfill: Whatever is left of the window is filled with the right padding
 * held in q12 and q13.
 */
.macro prefill_rightfill, next, after, ra, rb, step
            prefill_out \ra, \rb, q12, q13, d25
.endm

/* Here every stage macro above is unrolled and laid out in the required order.
*/
.macro prefill_body, max_r, step, label
            /* Each line names a stage and the stage that follows it.  The
             * last stage is given the successor name "oops"; presumably that
             * is only a placeholder (confirm against prefill_list).
             */
            prefill_list leftfill,  leftedge,  \max_r, \step, \label
            prefill_list leftedge,  dofetch,   \max_r, \step, \label
            prefill_list dofetch,   rightedge, \max_r, \step, \label
            prefill_list rightedge, rightfill, \max_r, \step, \label
            prefill_list rightfill, oops,      \max_r, \step, \label
\label\()_end:
.endm

/* Fill the convolution window with context data.  The goal is to load exactly
 * 2*r columns here, so that the main loop can read as many columns as it
 * writes.  Two things complicate this: the window is split into chunks at
 * register boundaries, and the input may start very close to the left edge,
 * the right edge, or both edges of the image, in which case the gaps have to
 * be filled with left and right edge padding values.
*
* Input:
*      r1 -- src
*      r2 -- pitch
*      r3 -- count
*      r4 -- available image data right of src pointer
*      r5 -- r
*      r6 -- rup
*      r7 -- rdn
*      r8 -- available image data left of src pointer
*      r9 -- buffer (if needed)
* Output (as documented for this routine):
*      r4 -= min(inlen, count + windowsize - centertap)
*      r1 += min(inlen, count + windowsize - centertap)
* Modifies:
*      r10 -- fill start index in the window
*      r11 -- fill stop index in the window
*      r12 -- scratch
*
* NOTE(review): the instructions in this macro reduce r4 by
* (windowsize - centertap), clamped at zero, and never read count (r3);
* confirm the documented output against the fetch helpers.
*/
.macro prefill step=1, max_r=25, label=xx
.set windowsize, (((\max_r + \max_r) * \step + 15) & ~15)
.set centertap, (windowsize - \max_r * \step)
            /* r10 = max(centertap - r8, 0) */
            mov         r10, #centertap
            subs        r10, r10, r8
            movlo       r10, #0

            /* r11 = windowsize when r4 >= windowsize - centertap,
             * otherwise r11 = centertap + r4.
             */
            subs        r11, r4, #(windowsize - centertap)
            movhs       r11, #0
            add         r11, r11, #windowsize

            /* r10 is the window index where valid image data begins and r11
             * is the window index where valid image data ends.  Starting near
             * the centre of a large image they are zero and windowsize
             * respectively; starting near an edge changes that.
             * Starting on the leftmost pixel gives r10 == centertap.
             * Starting on the rightmost pixel gives r11 == centertap+1.
             */

            /* r4 is the amount of data between the current pointers and the
             * right edge of the image.  The pointers currently address the
             * data needed at centertap.  The code that follows consumes
             * (windowsize - r10) data, but only the part from centertap to
             * windowsize is charged to r4.
             */
1:          subs        r4, r4, #(windowsize - centertap)
            movlo       r4, #0

            /* Rewind the pointers to the start of the window...
             */
            sub         r1, r1, #centertap

            /* ...except for whatever r8 said was not available on the left.
             */
            add         r1, r1, r10


            /* Fetch the first chunk, padded to line up with the window if
             * that is necessary.
             */
            bl          fetch_clampleft\step

            /* The start and the end of the valid data sometimes fall in the
             * same 16-wide chunk (r10 and r11-1 differ only in their low four
             * bits).  Both ends then need filler right away.
             */
            sub         r12, r11, #1
            eor         r12, r10, r12
            cmp         r12, #16
            bllo        prefill_sweepright\step

            /* Walk over every position in the window and fill it with padding
             * or image data as appropriate.
             */
            prefill_body \max_r, \step, \label
.endm

/* The main body of the convolve functions.
* With 2*r input values already sitting in the convolution window, the logic
* settles into a steady rhythm of reading and writing at a 1:1 rate until
* either the input or the output is exhausted.  The input runs r values ahead
* of the output, so when processing up to the right-hand edge, or to within r
* pixels of it, the input is exhausted first.  For very narrow images, or for
* sub-windows that start near the right edge, the input may already have been
* used up while the window was being filled, and this loop then starts with a
* zero-length input.
*
* After the input is exhausted, the remaining output is produced by padding
* the rest of the window with the value of the last valid source pixel.
*
* Input:
*      r0 = dst
*      r1 = src
*      r2 = pitch
*      r3 = count
*      r4 = inlen
*      r5 = r
*      r6 = rup
*      r7 = rdn
*      r9 = buffer
* Modifies
*      r8 = fetch code pointer
*      r0 is set to zero on exit.
*/
.macro conv_body core, step=1, max_r=25, labelc="", labelnc=""

            /* When r4 >= r3 no clipping is required.  The main loop has to
             * stop when either r3 or r4 is used up, so r4 is clamped to be no
             * larger than r3 and r4 alone drives the loop.
             * If r4 left the loop with fewer than 16 bytes remaining, a
             * partial read would be needed to stay inside the image.  To
             * avoid that, r4 is clamped to r3 rounded up to a multiple of 16;
             * that still terminates the loop but does not imply a rewind.
             */
            add         r12, r3, #15
            bic         r12, r12, #15
            cmp         r4, r12
            movhi       r4, r12

            /* Work out the entry point into the internal fetch logic, so that
             * one function can serve several kernel sizes.  The literal below
             * is pc-relative: r8 = labelnc - 48 * r.
             */
            ldr         r8, 3f
1:          add         r8, r8, pc
            sub         r8, r8, r5, LSL #5
            sub         r8, r8, r5, LSL #4
            cmp         r5, r6
            cmpeq       r5, r7
            beq         5f

            /* r differs from rup or from rdn, so the address-clamping table
             * has to be used instead of the short-cut version:
             * r8 = labelc - 64 * r.
             */
            ldr         r8, 3f+4
2:          add         r8, r8, pc
            sub         r8, r8, r5, LSL #6
            b           5f
            .align 3
3:          .word       \labelnc-1b-8
            .word       \labelc-2b-8

            /* Main loop: ... */
            .align 4
3:          /* A vertical convolution from memory brings the next 16 taps of
             * the horizontal window into the register file...
             */
            fetch max_r=\max_r, labelc=\labelc, labelnc=\labelnc, reg=r8

            /* ...and a horizontal convolution over that window then yields
             * eight output bytes and slides the window along.  It runs twice
             * to keep up with the 16-way vertical pass.  Doing twice the work
             * inside the core macro would be preferable, but that would need
             * yet another variant of those macros and would upset the
             * register allocation badly.
             */
            \core
            vst1.u8     {d31}, [r0]!
            \core
            vst1.u8     {d31}, [r0]!

            sub         r3, r3, #16
5:          subs        r4, r4, #16
            bhi         3b
            /* At this point 16 or fewer bytes remain before the edge of the
             * source image, and r4 holds that number minus 16 (it was
             * decremented before the first iteration ran).  The final read
             * may not be a whole chunk, and past it a fill value is needed.
             *
             * None of this matters when no output is left to produce...
             */
            cmp         r3, #0
            beq         5f

            /* Oh well.  Restore r4 to the number of bytes remaining. */
            adds        r4, r4, #16
            bne         1f
            /* No input left at all: replicate the end of q9 into q10/q11. */
  .if \step==1
            vdup.u16    q10, d19[3]
            vdup.u16    q11, d19[3]
  .else
            vmov.u64    d20, d19
            vmov.u64    d21, d19
            vmov.u64    d22, d19
            vmov.u64    d23, d19
  .endif
            b           3f

            /* So as not to read past the end of the input, the pointers are
             * rewound by (16-r4), leaving them exactly 16 bytes from the
             * edge.
             */
1:          mov         r11, r4
            bl          fetch_clampright\step
            /* Use the padding for all remaining iterations.  These run at
             * half the rate of the main loop, since a 16-lane window filler
             * no longer has to be kept busy.
             */
3:          \core
  .if \step==1
            vdup.u16    q11, d23[3]
  .else
            vmov.u64    d22, d23
  .endif
            subs        r3, r3, #8
            blo         4f
            vst1.u8     {d31}, [r0]!
            bne         3b
            b           5f

            /* The last iteration held 0 < l < 8 values, so the final vector
             * is stored piece by piece.
             */
4:          tst         r3, #4
            beq         1f
            vst1.u32    {d31[0]}, [r0]!
            vext.u8     d31, d31, d31, #4
1:          tst         r3, #2
            beq         1f
            vst1.u16    {d31[0]}, [r0]!
            vext.u8     d31, d31, d31, #2
1:          tst         r3, #1
            beq         5f
            vst1.u8     {d31[0]}, [r0]!
            vext.u8     d31, d31, d31, #1
5:          mov         r0, #0
.endm

/* One single-channel convolve routine per radius in TUNED_LIST1, plus one for
 * radius 25.  Each routine fills the window and then runs the main body.  It
 * is entered by a plain branch with lr already pointing at the return address.
 */
.irp r, TUNED_LIST1, 25
PRIVATE(convolve1_\r)
            push        {r12,lr}

            prefill     step=1, max_r=\r, label=.Lcnv1_\r

            conv_body   core=hconv1_\r, step=1, max_r=\r, labelc=.Lcnv1_\r, labelnc=.Lcnvnc1_\r

            pop         {r12,pc}
END(convolve1_\r)
.endr

/* One four-channel convolve routine per radius in TUNED_LIST4, plus one for
 * radius 25.  In addition to what the single-channel routines do, these
 * reserve 0x600 bytes of stack and pass a buffer inside that area in r9.
 */
.irp r, TUNED_LIST4, 25
PRIVATE(convolve4_\r)
            push        {r12,lr}
            sub         r9, sp, #0x200
            sub         sp, sp, #(0x200 + 0x400)
            bic         r9, r9, #0x3fc

            /* r9 now addresses a 0x200 byte buffer on the stack.  Bits 2-9 of
             * the address were just cleared, so with a word-aligned sp the
             * low 10 bits are all zero.  That makes address calculation easy
             * in the wrap-around cases.
             */

            prefill     step=4, max_r=\r, label=.Lcnv4_\r

            conv_body   core=hconv4_\r, step=4, max_r=\r, labelc=.Lcnv4_\r, labelnc=.Lcnvnc4_\r

            add         sp, sp, #(0x200 + 0x400)
            pop         {r12,pc}
END(convolve4_\r)
.endr

/* void rsdIntrinsicBlurU1_K(
 *                  void *out,      // r0
 *                  void *in,       // r1
 *                  size_t w,       // r2
 *                  size_t h,       // r3
 *                  size_t p,       // [sp]
 *                  size_t x,       // [sp,#4]
 *                  size_t y,       // [sp,#8]
 *                  size_t count,   // [sp,#12]
 *                  size_t r,       // [sp,#16]
 *                  uint16_t *tab); // [sp,#20]
 *
 * The prologue pushes ten core registers (40 bytes) and d8-d15 (64 bytes), so
 * the stack arguments listed above are found 104 bytes higher once it has run.
 */
ENTRY(rsdIntrinsicBlurU1_K)
            push        {r4-r12,lr}
            vpush       {d8-d15}
            ldr         r6, [sp,#112]   /* y */
            ldr         r8, [sp,#108]   /* x */
            ldr         r5, [sp,#120]   /* r */
            sub         r4, r2, r8      /* inlen = w - x */
            sub         r7, r3, r6      /* h - y */
            ldr         r2, [sp,#104]   /* pitch */
            ldr         r3, [sp,#116]   /* count */
            sub         r7, r7, #1      /* h - y - 1 */

            ldr         r12, [sp,#124]  /* tab */

            add         r1, r1, r8      /* src += x */

            cmp         r6, r5
            movhi       r6, r5          /* rup = min(r, y) */
            cmp         r7, r5
            movhi       r7, r5          /* rdn = min(r, h - y - 1) */

            /* Load 28 halfwords from tab into d0-d6. */
            vld1.u16    {d0-d3}, [r12]!
            vld1.u16    {d4-d6}, [r12]!

            /* Branch to the first tuned routine whose radius is >= r, with lr
             * set so that the routine returns to the epilogue below.
             */
            adr         lr, 1f
  .irp r, TUNED_LIST1
            cmp         r5, #\r
            bls         convolve1_\r
  .endr
            b           convolve1_25

1:          vpop        {d8-d15}
            pop         {r4-r12,pc}
END(rsdIntrinsicBlurU1_K)

/* void rsdIntrinsicBlurU4_K(
 *                  void *out,      // r0
 *                  void *in,       // r1
 *                  size_t w,       // r2
 *                  size_t h,       // r3
 *                  size_t p,       // [sp]
 *                  size_t x,       // [sp,#4]
 *                  size_t y,       // [sp,#8]
 *                  size_t count,   // [sp,#12]
 *                  size_t r,       // [sp,#16]
 *                  uint16_t *tab); // [sp,#20]
 *
 * Same layout as the single-channel entry point, except that x, w and count
 * are multiplied by four before use.
 */
ENTRY(rsdIntrinsicBlurU4_K)
            push        {r4-r12,lr}
            vpush       {d8-d15}
            ldr         r6, [sp,#112]   /* y */
            ldr         r8, [sp,#108]   /* x */
            ldr         r5, [sp,#120]   /* r */
            lsl         r8, r8, #2      /* x * 4 */
            rsb         r4, r8, r2, LSL #2 /* inlen = (w - x) * 4 */
            sub         r7, r3, r6      /* h - y */
            ldr         r2, [sp,#104]   /* pitch */
            ldr         r3, [sp,#116]   /* count */
            sub         r7, r7, #1      /* h - y - 1 */
            lsl         r3, r3, #2      /* count * 4 */

            ldr         r12, [sp,#124]  /* tab */

            add         r1, r1, r8      /* in += x * 4 */

            cmp         r6, r5
            movhi       r6, r5          /* rup = min(r, y) */
            cmp         r7, r5
            movhi       r7, r5          /* rdn = min(r, h - y - 1) */

            /* Load 28 halfwords from tab into d0-d6. */
            vld1.u16    {d0-d3}, [r12]!
            vld1.u16    {d4-d6}, [r12]!

            /* Branch to the first tuned routine whose radius is >= r, with lr
             * set so that the routine returns to the epilogue below.
             */
            adr         lr, 1f
  .irp r, TUNED_LIST4
            cmp         r5, #\r
            bls         convolve4_\r
  .endr
            b           convolve4_25

1:          vpop        {d8-d15}
            pop         {r4-r12,pc}
END(rsdIntrinsicBlurU4_K)