xref: /aosp_15_r20/frameworks/rs/toolkit/Blur_neon.S (revision e1eccf28f96817838ad6867f7f39d2351ec11f56)
1*e1eccf28SAndroid Build Coastguard Worker/*
2*e1eccf28SAndroid Build Coastguard Worker * Copyright (C) 2014 The Android Open Source Project
3*e1eccf28SAndroid Build Coastguard Worker *
4*e1eccf28SAndroid Build Coastguard Worker * Licensed under the Apache License, Version 2.0 (the "License");
5*e1eccf28SAndroid Build Coastguard Worker * you may not use this file except in compliance with the License.
6*e1eccf28SAndroid Build Coastguard Worker * You may obtain a copy of the License at
7*e1eccf28SAndroid Build Coastguard Worker *
8*e1eccf28SAndroid Build Coastguard Worker *      http://www.apache.org/licenses/LICENSE-2.0
9*e1eccf28SAndroid Build Coastguard Worker *
10*e1eccf28SAndroid Build Coastguard Worker * Unless required by applicable law or agreed to in writing, software
11*e1eccf28SAndroid Build Coastguard Worker * distributed under the License is distributed on an "AS IS" BASIS,
12*e1eccf28SAndroid Build Coastguard Worker * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13*e1eccf28SAndroid Build Coastguard Worker * See the License for the specific language governing permissions and
14*e1eccf28SAndroid Build Coastguard Worker * limitations under the License.
15*e1eccf28SAndroid Build Coastguard Worker */
16*e1eccf28SAndroid Build Coastguard Worker
/* Function framing helpers.
 *   ENTRY(f)   -- switch to .text, apply `.align 4`, export f with .globl,
 *                 mark f as a function symbol, define the label f, and open
 *                 an unwind region with .fnstart.
 *   PRIVATE(f) -- the same sequence without .globl, so f is not exported.
 *   END(f)     -- close the unwind region with .fnend and record the size of
 *                 f as the distance from its label to the current location.
 * On ARM targets gas reads the .align operand as a power of two, so
 * `.align 4` requests 16-byte alignment.
 */
17*e1eccf28SAndroid Build Coastguard Worker#define ENTRY(f) .text; .align 4; .globl f; .type f,#function; f: .fnstart
18*e1eccf28SAndroid Build Coastguard Worker#define PRIVATE(f) .text; .align 4; .type f,#function; f: .fnstart
19*e1eccf28SAndroid Build Coastguard Worker#define END(f) .fnend; .size f, .-f;
20*e1eccf28SAndroid Build Coastguard Worker
/* Selects the `pld` expansion of VERTPLD (defined further down); without this
 * define VERTPLD expands to `nop` instead.
 */
21*e1eccf28SAndroid Build Coastguard Worker#define ARCH_ARM_USE_BLUR_PRELOAD
22*e1eccf28SAndroid Build Coastguard Worker
23*e1eccf28SAndroid Build Coastguard Worker.eabi_attribute 25,1 @Tag_ABI_align8_preserved
/* Assemble everything below as ARM-state (4-byte) instructions.  The computed
 * jumps in the fetch and hconv macros rely on this fixed instruction size.
 */
24*e1eccf28SAndroid Build Coastguard Worker.arm
25*e1eccf28SAndroid Build Coastguard Worker
26*e1eccf28SAndroid Build Coastguard Worker/* Number of fractional bits to preserve in intermediate results.  The
27*e1eccf28SAndroid Build Coastguard Worker * intermediate storage is 16-bit, and we started with 8 bit data (the integer
28*e1eccf28SAndroid Build Coastguard Worker * part), so this should be between 0 and 8.
29*e1eccf28SAndroid Build Coastguard Worker */
30*e1eccf28SAndroid Build Coastguard Worker.set FRACTION_BITS, 7
31*e1eccf28SAndroid Build Coastguard Worker
/* Default value of the max_r parameter of the fetch macro, i.e. the highest
 * tap index for which fetch emits an unrolled loop iteration.
 */
32*e1eccf28SAndroid Build Coastguard Worker.set MAX_R, 25
33*e1eccf28SAndroid Build Coastguard Worker
34*e1eccf28SAndroid Build Coastguard Worker
35*e1eccf28SAndroid Build Coastguard Worker/* A quick way of making a line of code conditional on some other condition.
36*e1eccf28SAndroid Build Coastguard Worker * Use `.set cc, 1` or `.set cc, 0` to enable or disable lines prefixed with
37*e1eccf28SAndroid Build Coastguard Worker * `ifcc`:
38*e1eccf28SAndroid Build Coastguard Worker */
/* ifcc <statement>
 * Emits <statement> only while the assembler symbol `cc` is non-zero at the
 * point of expansion; otherwise emits nothing.  The vararg parameter captures
 * the whole remainder of the line, commas included, so a complete instruction
 * or directive can follow `ifcc`.  The fetch macro sets `cc` from its reg
 * argument before using this.
 */
39*e1eccf28SAndroid Build Coastguard Worker.macro ifcc zzz:vararg
40*e1eccf28SAndroid Build Coastguard Worker.if cc
41*e1eccf28SAndroid Build Coastguard Worker            \zzz
42*e1eccf28SAndroid Build Coastguard Worker.endif
43*e1eccf28SAndroid Build Coastguard Worker.endm
44*e1eccf28SAndroid Build Coastguard Worker
45*e1eccf28SAndroid Build Coastguard Worker/* It's not always clear that prefetching is beneficial and this needs further
46*e1eccf28SAndroid Build Coastguard Worker * testing on different cores, so it's made switchable here.
47*e1eccf28SAndroid Build Coastguard Worker */
/* VERTPLD(base, offset) expands to `pld [base, offset]` when
 * ARCH_ARM_USE_BLUR_PRELOAD is defined and to a single `nop` otherwise.
 * Either way exactly one instruction is emitted, so the size of the unrolled
 * loop bodies in fetch (which the computed entry address depends on) does not
 * change with this switch.
 */
48*e1eccf28SAndroid Build Coastguard Worker#if defined(ARCH_ARM_USE_BLUR_PRELOAD)
49*e1eccf28SAndroid Build Coastguard Worker#define VERTPLD(...) pld [__VA_ARGS__]
50*e1eccf28SAndroid Build Coastguard Worker#else
51*e1eccf28SAndroid Build Coastguard Worker#define VERTPLD(...) nop
52*e1eccf28SAndroid Build Coastguard Worker#endif
53*e1eccf28SAndroid Build Coastguard Worker
54*e1eccf28SAndroid Build Coastguard Worker/* Fetch 16 columns of bytes (regardless of image format), convolve these
55*e1eccf28SAndroid Build Coastguard Worker * vertically, and leave them in the register file.  If working near the top or
56*e1eccf28SAndroid Build Coastguard Worker * bottom of an image then clamp the addressing while loading the data in.
57*e1eccf28SAndroid Build Coastguard Worker *
58*e1eccf28SAndroid Build Coastguard Worker * The convolution is fully unrolled for windows up to max_r, with the
59*e1eccf28SAndroid Build Coastguard Worker * outermost edges calculated first.  This way it's possible to branch directly
60*e1eccf28SAndroid Build Coastguard Worker * into the relevant part of the code for an arbitrary convolution radius.  Two
61*e1eccf28SAndroid Build Coastguard Worker * variants of the loop are produced; one eliminates the clamping code for a
62*e1eccf28SAndroid Build Coastguard Worker * slight speed advantage.
63*e1eccf28SAndroid Build Coastguard Worker *
64*e1eccf28SAndroid Build Coastguard Worker * Where the macro is called with reg=x, the specified register is taken to
65*e1eccf28SAndroid Build Coastguard Worker * contain a pre-calculated pointer into one of the two loops.
66*e1eccf28SAndroid Build Coastguard Worker *
67*e1eccf28SAndroid Build Coastguard Worker * Input:
68*e1eccf28SAndroid Build Coastguard Worker *      r1 -- src
69*e1eccf28SAndroid Build Coastguard Worker *      r2 -- pitch
70*e1eccf28SAndroid Build Coastguard Worker *      r5 -- r
71*e1eccf28SAndroid Build Coastguard Worker *      r6 -- rup (r, unless clipped to top of source image)
72*e1eccf28SAndroid Build Coastguard Worker *      r7 -- rdn (r, unless clipped to bottom of source image)
73*e1eccf28SAndroid Build Coastguard Worker *      r12 -- switch index
74*e1eccf28SAndroid Build Coastguard Worker *      q0-q3 -- coefficient table
75*e1eccf28SAndroid Build Coastguard Worker * Output:
76*e1eccf28SAndroid Build Coastguard Worker *      r1 += 16
77*e1eccf28SAndroid Build Coastguard Worker *      q10,q11 -- 16 convolved columns
78*e1eccf28SAndroid Build Coastguard Worker * Modifies:
79*e1eccf28SAndroid Build Coastguard Worker *      r10 = upper row pointer
80*e1eccf28SAndroid Build Coastguard Worker *      r11 = lower row pointer
81*e1eccf28SAndroid Build Coastguard Worker *      q12-q15 = temporary sums
82*e1eccf28SAndroid Build Coastguard Worker */
/* fetch -- vertical convolution of 16 byte columns (register contract is in
 * the header comment above).
 * Macro parameters:
 *   max_r   -- highest tap index emitted; vertfetch iterations whose index is
 *              0 or greater than max_r expand to nothing.
 *   labelc  -- label placed at the end of the clamped loop.
 *   labelnc -- label placed at the end of the non-clamped loop.
 *   reg     -- register holding the jump target.  With the default r12 the
 *              macro computes the target itself (an entry point into the
 *              clamped loop chosen by r5); with any other register the
 *              caller must have loaded the target beforehand.
 */
83*e1eccf28SAndroid Build Coastguard Worker.macro fetch, max_r=MAX_R, labelc=1, labelnc=2, reg=r12 /*{{{*/
  /* cc = 1 only for reg == r12; this enables the ifcc-prefixed lines that
   * calculate the jump target. */
84*e1eccf28SAndroid Build Coastguard Worker  .ifc \reg,r12 ; .set cc, 1 ; .else ; .set cc, 0 ; .endif
85*e1eccf28SAndroid Build Coastguard Worker
            /* Load 16 bytes of the centre row. */
86*e1eccf28SAndroid Build Coastguard Worker            vld1.8      {d30,d31}, [r1]
            /* r10 = r1 - r2 * r6: the row rup rows above the centre. */
87*e1eccf28SAndroid Build Coastguard Worker            mls         r10, r2, r6, r1
88*e1eccf28SAndroid Build Coastguard Worker
            /* Widen the centre bytes to 16 bits: q14 = columns 0-7,
             * q15 = columns 8-15. */
89*e1eccf28SAndroid Build Coastguard Worker            vmovl.u8    q14, d30
90*e1eccf28SAndroid Build Coastguard Worker            VERTPLD(r1, #32)
91*e1eccf28SAndroid Build Coastguard Worker            vmovl.u8    q15, d31
  /* Point reg at local label 1, the end of the clamped loop.  For small max_r
   * a plain adr is used; otherwise the offset is loaded from the literal at
   * local label 2 and added to pc.  In ARM state pc reads as the address of
   * the add plus 8, which the -8 in the literal cancels.  (The max_r
   * threshold is marked approximate; presumably it reflects the limited
   * range of adr -- TODO confirm.) */
92*e1eccf28SAndroid Build Coastguard Worker  .if \max_r < 16 // approximate
93*e1eccf28SAndroid Build Coastguard Worker    ifcc    adr         \reg, 1f
94*e1eccf28SAndroid Build Coastguard Worker  .else
95*e1eccf28SAndroid Build Coastguard Worker    ifcc    ldr         \reg, 2f
96*e1eccf28SAndroid Build Coastguard Worker1:  ifcc    add         \reg, \reg, pc
97*e1eccf28SAndroid Build Coastguard Worker  .endif
98*e1eccf28SAndroid Build Coastguard Worker
            /* Start the four 32-bit accumulators q12-q15 with the centre row
             * multiplied by coefficient d0[0].  The scalar instructions
             * interleaved below finish the address setup:
             *   reg -= r5 * 64   step back r5 iterations from the loop end
             *                    (one clamped iteration is 16 instructions,
             *                    i.e. 64 bytes)
             *   r11  = r1 + r2 * r7   the row rdn rows below the centre
             *   r1  += 16             advance src to the next 16 columns
             */
99*e1eccf28SAndroid Build Coastguard Worker            vmull.u16   q12, d28, d0[0]
100*e1eccf28SAndroid Build Coastguard Worker    ifcc    sub         \reg, r5, LSL #6
101*e1eccf28SAndroid Build Coastguard Worker            vmull.u16   q13, d29, d0[0]
102*e1eccf28SAndroid Build Coastguard Worker            mla         r11, r2, r7, r1
103*e1eccf28SAndroid Build Coastguard Worker            vmull.u16   q14, d30, d0[0]
104*e1eccf28SAndroid Build Coastguard Worker            add         r1, r1, #16
105*e1eccf28SAndroid Build Coastguard Worker            vmull.u16   q15, d31, d0[0]
            /* Jump into the unrolled loop at the calculated (or caller
             * supplied) entry point. */
106*e1eccf28SAndroid Build Coastguard Worker            bx          \reg
107*e1eccf28SAndroid Build Coastguard Worker
     /* Literal read by the ldr above: distance from the add instruction to
      * the end of the clamped loop, less the 8-byte pc read-ahead. */
108*e1eccf28SAndroid Build Coastguard Worker     ifcc   .align 2
109*e1eccf28SAndroid Build Coastguard Worker  2: ifcc   .word       1f-1b-8
110*e1eccf28SAndroid Build Coastguard Worker
111*e1eccf28SAndroid Build Coastguard Worker  /* This version of the vertical fetch loop body is used away from the edges
112*e1eccf28SAndroid Build Coastguard Worker   * of the source image.  The pointers start at the top and bottom source rows
113*e1eccf28SAndroid Build Coastguard Worker   * and work their way towards the centre on each iteration.  This way the
114*e1eccf28SAndroid Build Coastguard Worker   * number of taps used can be controlled by jumping directly into the middle
115*e1eccf28SAndroid Build Coastguard Worker   * of the loop and running to completion.
116*e1eccf28SAndroid Build Coastguard Worker   * If the loop body changes size then the code which calculates the address of
117*e1eccf28SAndroid Build Coastguard Worker   * the initial iteration must be updated accordingly.
118*e1eccf28SAndroid Build Coastguard Worker   */
119*e1eccf28SAndroid Build Coastguard Worker  .macro vertfetch_noclamp i, dreg
120*e1eccf28SAndroid Build Coastguard Worker    .if 0 < \i && \i <= \max_r
            /* Load 16 bytes from the upper row (post-incrementing r10 by the
             * pitch) and 16 bytes from the lower row, then step r11 up. */
121*e1eccf28SAndroid Build Coastguard Worker            vld1.8      {d20,d21}, [r10], r2
122*e1eccf28SAndroid Build Coastguard Worker            vld1.8      {d22,d23}, [r11]
123*e1eccf28SAndroid Build Coastguard Worker            sub         r11, r11, r2
            /* After the swap d20/d21 hold columns 0-7 of the upper/lower rows
             * and d22/d23 hold columns 8-15, so each vaddl produces
             * upper + lower as 16-bit values for eight columns. */
124*e1eccf28SAndroid Build Coastguard Worker            vswp        d21, d22
125*e1eccf28SAndroid Build Coastguard Worker            VERTPLD(r10, #32)
126*e1eccf28SAndroid Build Coastguard Worker            vaddl.u8    q10, d20, d21
127*e1eccf28SAndroid Build Coastguard Worker            vaddl.u8    q11, d22, d23
            /* Accumulate (upper + lower) times the coefficient for tap i. */
128*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q12, d20, \dreg
129*e1eccf28SAndroid Build Coastguard Worker            VERTPLD(r11, #32)
130*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q13, d21, \dreg
131*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d22, \dreg
132*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d23, \dreg
133*e1eccf28SAndroid Build Coastguard Worker    .endif
134*e1eccf28SAndroid Build Coastguard Worker  .endm
135*e1eccf28SAndroid Build Coastguard Worker
136*e1eccf28SAndroid Build Coastguard Worker  /* This version of the vertical fetch loop body is used near the edges of the
137*e1eccf28SAndroid Build Coastguard Worker   * source image, where one or both of the accesses may start with a clamped
138*e1eccf28SAndroid Build Coastguard Worker   * value, and the row addresses only begin to change after some number of
139*e1eccf28SAndroid Build Coastguard Worker   * iterations before the end.
140*e1eccf28SAndroid Build Coastguard Worker   * If the loop body changes size then the code which calculates the address of
141*e1eccf28SAndroid Build Coastguard Worker   * the initial iteration must be updated accordingly.
142*e1eccf28SAndroid Build Coastguard Worker   */
143*e1eccf28SAndroid Build Coastguard Worker  .macro vertfetch_clamped i, dreg
144*e1eccf28SAndroid Build Coastguard Worker    .if 0 < \i && \i <= \max_r
            /* Same arithmetic as vertfetch_noclamp, but each row pointer only
             * moves when its clip distance allows it: r10 advances by the
             * pitch when r6 >= i and r11 retreats by the pitch when r7 >= i
             * (unsigned compares).  Otherwise the pointer stays put and the
             * same row is read again on the next iteration. */
145*e1eccf28SAndroid Build Coastguard Worker            vld1.8      {d20,d21}, [r10]
146*e1eccf28SAndroid Build Coastguard Worker            vld1.8      {d22,d23}, [r11]
147*e1eccf28SAndroid Build Coastguard Worker            cmp         r6, #\i
148*e1eccf28SAndroid Build Coastguard Worker            vswp        d21, d22
149*e1eccf28SAndroid Build Coastguard Worker            VERTPLD(r10, #32)
150*e1eccf28SAndroid Build Coastguard Worker            vaddl.u8    q10, d20, d21
151*e1eccf28SAndroid Build Coastguard Worker            addhs       r10, r10, r2
152*e1eccf28SAndroid Build Coastguard Worker            vaddl.u8    q11, d22, d23
153*e1eccf28SAndroid Build Coastguard Worker            cmp         r7, #\i
154*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q12, d20, \dreg
155*e1eccf28SAndroid Build Coastguard Worker            VERTPLD(r11, #32)
156*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q13, d21, \dreg
157*e1eccf28SAndroid Build Coastguard Worker            subhs       r11, r11, r2
158*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d22, \dreg
            /* With this nop the body is 16 instructions (64 bytes), which is
             * the stride assumed by the LSL #6 in the entry calculation;
             * presumably the nop exists as padding for that purpose. */
159*e1eccf28SAndroid Build Coastguard Worker            nop
160*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d23, \dreg
161*e1eccf28SAndroid Build Coastguard Worker    .endif
162*e1eccf28SAndroid Build Coastguard Worker  .endm
163*e1eccf28SAndroid Build Coastguard Worker
164*e1eccf28SAndroid Build Coastguard Worker  /* Entry into this unrolled loop is computed as a negative index from
165*e1eccf28SAndroid Build Coastguard Worker   * \labelc at the end of the block.
166*e1eccf28SAndroid Build Coastguard Worker   */
167*e1eccf28SAndroid Build Coastguard Worker  .align 4
  /* Outermost tap first.  Entries with an index above max_r, and the final
   * index 0 entry, expand to nothing. */
168*e1eccf28SAndroid Build Coastguard Worker  vertfetch_clamped 27, d6[3]
169*e1eccf28SAndroid Build Coastguard Worker  vertfetch_clamped 26, d6[2]
170*e1eccf28SAndroid Build Coastguard Worker  vertfetch_clamped 25, d6[1]
171*e1eccf28SAndroid Build Coastguard Worker  vertfetch_clamped 24, d6[0]
172*e1eccf28SAndroid Build Coastguard Worker  vertfetch_clamped 23, d5[3]
173*e1eccf28SAndroid Build Coastguard Worker  vertfetch_clamped 22, d5[2]
174*e1eccf28SAndroid Build Coastguard Worker  vertfetch_clamped 21, d5[1]
175*e1eccf28SAndroid Build Coastguard Worker  vertfetch_clamped 20, d5[0]
176*e1eccf28SAndroid Build Coastguard Worker  vertfetch_clamped 19, d4[3]
177*e1eccf28SAndroid Build Coastguard Worker  vertfetch_clamped 18, d4[2]
178*e1eccf28SAndroid Build Coastguard Worker  vertfetch_clamped 17, d4[1]
179*e1eccf28SAndroid Build Coastguard Worker  vertfetch_clamped 16, d4[0]
180*e1eccf28SAndroid Build Coastguard Worker  vertfetch_clamped 15, d3[3]
181*e1eccf28SAndroid Build Coastguard Worker  vertfetch_clamped 14, d3[2]
182*e1eccf28SAndroid Build Coastguard Worker  vertfetch_clamped 13, d3[1]
183*e1eccf28SAndroid Build Coastguard Worker  vertfetch_clamped 12, d3[0]
184*e1eccf28SAndroid Build Coastguard Worker  vertfetch_clamped 11, d2[3]
185*e1eccf28SAndroid Build Coastguard Worker  vertfetch_clamped 10, d2[2]
186*e1eccf28SAndroid Build Coastguard Worker  vertfetch_clamped  9, d2[1]
187*e1eccf28SAndroid Build Coastguard Worker  vertfetch_clamped  8, d2[0]
188*e1eccf28SAndroid Build Coastguard Worker  vertfetch_clamped  7, d1[3]
189*e1eccf28SAndroid Build Coastguard Worker  vertfetch_clamped  6, d1[2]
190*e1eccf28SAndroid Build Coastguard Worker  vertfetch_clamped  5, d1[1]
191*e1eccf28SAndroid Build Coastguard Worker  vertfetch_clamped  4, d1[0]
192*e1eccf28SAndroid Build Coastguard Worker  vertfetch_clamped  3, d0[3]
193*e1eccf28SAndroid Build Coastguard Worker  vertfetch_clamped  2, d0[2]
194*e1eccf28SAndroid Build Coastguard Worker  vertfetch_clamped  1, d0[1]
195*e1eccf28SAndroid Build Coastguard Worker  vertfetch_clamped  0, d0[0]
196*e1eccf28SAndroid Build Coastguard Worker  1:
197*e1eccf28SAndroid Build Coastguard Worker  \labelc : b 2f    /* done with clamped loop, skip over non-clamped loop */
198*e1eccf28SAndroid Build Coastguard Worker
199*e1eccf28SAndroid Build Coastguard Worker  /* Entry into this unrolled loop is computed as a negative index from
200*e1eccf28SAndroid Build Coastguard Worker   * \labelnc at the end of the block.
201*e1eccf28SAndroid Build Coastguard Worker   */
202*e1eccf28SAndroid Build Coastguard Worker  .align 4
203*e1eccf28SAndroid Build Coastguard Worker  vertfetch_noclamp 27, d6[3]
204*e1eccf28SAndroid Build Coastguard Worker  vertfetch_noclamp 26, d6[2]
205*e1eccf28SAndroid Build Coastguard Worker  vertfetch_noclamp 25, d6[1]
206*e1eccf28SAndroid Build Coastguard Worker  vertfetch_noclamp 24, d6[0]
207*e1eccf28SAndroid Build Coastguard Worker  vertfetch_noclamp 23, d5[3]
208*e1eccf28SAndroid Build Coastguard Worker  vertfetch_noclamp 22, d5[2]
209*e1eccf28SAndroid Build Coastguard Worker  vertfetch_noclamp 21, d5[1]
210*e1eccf28SAndroid Build Coastguard Worker  vertfetch_noclamp 20, d5[0]
211*e1eccf28SAndroid Build Coastguard Worker  vertfetch_noclamp 19, d4[3]
212*e1eccf28SAndroid Build Coastguard Worker  vertfetch_noclamp 18, d4[2]
213*e1eccf28SAndroid Build Coastguard Worker  vertfetch_noclamp 17, d4[1]
214*e1eccf28SAndroid Build Coastguard Worker  vertfetch_noclamp 16, d4[0]
215*e1eccf28SAndroid Build Coastguard Worker  vertfetch_noclamp 15, d3[3]
216*e1eccf28SAndroid Build Coastguard Worker  vertfetch_noclamp 14, d3[2]
217*e1eccf28SAndroid Build Coastguard Worker  vertfetch_noclamp 13, d3[1]
218*e1eccf28SAndroid Build Coastguard Worker  vertfetch_noclamp 12, d3[0]
219*e1eccf28SAndroid Build Coastguard Worker  vertfetch_noclamp 11, d2[3]
220*e1eccf28SAndroid Build Coastguard Worker  vertfetch_noclamp 10, d2[2]
221*e1eccf28SAndroid Build Coastguard Worker  vertfetch_noclamp  9, d2[1]
222*e1eccf28SAndroid Build Coastguard Worker  vertfetch_noclamp  8, d2[0]
223*e1eccf28SAndroid Build Coastguard Worker  vertfetch_noclamp  7, d1[3]
224*e1eccf28SAndroid Build Coastguard Worker  vertfetch_noclamp  6, d1[2]
225*e1eccf28SAndroid Build Coastguard Worker  vertfetch_noclamp  5, d1[1]
226*e1eccf28SAndroid Build Coastguard Worker  vertfetch_noclamp  4, d1[0]
227*e1eccf28SAndroid Build Coastguard Worker  vertfetch_noclamp  3, d0[3]
228*e1eccf28SAndroid Build Coastguard Worker  vertfetch_noclamp  2, d0[2]
229*e1eccf28SAndroid Build Coastguard Worker  vertfetch_noclamp  1, d0[1]
230*e1eccf28SAndroid Build Coastguard Worker  vertfetch_noclamp  0, d0[0]
231*e1eccf28SAndroid Build Coastguard Worker  \labelnc :
232*e1eccf28SAndroid Build Coastguard Worker
  /* The helper macros are purged so that the next expansion of fetch can
   * define them again. */
233*e1eccf28SAndroid Build Coastguard Worker  .purgem vertfetch_clamped
234*e1eccf28SAndroid Build Coastguard Worker  .purgem vertfetch_noclamp
235*e1eccf28SAndroid Build Coastguard Worker
  /* Both loops end here.  Narrow the 32-bit sums to 16 bits with rounding and
   * saturation, shifting by 16 - FRACTION_BITS so that FRACTION_BITS
   * fractional bits remain in q10,q11. */
236*e1eccf28SAndroid Build Coastguard Worker  2:        vqrshrn.u32 d20, q12, #16 - FRACTION_BITS
237*e1eccf28SAndroid Build Coastguard Worker            vqrshrn.u32 d21, q13, #16 - FRACTION_BITS
238*e1eccf28SAndroid Build Coastguard Worker            vqrshrn.u32 d22, q14, #16 - FRACTION_BITS
239*e1eccf28SAndroid Build Coastguard Worker            vqrshrn.u32 d23, q15, #16 - FRACTION_BITS
240*e1eccf28SAndroid Build Coastguard Worker.endm /*}}}*/
241*e1eccf28SAndroid Build Coastguard Worker
242*e1eccf28SAndroid Build Coastguard Worker/* Some portion of the convolution window (as much as will fit, and all of it
243*e1eccf28SAndroid Build Coastguard Worker * for the uchar1 cases) is kept in the register file to avoid unnecessary
244*e1eccf28SAndroid Build Coastguard Worker * memory accesses.  This forces the horizontal loops to be unrolled because
245*e1eccf28SAndroid Build Coastguard Worker * there's no indexed addressing into the register file.
246*e1eccf28SAndroid Build Coastguard Worker *
247*e1eccf28SAndroid Build Coastguard Worker * As in the fetch macro, the operations are ordered from outside to inside, so
248*e1eccf28SAndroid Build Coastguard Worker * that jumping into the middle of the block bypasses the unwanted window taps.
249*e1eccf28SAndroid Build Coastguard Worker *
250*e1eccf28SAndroid Build Coastguard Worker * There are several variants of the macro because of the fixed offsets of the
251*e1eccf28SAndroid Build Coastguard Worker * taps -- the wider the maximum radius the further the centre tap is from the
252*e1eccf28SAndroid Build Coastguard Worker * most recently fetched data.  This means that pre-filling the window requires
253*e1eccf28SAndroid Build Coastguard Worker * more data that won't be used and it means that rotating the window involves
254*e1eccf28SAndroid Build Coastguard Worker * more mov operations.
255*e1eccf28SAndroid Build Coastguard Worker *
256*e1eccf28SAndroid Build Coastguard Worker * When the buffer gets too big the buffer at [r9] is used.
257*e1eccf28SAndroid Build Coastguard Worker *
258*e1eccf28SAndroid Build Coastguard Worker * Input:
259*e1eccf28SAndroid Build Coastguard Worker *      q4-q11 -- convolution window
260*e1eccf28SAndroid Build Coastguard Worker *      r9 -- pointer to additional convolution window data
261*e1eccf28SAndroid Build Coastguard Worker * Output:
262*e1eccf28SAndroid Build Coastguard Worker *      r9 -- updated buffer pointer (if used)
263*e1eccf28SAndroid Build Coastguard Worker *      d31 -- result to be stored
264*e1eccf28SAndroid Build Coastguard Worker * Modifies:
265*e1eccf28SAndroid Build Coastguard Worker *      r12 -- temp buffer pointer
266*e1eccf28SAndroid Build Coastguard Worker *      q12-q13 -- temporaries for load and vext operations.
267*e1eccf28SAndroid Build Coastguard Worker *      q14-q15 -- intermediate sums
268*e1eccf28SAndroid Build Coastguard Worker */
/* TUNED_LIST1 is not referenced in the part of the file visible here;
 * presumably it lists the window sizes for which hconv1_<n> variants exist
 * (hconv1_8, hconv1_16) -- TODO confirm against its users.
 *
 * hconv1_8 -- horizontal convolution producing eight output bytes, for radii
 * of 1 to 8.
 *   In:   q8,q9,q10 -- window of 24 consecutive 16-bit values; the eight
 *                      outputs are centred on the values in q9
 *         q11       -- next eight values, shifted into the window on exit
 *         d0[0]..d2[0] -- coefficients for taps 0..8
 *         r5        -- radius; must be in 1..8 because it indexes the jump
 *                      table directly
 *   Out:  d31       -- eight result bytes
 *         q8-q10    -- window advanced by eight values
 *   Clobbers: r12, q12-q15
 */
269*e1eccf28SAndroid Build Coastguard Worker#define TUNED_LIST1 8, 16
270*e1eccf28SAndroid Build Coastguard Worker.macro hconv1_8/*{{{*/
            /* Centre tap: q14,q15 = q9 * d0[0] as 32-bit sums. */
271*e1eccf28SAndroid Build Coastguard Worker            vmull.u16   q14, d18, d0[0]
272*e1eccf28SAndroid Build Coastguard Worker            vmull.u16   q15, d19, d0[0]
273*e1eccf28SAndroid Build Coastguard Worker
            /* Computed jump on r5.  In ARM state pc reads as the current
             * instruction address plus 8, so the ldr indexes from the bkpt
             * word (r5 = 1 selects the first table entry) and the add sees
             * pc equal to label 100, which the table offsets are relative
             * to.  The bkpt is never executed; it fills the slot that index
             * 0 would address. */
274*e1eccf28SAndroid Build Coastguard Worker            ldr         r12, [pc, r5, LSL #2]
275*e1eccf28SAndroid Build Coastguard Worker            add         pc, pc, r12
276*e1eccf28SAndroid Build Coastguard Worker            bkpt
277*e1eccf28SAndroid Build Coastguard Worker    100:    .word 101f-100b
278*e1eccf28SAndroid Build Coastguard Worker            .word 102f-100b
279*e1eccf28SAndroid Build Coastguard Worker            .word 103f-100b
280*e1eccf28SAndroid Build Coastguard Worker            .word 104f-100b
281*e1eccf28SAndroid Build Coastguard Worker            .word 105f-100b
282*e1eccf28SAndroid Build Coastguard Worker            .word 106f-100b
283*e1eccf28SAndroid Build Coastguard Worker            .word 107f-100b
284*e1eccf28SAndroid Build Coastguard Worker            .word 108f-100b
            /* Taps run from the outermost (distance 8) inwards, so entering
             * at label 10k applies taps k..1 and falls through to the end.
             * For distance k the left neighbours are vext(q8, q9, 8 - k) and
             * the right neighbours are vext(q9, q10, k); both are multiplied
             * by the same coefficient.  Distance 8 uses q8 and q10 directly. */
285*e1eccf28SAndroid Build Coastguard Worker    108:    vmlal.u16   q14, d16, d2[0]
286*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d17, d2[0]
287*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d20, d2[0]
288*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d21, d2[0]
289*e1eccf28SAndroid Build Coastguard Worker    107:    vext.u16    q12, q8, q9, #1
290*e1eccf28SAndroid Build Coastguard Worker            vext.u16    q13, q9, q10, #7
291*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d24, d1[3]
292*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d25, d1[3]
293*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d26, d1[3]
294*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d27, d1[3]
295*e1eccf28SAndroid Build Coastguard Worker    106:    vext.u16    q12, q8, q9, #2
296*e1eccf28SAndroid Build Coastguard Worker            vext.u16    q13, q9, q10, #6
297*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d24, d1[2]
298*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d25, d1[2]
299*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d26, d1[2]
300*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d27, d1[2]
301*e1eccf28SAndroid Build Coastguard Worker    105:    vext.u16    q12, q8, q9, #3
302*e1eccf28SAndroid Build Coastguard Worker            vext.u16    q13, q9, q10, #5
303*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d24, d1[1]
304*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d25, d1[1]
305*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d26, d1[1]
306*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d27, d1[1]
            /* Distance 4: a vext by four elements lands on d-register
             * boundaries, so the halves d17/d18 and d19/d20 are used
             * directly and the vext instructions stay commented out. */
307*e1eccf28SAndroid Build Coastguard Worker    104:    //vext.u16    q12, q8, q9, #4
308*e1eccf28SAndroid Build Coastguard Worker            //vext.u16    q13, q9, q10, #4
309*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d17, d1[0]
310*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d18, d1[0]
311*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d19, d1[0]
312*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d20, d1[0]
313*e1eccf28SAndroid Build Coastguard Worker    103:    vext.u16    q12, q8, q9, #5
314*e1eccf28SAndroid Build Coastguard Worker            vext.u16    q13, q9, q10, #3
315*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d24, d0[3]
316*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d25, d0[3]
317*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d26, d0[3]
318*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d27, d0[3]
319*e1eccf28SAndroid Build Coastguard Worker    102:    vext.u16    q12, q8, q9, #6
320*e1eccf28SAndroid Build Coastguard Worker            vext.u16    q13, q9, q10, #2
321*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d24, d0[2]
322*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d25, d0[2]
323*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d26, d0[2]
324*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d27, d0[2]
325*e1eccf28SAndroid Build Coastguard Worker    101:    vext.u16    q12, q8, q9, #7
326*e1eccf28SAndroid Build Coastguard Worker            vext.u16    q13, q9, q10, #1
327*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d24, d0[1]
328*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d25, d0[1]
329*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d26, d0[1]
330*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d27, d0[1]
331*e1eccf28SAndroid Build Coastguard Worker
            /* Narrow in two steps, each with rounding and saturation: the
             * 32-bit sums drop 16 bits into q14, then the 16-bit values drop
             * the FRACTION_BITS fractional bits to give eight bytes in d31. */
332*e1eccf28SAndroid Build Coastguard Worker            vqrshrn.u32 d28, q14, #16
333*e1eccf28SAndroid Build Coastguard Worker            vqrshrn.u32 d29, q15, #16
334*e1eccf28SAndroid Build Coastguard Worker            vqrshrn.u16 d31, q14, #FRACTION_BITS
335*e1eccf28SAndroid Build Coastguard Worker
            /* Slide the window along by eight values; q11 becomes the newest
             * part of the window. */
336*e1eccf28SAndroid Build Coastguard Worker            vmov        q8, q9
337*e1eccf28SAndroid Build Coastguard Worker            vmov        q9, q10
338*e1eccf28SAndroid Build Coastguard Worker            vmov        q10, q11
339*e1eccf28SAndroid Build Coastguard Worker.endm/*}}}*/
340*e1eccf28SAndroid Build Coastguard Worker
341*e1eccf28SAndroid Build Coastguard Worker.macro hconv1_16/*{{{*/
342*e1eccf28SAndroid Build Coastguard Worker            vmull.u16   q14, d16, d0[0]
343*e1eccf28SAndroid Build Coastguard Worker            vmull.u16   q15, d17, d0[0]
344*e1eccf28SAndroid Build Coastguard Worker
345*e1eccf28SAndroid Build Coastguard Worker            ldr         r12, [pc, r5, LSL #2]
346*e1eccf28SAndroid Build Coastguard Worker            add         pc, pc, r12
347*e1eccf28SAndroid Build Coastguard Worker            bkpt
348*e1eccf28SAndroid Build Coastguard Worker    100:    .word 101f-100b
349*e1eccf28SAndroid Build Coastguard Worker            .word 102f-100b
350*e1eccf28SAndroid Build Coastguard Worker            .word 103f-100b
351*e1eccf28SAndroid Build Coastguard Worker            .word 104f-100b
352*e1eccf28SAndroid Build Coastguard Worker            .word 105f-100b
353*e1eccf28SAndroid Build Coastguard Worker            .word 106f-100b
354*e1eccf28SAndroid Build Coastguard Worker            .word 107f-100b
355*e1eccf28SAndroid Build Coastguard Worker            .word 108f-100b
356*e1eccf28SAndroid Build Coastguard Worker            .word 109f-100b
357*e1eccf28SAndroid Build Coastguard Worker            .word 110f-100b
358*e1eccf28SAndroid Build Coastguard Worker            .word 111f-100b
359*e1eccf28SAndroid Build Coastguard Worker            .word 112f-100b
360*e1eccf28SAndroid Build Coastguard Worker            .word 113f-100b
361*e1eccf28SAndroid Build Coastguard Worker            .word 114f-100b
362*e1eccf28SAndroid Build Coastguard Worker            .word 115f-100b
363*e1eccf28SAndroid Build Coastguard Worker            .word 116f-100b
364*e1eccf28SAndroid Build Coastguard Worker    116:    //vext.u16    q12, q6, q7, #0
365*e1eccf28SAndroid Build Coastguard Worker            //vext.u16    q13, q10, q11, #0
366*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d12, d4[0]
367*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d13, d4[0]
368*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d20, d4[0]
369*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d21, d4[0]
370*e1eccf28SAndroid Build Coastguard Worker    115:    vext.u16    q12, q6, q7, #1
371*e1eccf28SAndroid Build Coastguard Worker            vext.u16    q13, q9, q10, #7
372*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d24, d3[3]
373*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d25, d3[3]
374*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d26, d3[3]
375*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d27, d3[3]
376*e1eccf28SAndroid Build Coastguard Worker    114:    vext.u16    q12, q6, q7, #2
377*e1eccf28SAndroid Build Coastguard Worker            vext.u16    q13, q9, q10, #6
378*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d24, d3[2]
379*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d25, d3[2]
380*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d26, d3[2]
381*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d27, d3[2]
382*e1eccf28SAndroid Build Coastguard Worker    113:    vext.u16    q12, q6, q7, #3
383*e1eccf28SAndroid Build Coastguard Worker            vext.u16    q13, q9, q10, #5
384*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d24, d3[1]
385*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d25, d3[1]
386*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d26, d3[1]
387*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d27, d3[1]
388*e1eccf28SAndroid Build Coastguard Worker    112:    //vext.u16    q12, q6, q7, #4
389*e1eccf28SAndroid Build Coastguard Worker            //vext.u16    q13, q9, q10, #4
390*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d13, d3[0]
391*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d14, d3[0]
392*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d19, d3[0]
393*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d20, d3[0]
394*e1eccf28SAndroid Build Coastguard Worker    111:    vext.u16    q12, q6, q7, #5
395*e1eccf28SAndroid Build Coastguard Worker            vext.u16    q13, q9, q10, #3
396*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d24, d2[3]
397*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d25, d2[3]
398*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d26, d2[3]
399*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d27, d2[3]
400*e1eccf28SAndroid Build Coastguard Worker    110:    vext.u16    q12, q6, q7, #6
401*e1eccf28SAndroid Build Coastguard Worker            vext.u16    q13, q9, q10, #2
402*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d24, d2[2]
403*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d25, d2[2]
404*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d26, d2[2]
405*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d27, d2[2]
406*e1eccf28SAndroid Build Coastguard Worker    109:    vext.u16    q12, q6, q7, #7
407*e1eccf28SAndroid Build Coastguard Worker            vext.u16    q13, q9, q10, #1
408*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d24, d2[1]
409*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d25, d2[1]
410*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d26, d2[1]
411*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d27, d2[1]
412*e1eccf28SAndroid Build Coastguard Worker    108:    //vext.u16    q12, q7, q8, #0
413*e1eccf28SAndroid Build Coastguard Worker            //vext.u16    q13, q9, q10, #0
414*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d14, d2[0]
415*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d15, d2[0]
416*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d18, d2[0]
417*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d19, d2[0]
418*e1eccf28SAndroid Build Coastguard Worker    107:    vext.u16    q12, q7, q8, #1
419*e1eccf28SAndroid Build Coastguard Worker            vext.u16    q13, q8, q9, #7
420*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d24, d1[3]
421*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d25, d1[3]
422*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d26, d1[3]
423*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d27, d1[3]
424*e1eccf28SAndroid Build Coastguard Worker    106:    vext.u16    q12, q7, q8, #2
425*e1eccf28SAndroid Build Coastguard Worker            vext.u16    q13, q8, q9, #6
426*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d24, d1[2]
427*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d25, d1[2]
428*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d26, d1[2]
429*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d27, d1[2]
430*e1eccf28SAndroid Build Coastguard Worker    105:    vext.u16    q12, q7, q8, #3
431*e1eccf28SAndroid Build Coastguard Worker            vext.u16    q13, q8, q9, #5
432*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d24, d1[1]
433*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d25, d1[1]
434*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d26, d1[1]
435*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d27, d1[1]
436*e1eccf28SAndroid Build Coastguard Worker    104:    //vext.u16    q12, q7, q8, #4
437*e1eccf28SAndroid Build Coastguard Worker            //vext.u16    q13, q8, q9, #4
438*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d15, d1[0]
439*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d16, d1[0]
440*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d17, d1[0]
441*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d18, d1[0]
442*e1eccf28SAndroid Build Coastguard Worker    103:    vext.u16    q12, q7, q8, #5
443*e1eccf28SAndroid Build Coastguard Worker            vext.u16    q13, q8, q9, #3
444*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d24, d0[3]
445*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d25, d0[3]
446*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d26, d0[3]
447*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d27, d0[3]
448*e1eccf28SAndroid Build Coastguard Worker    102:    vext.u16    q12, q7, q8, #6
449*e1eccf28SAndroid Build Coastguard Worker            vext.u16    q13, q8, q9, #2
450*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d24, d0[2]
451*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d25, d0[2]
452*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d26, d0[2]
453*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d27, d0[2]
454*e1eccf28SAndroid Build Coastguard Worker    101:    vext.u16    q12, q7, q8, #7
455*e1eccf28SAndroid Build Coastguard Worker            vext.u16    q13, q8, q9, #1
456*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d24, d0[1]
457*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d25, d0[1]
458*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d26, d0[1]
459*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d27, d0[1]
460*e1eccf28SAndroid Build Coastguard Worker
461*e1eccf28SAndroid Build Coastguard Worker            vqrshrn.u32 d28, q14, #16
462*e1eccf28SAndroid Build Coastguard Worker            vqrshrn.u32 d29, q15, #16
463*e1eccf28SAndroid Build Coastguard Worker            vqrshrn.u16 d31, q14, #FRACTION_BITS
464*e1eccf28SAndroid Build Coastguard Worker
465*e1eccf28SAndroid Build Coastguard Worker            vmov        q6, q7
466*e1eccf28SAndroid Build Coastguard Worker            vmov        q7, q8
467*e1eccf28SAndroid Build Coastguard Worker            vmov        q8, q9
468*e1eccf28SAndroid Build Coastguard Worker            vmov        q9, q10
469*e1eccf28SAndroid Build Coastguard Worker            vmov        q10, q11
470*e1eccf28SAndroid Build Coastguard Worker.endm/*}}}*/
471*e1eccf28SAndroid Build Coastguard Worker
/* hconv1_25: one step of a symmetric convolution over unsigned 16-bit samples
 * with up to 25 taps on each side of the centre, producing eight results.
 * NOTE(review): the name suggests "horizontal convolution, 1 channel, up to
 * 25 taps" -- confirm against the code that instantiates this macro.
 *
 * Inputs, as read by the code below:
 *   d0[0]         coefficient applied to the centre samples
 *   d0[1]..d6[1]  coefficients for taps 1..25; tap N uses lane N%4 of d(N/4)
 *   d7, q4-q10    window of u16 samples.  The eight centre samples start at
 *                 element 7 of q6.  d6 is only ever used as a coefficient
 *                 register; of q3 only the top two lanes of d7 are read as data.
 *   q11           copied into q10 when the window is advanced at the end
 *   r5            number of taps to apply on each side, 1..25.  It indexes the
 *                 offset table so that execution enters at label 100+r5 and
 *                 falls through every lower-numbered tap.  Values outside
 *                 1..25 index outside the table.
 *                 NOTE(review): presumably the blur radius -- confirm in caller.
 *
 * Outputs:
 *   d31           the eight results narrowed to 8 bits (rounding, saturating)
 *   d7, q4-q10    window advanced by eight samples; q11 is left unchanged
 *                 (presumably refilled by the caller -- TODO confirm)
 *
 * Clobbers: r12, q12, q13, q14, q15.
 * FRACTION_BITS is defined outside this part of the file.
 */
472*e1eccf28SAndroid Build Coastguard Worker.macro hconv1_25/*{{{*/
473*e1eccf28SAndroid Build Coastguard Worker            vext.u16    q12, q6, q7, #7  /* q12 = the eight centre samples */
474*e1eccf28SAndroid Build Coastguard Worker            vmull.u16   q14, d24, d0[0]  /* q14 = 32-bit sums for results 0-3, seeded with centre * d0[0] */
475*e1eccf28SAndroid Build Coastguard Worker            vmull.u16   q15, d25, d0[0]  /* q15 = 32-bit sums for results 4-7 */
476*e1eccf28SAndroid Build Coastguard Worker
477*e1eccf28SAndroid Build Coastguard Worker            ldr         r12, [pc, r5, LSL #2]  /* ARM state: pc reads as this instruction + 8 (the bkpt), so this loads table word r5-1 */
478*e1eccf28SAndroid Build Coastguard Worker            add         pc, pc, r12  /* pc reads as label 100 here, so this jumps to 100b + loaded offset */
479*e1eccf28SAndroid Build Coastguard Worker            bkpt  /* never reached by fall-through; occupies the word that r5 == 0 would index */
480*e1eccf28SAndroid Build Coastguard Worker    100:    .word 101f-100b  /* table of offsets from label 100 to the entry point for each tap count */
481*e1eccf28SAndroid Build Coastguard Worker            .word 102f-100b
482*e1eccf28SAndroid Build Coastguard Worker            .word 103f-100b
483*e1eccf28SAndroid Build Coastguard Worker            .word 104f-100b
484*e1eccf28SAndroid Build Coastguard Worker            .word 105f-100b
485*e1eccf28SAndroid Build Coastguard Worker            .word 106f-100b
486*e1eccf28SAndroid Build Coastguard Worker            .word 107f-100b
487*e1eccf28SAndroid Build Coastguard Worker            .word 108f-100b
488*e1eccf28SAndroid Build Coastguard Worker            .word 109f-100b
489*e1eccf28SAndroid Build Coastguard Worker            .word 110f-100b
490*e1eccf28SAndroid Build Coastguard Worker            .word 111f-100b
491*e1eccf28SAndroid Build Coastguard Worker            .word 112f-100b
492*e1eccf28SAndroid Build Coastguard Worker            .word 113f-100b
493*e1eccf28SAndroid Build Coastguard Worker            .word 114f-100b
494*e1eccf28SAndroid Build Coastguard Worker            .word 115f-100b
495*e1eccf28SAndroid Build Coastguard Worker            .word 116f-100b
496*e1eccf28SAndroid Build Coastguard Worker            .word 117f-100b
497*e1eccf28SAndroid Build Coastguard Worker            .word 118f-100b
498*e1eccf28SAndroid Build Coastguard Worker            .word 119f-100b
499*e1eccf28SAndroid Build Coastguard Worker            .word 120f-100b
500*e1eccf28SAndroid Build Coastguard Worker            .word 121f-100b
501*e1eccf28SAndroid Build Coastguard Worker            .word 122f-100b
502*e1eccf28SAndroid Build Coastguard Worker            .word 123f-100b
503*e1eccf28SAndroid Build Coastguard Worker            .word 124f-100b
504*e1eccf28SAndroid Build Coastguard Worker            .word 125f-100b
505*e1eccf28SAndroid Build Coastguard Worker    125:    vext.u16    q12, q3, q4, #6  /* tap 25, left: samples 25 before the centre (top two lanes of d7, then q4) */
506*e1eccf28SAndroid Build Coastguard Worker            vext.u16    q13, q10, q11, #0  /* tap 25, right: samples 25 after the centre (offset 0, i.e. all of q10) */
507*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d24, d6[1]  /* results 0-3 += left * coefficient 25 */
508*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d25, d6[1]  /* results 4-7 += left * coefficient 25 */
509*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d26, d6[1]  /* results 0-3 += right * coefficient 25 */
510*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d27, d6[1]  /* results 4-7 += right * coefficient 25 */
            /* Taps 24 down to 1 repeat the same pattern: the left window moves
             * one sample towards the centre, the right window likewise, and
             * the coefficient lane steps down by one. */
511*e1eccf28SAndroid Build Coastguard Worker    124:    vext.u16    q12, q3, q4, #7
512*e1eccf28SAndroid Build Coastguard Worker            vext.u16    q13, q9, q10, #7
513*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d24, d6[0]
514*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d25, d6[0]
515*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d26, d6[0]
516*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d27, d6[0]
517*e1eccf28SAndroid Build Coastguard Worker    123:    vext.u16    q12, q4, q5, #0
518*e1eccf28SAndroid Build Coastguard Worker            vext.u16    q13, q9, q10, #6
519*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d24, d5[3]
520*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d25, d5[3]
521*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d26, d5[3]
522*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d27, d5[3]
523*e1eccf28SAndroid Build Coastguard Worker    122:    vext.u16    q12, q4, q5, #1
524*e1eccf28SAndroid Build Coastguard Worker            vext.u16    q13, q9, q10, #5
525*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d24, d5[2]
526*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d25, d5[2]
527*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d26, d5[2]
528*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d27, d5[2]
529*e1eccf28SAndroid Build Coastguard Worker    121:    vext.u16    q12, q4, q5, #2
530*e1eccf28SAndroid Build Coastguard Worker            vext.u16    q13, q9, q10, #4
531*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d24, d5[1]
532*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d25, d5[1]
533*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d26, d5[1]
534*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d27, d5[1]
535*e1eccf28SAndroid Build Coastguard Worker    120:    vext.u16    q12, q4, q5, #3
536*e1eccf28SAndroid Build Coastguard Worker            vext.u16    q13, q9, q10, #3
537*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d24, d5[0]
538*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d25, d5[0]
539*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d26, d5[0]
540*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d27, d5[0]
541*e1eccf28SAndroid Build Coastguard Worker    119:    vext.u16    q12, q4, q5, #4
542*e1eccf28SAndroid Build Coastguard Worker            vext.u16    q13, q9, q10, #2
543*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d24, d4[3]
544*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d25, d4[3]
545*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d26, d4[3]
546*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d27, d4[3]
547*e1eccf28SAndroid Build Coastguard Worker    118:    vext.u16    q12, q4, q5, #5
548*e1eccf28SAndroid Build Coastguard Worker            vext.u16    q13, q9, q10, #1
549*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d24, d4[2]
550*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d25, d4[2]
551*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d26, d4[2]
552*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d27, d4[2]
553*e1eccf28SAndroid Build Coastguard Worker    117:    vext.u16    q12, q4, q5, #6
554*e1eccf28SAndroid Build Coastguard Worker            vext.u16    q13, q9, q10, #0
555*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d24, d4[1]
556*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d25, d4[1]
557*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d26, d4[1]
558*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d27, d4[1]
559*e1eccf28SAndroid Build Coastguard Worker    116:    vext.u16    q12, q4, q5, #7
560*e1eccf28SAndroid Build Coastguard Worker            vext.u16    q13, q8, q9, #7
561*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d24, d4[0]
562*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d25, d4[0]
563*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d26, d4[0]
564*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d27, d4[0]
565*e1eccf28SAndroid Build Coastguard Worker    115:    vext.u16    q12, q5, q6, #0
566*e1eccf28SAndroid Build Coastguard Worker            vext.u16    q13, q8, q9, #6
567*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d24, d3[3]
568*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d25, d3[3]
569*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d26, d3[3]
570*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d27, d3[3]
571*e1eccf28SAndroid Build Coastguard Worker    114:    vext.u16    q12, q5, q6, #1
572*e1eccf28SAndroid Build Coastguard Worker            vext.u16    q13, q8, q9, #5
573*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d24, d3[2]
574*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d25, d3[2]
575*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d26, d3[2]
576*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d27, d3[2]
577*e1eccf28SAndroid Build Coastguard Worker    113:    vext.u16    q12, q5, q6, #2
578*e1eccf28SAndroid Build Coastguard Worker            vext.u16    q13, q8, q9, #4
579*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d24, d3[1]
580*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d25, d3[1]
581*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d26, d3[1]
582*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d27, d3[1]
583*e1eccf28SAndroid Build Coastguard Worker    112:    vext.u16    q12, q5, q6, #3
584*e1eccf28SAndroid Build Coastguard Worker            vext.u16    q13, q8, q9, #3
585*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d24, d3[0]
586*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d25, d3[0]
587*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d26, d3[0]
588*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d27, d3[0]
589*e1eccf28SAndroid Build Coastguard Worker    111:    vext.u16    q12, q5, q6, #4
590*e1eccf28SAndroid Build Coastguard Worker            vext.u16    q13, q8, q9, #2
591*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d24, d2[3]
592*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d25, d2[3]
593*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d26, d2[3]
594*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d27, d2[3]
595*e1eccf28SAndroid Build Coastguard Worker    110:    vext.u16    q12, q5, q6, #5
596*e1eccf28SAndroid Build Coastguard Worker            vext.u16    q13, q8, q9, #1
597*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d24, d2[2]
598*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d25, d2[2]
599*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d26, d2[2]
600*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d27, d2[2]
601*e1eccf28SAndroid Build Coastguard Worker    109:    vext.u16    q12, q5, q6, #6
602*e1eccf28SAndroid Build Coastguard Worker            vext.u16    q13, q8, q9, #0
603*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d24, d2[1]
604*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d25, d2[1]
605*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d26, d2[1]
606*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d27, d2[1]
607*e1eccf28SAndroid Build Coastguard Worker    108:    vext.u16    q12, q5, q6, #7
608*e1eccf28SAndroid Build Coastguard Worker            vext.u16    q13, q7, q8, #7
609*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d24, d2[0]
610*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d25, d2[0]
611*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d26, d2[0]
612*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d27, d2[0]
613*e1eccf28SAndroid Build Coastguard Worker    107:    vext.u16    q12, q6, q7, #0
614*e1eccf28SAndroid Build Coastguard Worker            vext.u16    q13, q7, q8, #6
615*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d24, d1[3]
616*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d25, d1[3]
617*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d26, d1[3]
618*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d27, d1[3]
619*e1eccf28SAndroid Build Coastguard Worker    106:    vext.u16    q12, q6, q7, #1
620*e1eccf28SAndroid Build Coastguard Worker            vext.u16    q13, q7, q8, #5
621*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d24, d1[2]
622*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d25, d1[2]
623*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d26, d1[2]
624*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d27, d1[2]
625*e1eccf28SAndroid Build Coastguard Worker    105:    vext.u16    q12, q6, q7, #2
626*e1eccf28SAndroid Build Coastguard Worker            vext.u16    q13, q7, q8, #4
627*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d24, d1[1]
628*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d25, d1[1]
629*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d26, d1[1]
630*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d27, d1[1]
631*e1eccf28SAndroid Build Coastguard Worker    104:    vext.u16    q12, q6, q7, #3
632*e1eccf28SAndroid Build Coastguard Worker            vext.u16    q13, q7, q8, #3
633*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d24, d1[0]
634*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d25, d1[0]
635*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d26, d1[0]
636*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d27, d1[0]
637*e1eccf28SAndroid Build Coastguard Worker    103:    vext.u16    q12, q6, q7, #4
638*e1eccf28SAndroid Build Coastguard Worker            vext.u16    q13, q7, q8, #2
639*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d24, d0[3]
640*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d25, d0[3]
641*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d26, d0[3]
642*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d27, d0[3]
643*e1eccf28SAndroid Build Coastguard Worker    102:    vext.u16    q12, q6, q7, #5
644*e1eccf28SAndroid Build Coastguard Worker            vext.u16    q13, q7, q8, #1
645*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d24, d0[2]
646*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d25, d0[2]
647*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d26, d0[2]
648*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d27, d0[2]
649*e1eccf28SAndroid Build Coastguard Worker    101:    vext.u16    q12, q6, q7, #6
650*e1eccf28SAndroid Build Coastguard Worker            vext.u16    q13, q7, q8, #0
651*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d24, d0[1]
652*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d25, d0[1]
653*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d26, d0[1]
654*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d27, d0[1]
655*e1eccf28SAndroid Build Coastguard Worker
656*e1eccf28SAndroid Build Coastguard Worker            vqrshrn.u32 d28, q14, #16  /* 32-bit sums -> 16 bits, rounding and saturating (results 0-3) */
657*e1eccf28SAndroid Build Coastguard Worker            vqrshrn.u32 d29, q15, #16  /* same for results 4-7; q14 (d28:d29) now holds all eight */
658*e1eccf28SAndroid Build Coastguard Worker            vqrshrn.u16 d31, q14, #FRACTION_BITS  /* 16 bits -> 8 bits, rounding and saturating; final output in d31 */
659*e1eccf28SAndroid Build Coastguard Worker
660*e1eccf28SAndroid Build Coastguard Worker            vmov        d7, d9  /* advance the window by eight samples; of q3 only d7 holds data (d6 is coefficients) */
661*e1eccf28SAndroid Build Coastguard Worker            vmov        q4, q5
662*e1eccf28SAndroid Build Coastguard Worker            vmov        q5, q6
663*e1eccf28SAndroid Build Coastguard Worker            vmov        q6, q7
664*e1eccf28SAndroid Build Coastguard Worker            vmov        q7, q8
665*e1eccf28SAndroid Build Coastguard Worker            vmov        q8, q9
666*e1eccf28SAndroid Build Coastguard Worker            vmov        q9, q10
667*e1eccf28SAndroid Build Coastguard Worker            vmov        q10, q11
668*e1eccf28SAndroid Build Coastguard Worker.endm/*}}}*/
669*e1eccf28SAndroid Build Coastguard Worker
/* NOTE(review): the values 6 and 12 match the suffixes of the hconv4_6 and
 * hconv4_12 macros that follow; presumably this lists the tap counts for which
 * hconv4_<n> variants exist.  The consumer of this list is not in this part of
 * the file -- confirm there. */
670*e1eccf28SAndroid Build Coastguard Worker#define TUNED_LIST4 6, 12
/* hconv4_6: one step of a symmetric convolution over unsigned 16-bit samples
 * with up to 6 taps on each side, where neighbouring taps are one d register
 * (four u16 lanes) apart.
 * NOTE(review): presumably four interleaved channels per pixel, so that each
 * d register holds one pixel and two pixels are produced per step -- confirm
 * against the code that instantiates this macro.
 *
 * Inputs, as read by the code below:
 *   d0[0]         coefficient applied to the centre samples
 *   d0[1]..d1[2]  coefficients for taps 1..6
 *   q4-q10        window of u16 samples (d8..d21); the centre is q7 (d14, d15)
 *   q11           copied into q10 when the window is advanced at the end
 *   r5            number of taps to apply on each side, 1..6.  It indexes the
 *                 offset table so that execution enters at label 100+r5 and
 *                 falls through every lower-numbered tap.  Values outside
 *                 1..6 index outside the table.
 *
 * Outputs:
 *   d31           eight results narrowed to 8 bits (rounding, saturating)
 *   q4-q10        window advanced by one q register; q11 is left unchanged
 *
 * Clobbers: r12, q14, q15.
 * FRACTION_BITS is defined outside this part of the file.
 */
671*e1eccf28SAndroid Build Coastguard Worker.macro hconv4_6/*{{{*/
672*e1eccf28SAndroid Build Coastguard Worker            vmull.u16   q14, d14, d0[0]  /* q14 = 32-bit sums for lanes 0-3, seeded with centre * d0[0] */
673*e1eccf28SAndroid Build Coastguard Worker            vmull.u16   q15, d15, d0[0]  /* q15 = 32-bit sums for lanes 4-7 */
674*e1eccf28SAndroid Build Coastguard Worker
675*e1eccf28SAndroid Build Coastguard Worker            ldr         r12, [pc, r5, LSL #2]  /* ARM state: pc reads as this instruction + 8 (the bkpt), so this loads table word r5-1 */
676*e1eccf28SAndroid Build Coastguard Worker            add         pc, pc, r12  /* pc reads as label 100 here, so this jumps to 100b + loaded offset */
677*e1eccf28SAndroid Build Coastguard Worker            bkpt  /* never reached by fall-through; occupies the word that r5 == 0 would index */
678*e1eccf28SAndroid Build Coastguard Worker    100:    .word 101f-100b  /* table of offsets from label 100 to the entry point for each tap count */
679*e1eccf28SAndroid Build Coastguard Worker            .word 102f-100b
680*e1eccf28SAndroid Build Coastguard Worker            .word 103f-100b
681*e1eccf28SAndroid Build Coastguard Worker            .word 104f-100b
682*e1eccf28SAndroid Build Coastguard Worker            .word 105f-100b
683*e1eccf28SAndroid Build Coastguard Worker            .word 106f-100b
684*e1eccf28SAndroid Build Coastguard Worker    106:    vmlal.u16   q14, d8,  d1[2]  /* tap 6, left: six d registers before the centre (d14-6, d15-6) */
685*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d9,  d1[2]
686*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d20, d1[2]  /* tap 6, right: six d registers after the centre (d14+6, d15+6) */
687*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d21, d1[2]
            /* Taps 5 down to 1 repeat the pattern, each one d register closer
             * to the centre and one coefficient lane lower. */
688*e1eccf28SAndroid Build Coastguard Worker    105:    vmlal.u16   q14, d9,  d1[1]
689*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d10, d1[1]
690*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d19, d1[1]
691*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d20, d1[1]
692*e1eccf28SAndroid Build Coastguard Worker    104:    vmlal.u16   q14, d10, d1[0]
693*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d11, d1[0]
694*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d18, d1[0]
695*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d19, d1[0]
696*e1eccf28SAndroid Build Coastguard Worker    103:    vmlal.u16   q14, d11, d0[3]
697*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d12, d0[3]
698*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d17, d0[3]
699*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d18, d0[3]
700*e1eccf28SAndroid Build Coastguard Worker    102:    vmlal.u16   q14, d12, d0[2]
701*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d13, d0[2]
702*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d16, d0[2]
703*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d17, d0[2]
704*e1eccf28SAndroid Build Coastguard Worker    101:    vmlal.u16   q14, d13, d0[1]
705*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d14, d0[1]
706*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d15, d0[1]
707*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d16, d0[1]
708*e1eccf28SAndroid Build Coastguard Worker
709*e1eccf28SAndroid Build Coastguard Worker            vqrshrn.u32 d28, q14, #16  /* 32-bit sums -> 16 bits, rounding and saturating (lanes 0-3) */
710*e1eccf28SAndroid Build Coastguard Worker            vqrshrn.u32 d29, q15, #16  /* same for lanes 4-7; q14 (d28:d29) now holds all eight */
711*e1eccf28SAndroid Build Coastguard Worker            vqrshrn.u16 d31, q14, #FRACTION_BITS  /* 16 bits -> 8 bits, rounding and saturating; final output in d31 */
712*e1eccf28SAndroid Build Coastguard Worker
713*e1eccf28SAndroid Build Coastguard Worker            vmov        q4, q5  /* advance the window by one q register (two d registers) */
714*e1eccf28SAndroid Build Coastguard Worker            vmov        q5, q6
715*e1eccf28SAndroid Build Coastguard Worker            vmov        q6, q7
716*e1eccf28SAndroid Build Coastguard Worker            vmov        q7, q8
717*e1eccf28SAndroid Build Coastguard Worker            vmov        q8, q9
718*e1eccf28SAndroid Build Coastguard Worker            vmov        q9, q10
719*e1eccf28SAndroid Build Coastguard Worker            vmov        q10, q11
720*e1eccf28SAndroid Build Coastguard Worker.endm/*}}}*/
721*e1eccf28SAndroid Build Coastguard Worker
722*e1eccf28SAndroid Build Coastguard Worker.macro hconv4_12/*{{{*/
723*e1eccf28SAndroid Build Coastguard Worker            vmull.u16   q14, d8, d0[0]
724*e1eccf28SAndroid Build Coastguard Worker            vmull.u16   q15, d9, d0[0]
725*e1eccf28SAndroid Build Coastguard Worker
726*e1eccf28SAndroid Build Coastguard Worker            ldr         r12, [pc, r5, LSL #2]
727*e1eccf28SAndroid Build Coastguard Worker            add         pc, pc, r12
728*e1eccf28SAndroid Build Coastguard Worker            bkpt
729*e1eccf28SAndroid Build Coastguard Worker    100:    .word 101f-100b
730*e1eccf28SAndroid Build Coastguard Worker            .word 102f-100b
731*e1eccf28SAndroid Build Coastguard Worker            .word 103f-100b
732*e1eccf28SAndroid Build Coastguard Worker            .word 104f-100b
733*e1eccf28SAndroid Build Coastguard Worker            .word 105f-100b
734*e1eccf28SAndroid Build Coastguard Worker            .word 106f-100b
735*e1eccf28SAndroid Build Coastguard Worker            .word 107f-100b
736*e1eccf28SAndroid Build Coastguard Worker            .word 108f-100b
737*e1eccf28SAndroid Build Coastguard Worker            .word 109f-100b
738*e1eccf28SAndroid Build Coastguard Worker            .word 110f-100b
739*e1eccf28SAndroid Build Coastguard Worker            .word 111f-100b
740*e1eccf28SAndroid Build Coastguard Worker            .word 112f-100b
741*e1eccf28SAndroid Build Coastguard Worker    112:    add         r12, r9, #0x1a0
742*e1eccf28SAndroid Build Coastguard Worker            bic         r12, r12, #0x200
743*e1eccf28SAndroid Build Coastguard Worker            vld1.u16    {d24,d25}, [r12:128]
744*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d24, d3[0]
745*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d25, d3[0]
746*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d20, d3[0]
747*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d21, d3[0]
748*e1eccf28SAndroid Build Coastguard Worker    111:    add         r12, r9, #0x1a8
749*e1eccf28SAndroid Build Coastguard Worker            bic         r12, r12, #0x200
750*e1eccf28SAndroid Build Coastguard Worker            vld1.u16    {d24}, [r12:64]!
751*e1eccf28SAndroid Build Coastguard Worker            bic         r12, r12, #0x200
752*e1eccf28SAndroid Build Coastguard Worker            vld1.u16    {d25}, [r12:64]
753*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d24, d2[3]
754*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d25, d2[3]
755*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d19, d2[3]
756*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d20, d2[3]
757*e1eccf28SAndroid Build Coastguard Worker    110:    add         r12, r9, #0x1b0
758*e1eccf28SAndroid Build Coastguard Worker            bic         r12, r12, #0x200
759*e1eccf28SAndroid Build Coastguard Worker            vld1.u16    {d24,d25}, [r12:128]
760*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d24, d2[2]
761*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d25, d2[2]
762*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d18, d2[2]
763*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d19, d2[2]
764*e1eccf28SAndroid Build Coastguard Worker    109:    add         r12, r9, #0x1b8
765*e1eccf28SAndroid Build Coastguard Worker            bic         r12, r12, #0x200
766*e1eccf28SAndroid Build Coastguard Worker            vld1.u16    {d24}, [r12:64]!
767*e1eccf28SAndroid Build Coastguard Worker            bic         r12, r12, #0x200
768*e1eccf28SAndroid Build Coastguard Worker            vld1.u16    {d25}, [r12:64]
769*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d24, d2[1]
770*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d25, d2[1]
771*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d17, d2[1]
772*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d18, d2[1]
773*e1eccf28SAndroid Build Coastguard Worker    108:    add         r12, r9, #0x1c0
774*e1eccf28SAndroid Build Coastguard Worker            bic         r12, r12, #0x200
775*e1eccf28SAndroid Build Coastguard Worker            vld1.u16    {d24,d25}, [r12:128]
776*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d24, d2[0]
777*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d25, d2[0]
778*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d16, d2[0]
779*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d17, d2[0]
780*e1eccf28SAndroid Build Coastguard Worker    107:    add         r12, r9, #0x1c8
781*e1eccf28SAndroid Build Coastguard Worker            bic         r12, r12, #0x200
782*e1eccf28SAndroid Build Coastguard Worker            vld1.u16    {d24}, [r12:64]!
783*e1eccf28SAndroid Build Coastguard Worker            bic         r12, r12, #0x200
784*e1eccf28SAndroid Build Coastguard Worker            vld1.u16    {d25}, [r12:64]
785*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d24, d1[3]
786*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d25, d1[3]
787*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d15, d1[3]
788*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d16, d1[3]
789*e1eccf28SAndroid Build Coastguard Worker    106:    add         r12, r9, #0x1d0
790*e1eccf28SAndroid Build Coastguard Worker            bic         r12, r12, #0x200
791*e1eccf28SAndroid Build Coastguard Worker            vld1.u16    {d24,d25}, [r12:128]
792*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d24, d1[2]
793*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d25, d1[2]
794*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d14, d1[2]
795*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d15, d1[2]
796*e1eccf28SAndroid Build Coastguard Worker    105:    add         r12, r9, #0x1d8
797*e1eccf28SAndroid Build Coastguard Worker            bic         r12, r12, #0x200
798*e1eccf28SAndroid Build Coastguard Worker            vld1.u16    {d24}, [r12:64]!
799*e1eccf28SAndroid Build Coastguard Worker            bic         r12, r12, #0x200
800*e1eccf28SAndroid Build Coastguard Worker            vld1.u16    {d25}, [r12:64]
801*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d24, d1[1]
802*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d25, d1[1]
803*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d13, d1[1]
804*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d14, d1[1]
805*e1eccf28SAndroid Build Coastguard Worker    104:    add         r12, r9, #0x1e0
806*e1eccf28SAndroid Build Coastguard Worker            bic         r12, r12, #0x200
807*e1eccf28SAndroid Build Coastguard Worker            vld1.u16    {d24,d25}, [r12:128]
808*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d24, d1[0]
809*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d25, d1[0]
810*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d12, d1[0]
811*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d13, d1[0]
812*e1eccf28SAndroid Build Coastguard Worker    103:    add         r12, r9, #0x1e8
813*e1eccf28SAndroid Build Coastguard Worker            bic         r12, r12, #0x200
814*e1eccf28SAndroid Build Coastguard Worker            vld1.u16    {d24}, [r12:64]!
815*e1eccf28SAndroid Build Coastguard Worker            bic         r12, r12, #0x200
816*e1eccf28SAndroid Build Coastguard Worker            vld1.u16    {d25}, [r12:64]
817*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d24, d0[3]
818*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d25, d0[3]
819*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d11, d0[3]
820*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d12, d0[3]
821*e1eccf28SAndroid Build Coastguard Worker    102:    add         r12, r9, #0x1f0
822*e1eccf28SAndroid Build Coastguard Worker            bic         r12, r12, #0x200
823*e1eccf28SAndroid Build Coastguard Worker            vld1.u16    {d24,d25}, [r12:128]
824*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d24, d0[2]
825*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d25, d0[2]
826*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d10, d0[2]
827*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d11, d0[2]
828*e1eccf28SAndroid Build Coastguard Worker    101:    add         r12, r9, #0x1f8
829*e1eccf28SAndroid Build Coastguard Worker            bic         r12, r12, #0x200
830*e1eccf28SAndroid Build Coastguard Worker            vld1.u16    {d24}, [r12:64]
831*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d24, d0[1]
832*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d8,  d0[1]
833*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d9,  d0[1]
834*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d10, d0[1]
835*e1eccf28SAndroid Build Coastguard Worker
836*e1eccf28SAndroid Build Coastguard Worker            vqrshrn.u32 d28, q14, #16
837*e1eccf28SAndroid Build Coastguard Worker            vqrshrn.u32 d29, q15, #16
838*e1eccf28SAndroid Build Coastguard Worker            vqrshrn.u16 d31, q14, #FRACTION_BITS
839*e1eccf28SAndroid Build Coastguard Worker
840*e1eccf28SAndroid Build Coastguard Worker            vst1.u8     {q4}, [r9:128]!
841*e1eccf28SAndroid Build Coastguard Worker            bic         r9, r9, #0x200
842*e1eccf28SAndroid Build Coastguard Worker            vmov        q4, q5
843*e1eccf28SAndroid Build Coastguard Worker            vmov        q5, q6
844*e1eccf28SAndroid Build Coastguard Worker            vmov        q6, q7
845*e1eccf28SAndroid Build Coastguard Worker            vmov        q7, q8
846*e1eccf28SAndroid Build Coastguard Worker            vmov        q8, q9
847*e1eccf28SAndroid Build Coastguard Worker            vmov        q9, q10
848*e1eccf28SAndroid Build Coastguard Worker            vmov        q10, q11
849*e1eccf28SAndroid Build Coastguard Worker.endm/*}}}*/
850*e1eccf28SAndroid Build Coastguard Worker
/* hconv4_25: one step of a symmetric horizontal convolution over 16-bit
 * samples, with up to 25 taps on either side of the centre.
 *
 * Inputs, as used by the code below:
 *   r5      -- selects the entry point into the unrolled tap chain.  It
 *              indexes the 25-entry offset table at 100: and must be in the
 *              range 1..25; entering at label (100 + r5) runs taps r5, r5-1,
 *              ..., 1 by falling through.
 *   r9      -- address inside a buffer of 16-bit samples.  Every address
 *              derived from it has bit 0x200 cleared, which is how accesses
 *              wrap around (presumably the buffer is 0x200 bytes long and
 *              placed so that this works -- its allocation is not visible
 *              here).
 *   d0-d6   -- unsigned 16-bit coefficients viewed as one array: lane 0 of d0
 *              is the centre coefficient and tap N uses element N (tap 25 is
 *              d6[1]).
 *   q4-q10  -- samples that logically follow the buffer contents; d8 is used
 *              where the byte offset from r9 would reach 0x200.
 *   q11     -- not read by the taps, only shifted into q10 at the end.
 *
 * The centre is the 16 bytes at r9 + 0x198.  Tap N multiplies element N of
 * the coefficient array by the samples 8*N bytes before the centre and by the
 * samples 8*N bytes after it, and accumulates both products.  q14 accumulates
 * the first four 16-bit lanes and q15 the next four.
 *
 * Output: d31 holds eight unsigned 8-bit results (rounded and saturated).
 * Side effects: q4 is stored at [r9], r9 advances by 16 bytes (with the same
 * bit-0x200 wrap), and q5-q11 are copied down into q4-q10.
 * Clobbers: r12, d24-d27, q14, q15.
 *
 * Note: the second 8-byte load of several split loads is written as [r12]
 * without the :64 hint used elsewhere in this macro.  The address is still
 * 8-byte aligned there, because it comes from a :64 load with writeback
 * followed by clearing a single high bit.
 */
851*e1eccf28SAndroid Build Coastguard Worker.macro hconv4_25/*{{{*/
            /* Centre tap: initialise both accumulators with vmull. */
852*e1eccf28SAndroid Build Coastguard Worker            add         r12, r9, #0x198
853*e1eccf28SAndroid Build Coastguard Worker            bic         r12, r12, #0x200
854*e1eccf28SAndroid Build Coastguard Worker            vld1.u16    {d24}, [r12:64]!
855*e1eccf28SAndroid Build Coastguard Worker            bic         r12, r12, #0x200
856*e1eccf28SAndroid Build Coastguard Worker            vld1.u16    {d25}, [r12:64]
857*e1eccf28SAndroid Build Coastguard Worker            vmull.u16   q14, d24, d0[0]
858*e1eccf28SAndroid Build Coastguard Worker            vmull.u16   q15, d25, d0[0]
859*e1eccf28SAndroid Build Coastguard Worker
            /* Computed branch into the tap chain.  In ARM state pc reads as
             * the address of the current instruction plus 8, so the ldr
             * indexes from the bkpt word (index 1 is the first .word at 100:)
             * and the add sees pc equal to the address of 100:.  Each table
             * entry is an offset from 100:, so r5 == N continues at label
             * (100 + N).  The bkpt itself is never reached by fall-through.
             */
860*e1eccf28SAndroid Build Coastguard Worker            ldr         r12, [pc, r5, LSL #2]
861*e1eccf28SAndroid Build Coastguard Worker            add         pc, pc, r12
862*e1eccf28SAndroid Build Coastguard Worker            bkpt
863*e1eccf28SAndroid Build Coastguard Worker    100:    .word 101f-100b
864*e1eccf28SAndroid Build Coastguard Worker            .word 102f-100b
865*e1eccf28SAndroid Build Coastguard Worker            .word 103f-100b
866*e1eccf28SAndroid Build Coastguard Worker            .word 104f-100b
867*e1eccf28SAndroid Build Coastguard Worker            .word 105f-100b
868*e1eccf28SAndroid Build Coastguard Worker            .word 106f-100b
869*e1eccf28SAndroid Build Coastguard Worker            .word 107f-100b
870*e1eccf28SAndroid Build Coastguard Worker            .word 108f-100b
871*e1eccf28SAndroid Build Coastguard Worker            .word 109f-100b
872*e1eccf28SAndroid Build Coastguard Worker            .word 110f-100b
873*e1eccf28SAndroid Build Coastguard Worker            .word 111f-100b
874*e1eccf28SAndroid Build Coastguard Worker            .word 112f-100b
875*e1eccf28SAndroid Build Coastguard Worker            .word 113f-100b
876*e1eccf28SAndroid Build Coastguard Worker            .word 114f-100b
877*e1eccf28SAndroid Build Coastguard Worker            .word 115f-100b
878*e1eccf28SAndroid Build Coastguard Worker            .word 116f-100b
879*e1eccf28SAndroid Build Coastguard Worker            .word 117f-100b
880*e1eccf28SAndroid Build Coastguard Worker            .word 118f-100b
881*e1eccf28SAndroid Build Coastguard Worker            .word 119f-100b
882*e1eccf28SAndroid Build Coastguard Worker            .word 120f-100b
883*e1eccf28SAndroid Build Coastguard Worker            .word 121f-100b
884*e1eccf28SAndroid Build Coastguard Worker            .word 122f-100b
885*e1eccf28SAndroid Build Coastguard Worker            .word 123f-100b
886*e1eccf28SAndroid Build Coastguard Worker            .word 124f-100b
887*e1eccf28SAndroid Build Coastguard Worker            .word 125f-100b
            /* Taps 25 down to 13: the samples before the centre are loaded
             * from the buffer into d24/d25, the samples after the centre are
             * taken from the window registers (d8-d21).
             */
888*e1eccf28SAndroid Build Coastguard Worker    125:    add         r12, r9, #0x0d0
889*e1eccf28SAndroid Build Coastguard Worker            bic         r12, r12, #0x200
890*e1eccf28SAndroid Build Coastguard Worker            vld1.u16    {d24,d25}, [r12:128]
891*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d24, d6[1]
892*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d25, d6[1]
893*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d20, d6[1]
894*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d21, d6[1]
895*e1eccf28SAndroid Build Coastguard Worker    124:    add         r12, r9, #0x0d8
896*e1eccf28SAndroid Build Coastguard Worker            bic         r12, r12, #0x200
897*e1eccf28SAndroid Build Coastguard Worker            vld1.u16    {d24}, [r12:64]!
898*e1eccf28SAndroid Build Coastguard Worker            bic         r12, r12, #0x200
899*e1eccf28SAndroid Build Coastguard Worker            vld1.u16    {d25}, [r12]
900*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d24, d6[0]
901*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d25, d6[0]
902*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d19, d6[0]
903*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d20, d6[0]
904*e1eccf28SAndroid Build Coastguard Worker    123:    add         r12, r9, #0x0e0
905*e1eccf28SAndroid Build Coastguard Worker            bic         r12, r12, #0x200
906*e1eccf28SAndroid Build Coastguard Worker            vld1.u16    {d24,d25}, [r12:128]
907*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d24, d5[3]
908*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d25, d5[3]
909*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d18, d5[3]
910*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d19, d5[3]
911*e1eccf28SAndroid Build Coastguard Worker    122:    add         r12, r9, #0x0e8
912*e1eccf28SAndroid Build Coastguard Worker            bic         r12, r12, #0x200
913*e1eccf28SAndroid Build Coastguard Worker            vld1.u16    {d24}, [r12:64]!
914*e1eccf28SAndroid Build Coastguard Worker            bic         r12, r12, #0x200
915*e1eccf28SAndroid Build Coastguard Worker            vld1.u16    {d25}, [r12]
916*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d24, d5[2]
917*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d25, d5[2]
918*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d17, d5[2]
919*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d18, d5[2]
920*e1eccf28SAndroid Build Coastguard Worker    121:    add         r12, r9, #0x0f0
921*e1eccf28SAndroid Build Coastguard Worker            bic         r12, r12, #0x200
922*e1eccf28SAndroid Build Coastguard Worker            vld1.u16    {d24,d25}, [r12:128]
923*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d24, d5[1]
924*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d25, d5[1]
925*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d16, d5[1]
926*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d17, d5[1]
927*e1eccf28SAndroid Build Coastguard Worker    120:    add         r12, r9, #0x0f8
928*e1eccf28SAndroid Build Coastguard Worker            bic         r12, r12, #0x200
929*e1eccf28SAndroid Build Coastguard Worker            vld1.u16    {d24}, [r12:64]!
930*e1eccf28SAndroid Build Coastguard Worker            bic         r12, r12, #0x200
931*e1eccf28SAndroid Build Coastguard Worker            vld1.u16    {d25}, [r12]
932*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d24, d5[0]
933*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d25, d5[0]
934*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d15, d5[0]
935*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d16, d5[0]
936*e1eccf28SAndroid Build Coastguard Worker    119:    add         r12, r9, #0x100
937*e1eccf28SAndroid Build Coastguard Worker            bic         r12, r12, #0x200
938*e1eccf28SAndroid Build Coastguard Worker            vld1.u16    {d24,d25}, [r12:128]
939*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d24, d4[3]
940*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d25, d4[3]
941*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d14, d4[3]
942*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d15, d4[3]
943*e1eccf28SAndroid Build Coastguard Worker    118:    add         r12, r9, #0x108
944*e1eccf28SAndroid Build Coastguard Worker            bic         r12, r12, #0x200
945*e1eccf28SAndroid Build Coastguard Worker            vld1.u16    {d24}, [r12:64]!
946*e1eccf28SAndroid Build Coastguard Worker            bic         r12, r12, #0x200
947*e1eccf28SAndroid Build Coastguard Worker            vld1.u16    {d25}, [r12]
948*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d24, d4[2]
949*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d25, d4[2]
950*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d13, d4[2]
951*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d14, d4[2]
952*e1eccf28SAndroid Build Coastguard Worker    117:    add         r12, r9, #0x110
953*e1eccf28SAndroid Build Coastguard Worker            bic         r12, r12, #0x200
954*e1eccf28SAndroid Build Coastguard Worker            vld1.u16    {d24,d25}, [r12:128]
955*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d24, d4[1]
956*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d25, d4[1]
957*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d12, d4[1]
958*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d13, d4[1]
959*e1eccf28SAndroid Build Coastguard Worker    116:    add         r12, r9, #0x118
960*e1eccf28SAndroid Build Coastguard Worker            bic         r12, r12, #0x200
961*e1eccf28SAndroid Build Coastguard Worker            vld1.u16    {d24}, [r12:64]!
962*e1eccf28SAndroid Build Coastguard Worker            bic         r12, r12, #0x200
963*e1eccf28SAndroid Build Coastguard Worker            vld1.u16    {d25}, [r12]
964*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d24, d4[0]
965*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d25, d4[0]
966*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d11, d4[0]
967*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d12, d4[0]
968*e1eccf28SAndroid Build Coastguard Worker    115:    add         r12, r9, #0x120
969*e1eccf28SAndroid Build Coastguard Worker            bic         r12, r12, #0x200
970*e1eccf28SAndroid Build Coastguard Worker            vld1.u16    {d24,d25}, [r12:128]
971*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d24, d3[3]
972*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d25, d3[3]
973*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d10, d3[3]
974*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d11, d3[3]
975*e1eccf28SAndroid Build Coastguard Worker    114:    add         r12, r9, #0x128
976*e1eccf28SAndroid Build Coastguard Worker            bic         r12, r12, #0x200
977*e1eccf28SAndroid Build Coastguard Worker            vld1.u16    {d24}, [r12:64]!
978*e1eccf28SAndroid Build Coastguard Worker            bic         r12, r12, #0x200
979*e1eccf28SAndroid Build Coastguard Worker            vld1.u16    {d25}, [r12]
980*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d24, d3[2]
981*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d25, d3[2]
982*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d9,  d3[2]
983*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d10, d3[2]
984*e1eccf28SAndroid Build Coastguard Worker    113:    add         r12, r9, #0x130
985*e1eccf28SAndroid Build Coastguard Worker            bic         r12, r12, #0x200
986*e1eccf28SAndroid Build Coastguard Worker            vld1.u16    {d24,d25}, [r12:128]
987*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d24, d3[1]
988*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d25, d3[1]
989*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d8,  d3[1]
990*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d9,  d3[1]
            /* Tap 12 straddles the boundary: the first half of the samples
             * after the centre is loaded from the buffer (offset 0x1f8) into
             * d26, the second half is d8.
             */
991*e1eccf28SAndroid Build Coastguard Worker    112:    add         r12, r9, #0x138
992*e1eccf28SAndroid Build Coastguard Worker            bic         r12, r12, #0x200
993*e1eccf28SAndroid Build Coastguard Worker            vld1.u16    {d24}, [r12:64]!
994*e1eccf28SAndroid Build Coastguard Worker            bic         r12, r12, #0x200
995*e1eccf28SAndroid Build Coastguard Worker            vld1.u16    {d25}, [r12]
996*e1eccf28SAndroid Build Coastguard Worker                                            add         r12, r9, #0x1f8
997*e1eccf28SAndroid Build Coastguard Worker                                            bic         r12, r12, #0x200
998*e1eccf28SAndroid Build Coastguard Worker                                            vld1.u16    {d26}, [r12:64]
999*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d24, d3[0]
1000*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d25, d3[0]
1001*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d26, d3[0]   @ Could be d7, without the load, right?
1002*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d8,  d3[0]
            /* Taps 11 down to 1: both sides come from the buffer.  The
             * before-centre samples go to d24/d25 and the after-centre
             * samples (the further-indented loads) go to d26/d27.
             */
1003*e1eccf28SAndroid Build Coastguard Worker    111:    add         r12, r9, #0x140
1004*e1eccf28SAndroid Build Coastguard Worker            bic         r12, r12, #0x200
1005*e1eccf28SAndroid Build Coastguard Worker            vld1.u16    {d24,d25}, [r12:128]
1006*e1eccf28SAndroid Build Coastguard Worker                                            add         r12, r9, #0x1f0
1007*e1eccf28SAndroid Build Coastguard Worker                                            bic         r12, r12, #0x200
1008*e1eccf28SAndroid Build Coastguard Worker                                            vld1.u16    {d26,d27}, [r12:128]
1009*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d24, d2[3]
1010*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d25, d2[3]
1011*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d26, d2[3]
1012*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d27, d2[3]
1013*e1eccf28SAndroid Build Coastguard Worker    110:    add         r12, r9, #0x148
1014*e1eccf28SAndroid Build Coastguard Worker            bic         r12, r12, #0x200
1015*e1eccf28SAndroid Build Coastguard Worker            vld1.u16    {d24}, [r12:64]!
1016*e1eccf28SAndroid Build Coastguard Worker            bic         r12, r12, #0x200
1017*e1eccf28SAndroid Build Coastguard Worker            vld1.u16    {d25}, [r12]
1018*e1eccf28SAndroid Build Coastguard Worker                                            add         r12, r9, #0x1e8
1019*e1eccf28SAndroid Build Coastguard Worker                                            bic         r12, r12, #0x200
1020*e1eccf28SAndroid Build Coastguard Worker                                            vld1.u16    {d26}, [r12:64]!
1021*e1eccf28SAndroid Build Coastguard Worker                                            bic         r12, r12, #0x200
1022*e1eccf28SAndroid Build Coastguard Worker                                            vld1.u16    {d27}, [r12:64]
1023*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d24, d2[2]
1024*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d25, d2[2]
1025*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d26, d2[2]
1026*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d27, d2[2]
1027*e1eccf28SAndroid Build Coastguard Worker    109:    add         r12, r9, #0x150
1028*e1eccf28SAndroid Build Coastguard Worker            bic         r12, r12, #0x200
1029*e1eccf28SAndroid Build Coastguard Worker            vld1.u16    {d24,d25}, [r12:128]
1030*e1eccf28SAndroid Build Coastguard Worker                                            add         r12, r9, #0x1e0
1031*e1eccf28SAndroid Build Coastguard Worker                                            bic         r12, r12, #0x200
1032*e1eccf28SAndroid Build Coastguard Worker                                            vld1.u16    {d26,d27}, [r12:128]
1033*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d24, d2[1]
1034*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d25, d2[1]
1035*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d26, d2[1]
1036*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d27, d2[1]
1037*e1eccf28SAndroid Build Coastguard Worker    108:    add         r12, r9, #0x158
1038*e1eccf28SAndroid Build Coastguard Worker            bic         r12, r12, #0x200
1039*e1eccf28SAndroid Build Coastguard Worker            vld1.u16    {d24}, [r12:64]!
1040*e1eccf28SAndroid Build Coastguard Worker            bic         r12, r12, #0x200
1041*e1eccf28SAndroid Build Coastguard Worker            vld1.u16    {d25}, [r12]
1042*e1eccf28SAndroid Build Coastguard Worker                                            add         r12, r9, #0x1d8
1043*e1eccf28SAndroid Build Coastguard Worker                                            bic         r12, r12, #0x200
1044*e1eccf28SAndroid Build Coastguard Worker                                            vld1.u16    {d26}, [r12:64]!
1045*e1eccf28SAndroid Build Coastguard Worker                                            bic         r12, r12, #0x200
1046*e1eccf28SAndroid Build Coastguard Worker                                            vld1.u16    {d27}, [r12:64]
1047*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d24, d2[0]
1048*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d25, d2[0]
1049*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d26, d2[0]
1050*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d27, d2[0]
1051*e1eccf28SAndroid Build Coastguard Worker    107:    add         r12, r9, #0x160
1052*e1eccf28SAndroid Build Coastguard Worker            bic         r12, r12, #0x200
1053*e1eccf28SAndroid Build Coastguard Worker            vld1.u16    {d24,d25}, [r12:128]
1054*e1eccf28SAndroid Build Coastguard Worker                                            add         r12, r9, #0x1d0
1055*e1eccf28SAndroid Build Coastguard Worker                                            bic         r12, r12, #0x200
1056*e1eccf28SAndroid Build Coastguard Worker                                            vld1.u16    {d26,d27}, [r12:128]
1057*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d24, d1[3]
1058*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d25, d1[3]
1059*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d26, d1[3]
1060*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d27, d1[3]
1061*e1eccf28SAndroid Build Coastguard Worker    106:    add         r12, r9, #0x168
1062*e1eccf28SAndroid Build Coastguard Worker            bic         r12, r12, #0x200
1063*e1eccf28SAndroid Build Coastguard Worker            vld1.u16    {d24}, [r12:64]!
1064*e1eccf28SAndroid Build Coastguard Worker            bic         r12, r12, #0x200
1065*e1eccf28SAndroid Build Coastguard Worker            vld1.u16    {d25}, [r12]
1066*e1eccf28SAndroid Build Coastguard Worker                                            add         r12, r9, #0x1c8
1067*e1eccf28SAndroid Build Coastguard Worker                                            bic         r12, r12, #0x200
1068*e1eccf28SAndroid Build Coastguard Worker                                            vld1.u16    {d26}, [r12:64]!
1069*e1eccf28SAndroid Build Coastguard Worker                                            bic         r12, r12, #0x200
1070*e1eccf28SAndroid Build Coastguard Worker                                            vld1.u16    {d27}, [r12:64]
1071*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d24, d1[2]
1072*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d25, d1[2]
1073*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d26, d1[2]
1074*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d27, d1[2]
1075*e1eccf28SAndroid Build Coastguard Worker    105:    add         r12, r9, #0x170
1076*e1eccf28SAndroid Build Coastguard Worker            bic         r12, r12, #0x200
1077*e1eccf28SAndroid Build Coastguard Worker            vld1.u16    {d24,d25}, [r12:128]
1078*e1eccf28SAndroid Build Coastguard Worker                                            add         r12, r9, #0x1c0
1079*e1eccf28SAndroid Build Coastguard Worker                                            bic         r12, r12, #0x200
1080*e1eccf28SAndroid Build Coastguard Worker                                            vld1.u16    {d26,d27}, [r12:128]
1081*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d24, d1[1]
1082*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d25, d1[1]
1083*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d26, d1[1]
1084*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d27, d1[1]
1085*e1eccf28SAndroid Build Coastguard Worker    104:    add         r12, r9, #0x178
1086*e1eccf28SAndroid Build Coastguard Worker            bic         r12, r12, #0x200
1087*e1eccf28SAndroid Build Coastguard Worker            vld1.u16    {d24}, [r12:64]!
1088*e1eccf28SAndroid Build Coastguard Worker            bic         r12, r12, #0x200
1089*e1eccf28SAndroid Build Coastguard Worker            vld1.u16    {d25}, [r12]
1090*e1eccf28SAndroid Build Coastguard Worker                                            add         r12, r9, #0x1b8
1091*e1eccf28SAndroid Build Coastguard Worker                                            bic         r12, r12, #0x200
1092*e1eccf28SAndroid Build Coastguard Worker                                            vld1.u16    {d26}, [r12:64]!
1093*e1eccf28SAndroid Build Coastguard Worker                                            bic         r12, r12, #0x200
1094*e1eccf28SAndroid Build Coastguard Worker                                            vld1.u16    {d27}, [r12:64]
1095*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d24, d1[0]
1096*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d25, d1[0]
1097*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d26, d1[0]
1098*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d27, d1[0]
1099*e1eccf28SAndroid Build Coastguard Worker    103:    add         r12, r9, #0x180
1100*e1eccf28SAndroid Build Coastguard Worker            bic         r12, r12, #0x200
1101*e1eccf28SAndroid Build Coastguard Worker            vld1.u16    {d24,d25}, [r12:128]
1102*e1eccf28SAndroid Build Coastguard Worker                                            add         r12, r9, #0x1b0
1103*e1eccf28SAndroid Build Coastguard Worker                                            bic         r12, r12, #0x200
1104*e1eccf28SAndroid Build Coastguard Worker                                            vld1.u16    {d26,d27}, [r12:128]
1105*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d24, d0[3]
1106*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d25, d0[3]
1107*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d26, d0[3]
1108*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d27, d0[3]
1109*e1eccf28SAndroid Build Coastguard Worker    102:    add         r12, r9, #0x188
1110*e1eccf28SAndroid Build Coastguard Worker            bic         r12, r12, #0x200
1111*e1eccf28SAndroid Build Coastguard Worker            vld1.u16    {d24}, [r12:64]!
1112*e1eccf28SAndroid Build Coastguard Worker            bic         r12, r12, #0x200
1113*e1eccf28SAndroid Build Coastguard Worker            vld1.u16    {d25}, [r12]
1114*e1eccf28SAndroid Build Coastguard Worker                                            add         r12, r9, #0x1a8
1115*e1eccf28SAndroid Build Coastguard Worker                                            bic         r12, r12, #0x200
1116*e1eccf28SAndroid Build Coastguard Worker                                            vld1.u16    {d26}, [r12:64]!
1117*e1eccf28SAndroid Build Coastguard Worker                                            bic         r12, r12, #0x200
1118*e1eccf28SAndroid Build Coastguard Worker                                            vld1.u16    {d27}, [r12:64]
1119*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d24, d0[2]
1120*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d25, d0[2]
1121*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d26, d0[2]
1122*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d27, d0[2]
            /* Tap 1: the two 16-byte loads are adjacent, so the writeback
             * from the first (plus the wrap) yields the second address.
             */
1123*e1eccf28SAndroid Build Coastguard Worker    101:    add         r12, r9, #0x190
1124*e1eccf28SAndroid Build Coastguard Worker            bic         r12, r12, #0x200
1125*e1eccf28SAndroid Build Coastguard Worker            vld1.u16    {d24,d25}, [r12:128]!
1126*e1eccf28SAndroid Build Coastguard Worker            bic         r12, r12, #0x200
1127*e1eccf28SAndroid Build Coastguard Worker            vld1.u16    {d26,d27}, [r12:128]
1128*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d24, d0[1]
1129*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d25, d0[1]
1130*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q14, d26, d0[1]
1131*e1eccf28SAndroid Build Coastguard Worker            vmlal.u16   q15, d27, d0[1]
1132*e1eccf28SAndroid Build Coastguard Worker
            /* Narrow the 32-bit sums to 16 bits (rounding, saturating, >> 16)
             * into d28/d29, i.e. q14, then narrow q14 to eight 8-bit results
             * in d31 with a further rounding shift by FRACTION_BITS.
             */
1133*e1eccf28SAndroid Build Coastguard Worker            vqrshrn.u32 d28, q14, #16
1134*e1eccf28SAndroid Build Coastguard Worker            vqrshrn.u32 d29, q15, #16
1135*e1eccf28SAndroid Build Coastguard Worker            vqrshrn.u16 d31, q14, #FRACTION_BITS
1136*e1eccf28SAndroid Build Coastguard Worker
            /* Slide the window by 16 bytes: q4 goes into the buffer at r9,
             * r9 advances (with wrap), and q5-q11 move down into q4-q10.
             */
1137*e1eccf28SAndroid Build Coastguard Worker            vst1.u8     {q4}, [r9:128]!
1138*e1eccf28SAndroid Build Coastguard Worker            bic         r9, r9, #0x200
1139*e1eccf28SAndroid Build Coastguard Worker            vmov        q4, q5
1140*e1eccf28SAndroid Build Coastguard Worker            vmov        q5, q6
1141*e1eccf28SAndroid Build Coastguard Worker            vmov        q6, q7
1142*e1eccf28SAndroid Build Coastguard Worker            vmov        q7, q8
1143*e1eccf28SAndroid Build Coastguard Worker            vmov        q8, q9
1144*e1eccf28SAndroid Build Coastguard Worker            vmov        q9, q10
1145*e1eccf28SAndroid Build Coastguard Worker            vmov        q10, q11
1146*e1eccf28SAndroid Build Coastguard Worker.endm/*}}}*/
1147*e1eccf28SAndroid Build Coastguard Worker
1148*e1eccf28SAndroid Build Coastguard Worker/* Dedicated function wrapper for the fetch macro, for the cases where
1149*e1eccf28SAndroid Build Coastguard Worker * performance isn't that important, to keep code size down.
 *
 * The fetch macro is invoked with no arguments.  r10 and r11 are pushed
 * before the expansion and popped after it, so the caller sees them
 * unchanged (presumably the macro uses them as scratch -- confirm against
 * the macro definition).  Nothing else is saved here: any other register
 * the macro writes is visible to the caller.  Returns with bx lr.
1150*e1eccf28SAndroid Build Coastguard Worker */
1151*e1eccf28SAndroid Build Coastguard WorkerPRIVATE(fetch_generic_asm)
1152*e1eccf28SAndroid Build Coastguard Worker            push        {r10,r11}
1153*e1eccf28SAndroid Build Coastguard Worker            fetch
1154*e1eccf28SAndroid Build Coastguard Worker            pop         {r10,r11}
1155*e1eccf28SAndroid Build Coastguard Worker            bx          lr
1156*e1eccf28SAndroid Build Coastguard WorkerEND(fetch_generic_asm)
1157*e1eccf28SAndroid Build Coastguard Worker
1158*e1eccf28SAndroid Build Coastguard Worker
1159*e1eccf28SAndroid Build Coastguard Worker/* Fetch the next (16 - (r10 & 15)) columns of data, avoiding reading memory
1160*e1eccf28SAndroid Build Coastguard Worker * beyond that limit, and filling the rest of the vector with the first legal
1161*e1eccf28SAndroid Build Coastguard Worker * pixel.
1162*e1eccf28SAndroid Build Coastguard Worker * Result is in q10 and q11.  q8 and q9 are filled with the first legal pixel.
1163*e1eccf28SAndroid Build Coastguard Worker * Note: This function can read beyond the right edge of input if the image is
1164*e1eccf28SAndroid Build Coastguard Worker * narrower than 16 bytes.
 *
 * With k = r10 & 15: if k is zero the data returned by fetch_generic_asm is
 * left as is.  Otherwise r1 and r10 are both reduced by k, and q10:q11 end up
 * holding k copies of the first fetched 16-bit element followed by the first
 * (16 - k) fetched elements.  r12 is restored on exit.
1165*e1eccf28SAndroid Build Coastguard Worker */
1166*e1eccf28SAndroid Build Coastguard WorkerPRIVATE(fetch_clampleft1)
1167*e1eccf28SAndroid Build Coastguard Worker            push        {r12,lr}
1168*e1eccf28SAndroid Build Coastguard Worker            bl          fetch_generic_asm
            /* Broadcast element 0 of the fetched data into all of q8 and q9. */
1169*e1eccf28SAndroid Build Coastguard Worker            vdup.u16    q8, d20[0]
1170*e1eccf28SAndroid Build Coastguard Worker            vdup.u16    q9, d20[0]
            /* r12 = number of padding elements; none needed when it is zero. */
1171*e1eccf28SAndroid Build Coastguard Worker            ands        r12, r10, #15
1172*e1eccf28SAndroid Build Coastguard Worker            beq         1f
            /* Step r1 and r10 back by the padding count (presumably the
             * source pointer and column position -- confirm against callers).
             */
1173*e1eccf28SAndroid Build Coastguard Worker            sub         r1, r1, r12
1174*e1eccf28SAndroid Build Coastguard Worker            sub         r10, r10, r12
            /* Build a 64-byte scratch area on the stack: the fetched data in
             * the upper 32 bytes, the padding value in the lower 32 bytes.
             */
1175*e1eccf28SAndroid Build Coastguard Worker            sub         sp, sp, #32
1176*e1eccf28SAndroid Build Coastguard Worker            vst1.u16    {q10,q11}, [sp]
            /* r12 = start of the fetched data minus 2 bytes per padding
             * element; this lies inside the lower half allocated next.
             */
1177*e1eccf28SAndroid Build Coastguard Worker            sub         r12, sp, r12, LSL #1
1178*e1eccf28SAndroid Build Coastguard Worker            sub         sp, sp, #32
1179*e1eccf28SAndroid Build Coastguard Worker            vst1.u16    {q8,q9}, [sp]
            /* Reload 32 bytes straddling the padding and the data. */
1180*e1eccf28SAndroid Build Coastguard Worker            vld1.u16    {q10,q11}, [r12]
1181*e1eccf28SAndroid Build Coastguard Worker            add         sp, sp, #64
1182*e1eccf28SAndroid Build Coastguard Worker1:          pop         {r12,pc}
1183*e1eccf28SAndroid Build Coastguard WorkerEND(fetch_clampleft1)
1184*e1eccf28SAndroid Build Coastguard Worker
PRIVATE(fetch_clampleft4)
            push        {r12,lr}
            bl          fetch_generic_asm
            /* Left padding value: the first four lanes of the fetched chunk
             * (d20), replicated through q8 and q9.
             */
            vmov        d16, d20
            vmov        d17, d20
            vmov        q9, q8
            /* r12 = offset of the start index within its 16-entry chunk.
             * Zero means the chunk is already aligned and q10/q11 are final.
             */
            ands        r12, r10, #15
            beq         1f
            /* Round the start index down to the chunk boundary and wind the
             * source pointer back by the same amount.
             */
            sub         r10, r10, r12
            sub         r1, r1, r12
            /* Build [16 x padding | 16 x fetched] in a 64-byte stack scratch
             * area, then reload 16 values starting r12 entries (2 bytes each)
             * before the fetched data.  lr is free to use as a temporary: the
             * return address was pushed on entry and is popped into pc.
             */
            sub         sp, sp, #64
            add         lr, sp, #32
            vst1.u16    {q8,q9}, [sp]
            vst1.u16    {q10,q11}, [lr]
            sub         r12, lr, r12, LSL #1
            vld1.u16    {q10,q11}, [r12]
            add         sp, sp, #64
1:          pop         {r12,pc}
END(fetch_clampleft4)
1205*e1eccf28SAndroid Build Coastguard Worker
1206*e1eccf28SAndroid Build Coastguard Worker/* Fetch only the next (r11 & 15) (where 0 means 16) columns of data, avoiding
1207*e1eccf28SAndroid Build Coastguard Worker * reading memory beyond that limit, and filling the rest of the vector with
1208*e1eccf28SAndroid Build Coastguard Worker * the last legal pixel.
1209*e1eccf28SAndroid Build Coastguard Worker * Result is in q10 and q11.  q12 and q13 are filled with the last legal pixel.
1210*e1eccf28SAndroid Build Coastguard Worker * Note: This function can read beyond the left edge of input if the image is
1211*e1eccf28SAndroid Build Coastguard Worker * narrower than 16 bytes.
1212*e1eccf28SAndroid Build Coastguard Worker */
PRIVATE(fetch_clampright1)
            push        {r12,lr}
            /* r12 = (-r11) & 15: how many entries of a full 16-entry fetch
             * would lie beyond the stop index.  Zero means an exact fit.
             */
            rsb         r12, r11, #0
            ands        r12, r12, #15
            beq         1f
            /* Step the source pointer back by r12 before fetching, so the
             * 16-entry fetch ends exactly at the stop index.
             */
            sub         r1, r1, r12
            bl          fetch_generic_asm
            /* Right padding value: the last fetched lane across q12/q13. */
            vdup.u16    q12, d23[3]
            vmov        q13, q12
            /* r12 is recomputed rather than reused -- presumably it does not
             * survive fetch_generic_asm; confirm against the fetch macro.
             */
            rsb         r12, r11, #0
            and         r12, r12, #15
            /* Build [16 x fetched | 16 x padding] in a 64-byte stack scratch
             * area and reload 16 values starting r12 entries (2 bytes each)
             * into the fetched data, dropping the entries that came from the
             * step-back and pulling padding in behind.  lr is free to use as
             * a temporary here: the return address is on the stack.
             */
            sub         sp, sp, #64
            add         lr, sp, #32
            vst1.u16    {q10,q11}, [sp]
            vst1.u16    {q12,q13}, [lr]
            add         r12, sp, r12, LSL #1
            vld1.u16    {q10,q11}, [r12]
            add         sp, sp, #64
            pop         {r12,pc}
            /* Exact-fit path: plain fetch, then prepare the padding value. */
1:          bl          fetch_generic_asm
            vdup.u16    q12, d23[3]
            vmov        q13, q12
            pop         {r12,pc}
END(fetch_clampright1)
1237*e1eccf28SAndroid Build Coastguard Worker
PRIVATE(fetch_clampright4)
            push        {r12,lr}
            /* r12 = (-r11) & 15: how many entries of a full 16-entry fetch
             * would lie beyond the stop index.  Zero means an exact fit.
             */
            rsb         r12, r11, #0
            ands        r12, r12, #15
            beq         1f
            /* Step the source pointer back by r12 before fetching, so the
             * 16-entry fetch ends exactly at the stop index.
             */
            sub         r1, r1, r12
            bl          fetch_generic_asm
            /* Right padding value: the last four fetched lanes (d23),
             * replicated through q12 and q13.
             */
            vmov        d24, d23
            vmov        d25, d23
            vmov        q13, q12
            /* r12 is recomputed rather than reused -- presumably it does not
             * survive fetch_generic_asm; confirm against the fetch macro.
             */
            rsb         r12, r11, #0
            and         r12, r12, #15
            /* Build [16 x fetched | 16 x padding] in a 64-byte stack scratch
             * area and reload 16 values starting r12 entries (2 bytes each)
             * into the fetched data.  lr is free to use as a temporary here:
             * the return address is on the stack.
             */
            sub         sp, sp, #64
            add         lr, sp, #32
            vst1.u16    {q10,q11}, [sp]
            vst1.u16    {q12,q13}, [lr]
            add         r12, sp, r12, LSL #1
            vld1.u16    {q10,q11}, [r12]
            add         sp, sp, #64
            pop         {r12,pc}
            /* Exact-fit path: plain fetch, then prepare the padding value. */
1:          bl          fetch_generic_asm
            vmov        d24, d23
            vmov        d25, d23
            vmov        q13, q12
            pop         {r12,pc}
END(fetch_clampright4)
1266*e1eccf28SAndroid Build Coastguard Worker
/* Given values in q10 and q11, and an index in r11, sweep the (r11 & 15)th
 * value across to fill the rest of the register pair.  Used for filling the
 * right hand edge of the window when reading too close to the right hand edge
 * of the image.
 * Also returns a dup-ed copy of the last element in q12 and q13 for the
 * tail-fill case (this happens incidentally in the common path, but must be
 * done deliberately in the fast-out path).
 */
PRIVATE(prefill_sweepright1)
            /* r12 = stop index within its chunk.  Zero means the chunk is
             * completely valid and only the padding value is needed.
             */
            ands        r12, r11, #15
            beq         1f
            /* Spill q10/q11 to the bottom of a 64-byte scratch area and point
             * r12 at the last valid entry (index r12 - 1, 2 bytes each).
             */
            sub         sp, sp, #64
            sub         r12, r12, #1
            vst1.u16    {q10,q11}, [sp]
            add         r12, sp, r12, LSL #1
            /* Replicate that entry through q12/q13, write the 16 copies back
             * over the spill from that position onwards, then reload the
             * patched chunk.  The scratch area is twice the chunk size so the
             * overwrite cannot run past it.
             */
            vld1.u16    {d24[],d25[]}, [r12]
            vmov        q13, q12
            vst1.u16    {q12,q13}, [r12]
            vld1.u16    {q10,q11}, [sp]
            add         sp, sp, #64
            bx          lr
            /* Fast-out path: the padding value is the last lane of q11. */
1:          vdup.u16    q12, d23[3]
            vmov        q13, q12
            bx          lr
END(prefill_sweepright1)
1292*e1eccf28SAndroid Build Coastguard Worker
PRIVATE(prefill_sweepright4)
            /* r12 = stop index within its chunk.  Zero means the chunk is
             * completely valid and only the padding value is needed.
             */
            ands        r12, r11, #15
            beq         1f
            /* Spill q10/q11 to the bottom of a 64-byte scratch area and point
             * r12 at the last valid group of four entries (index r12 - 4,
             * 2 bytes each).
             * NOTE(review): assumes r11 & 15 is a multiple of four here, so
             * the address never falls below sp -- confirm against callers.
             */
            sub         sp, sp, #64
            sub         r12, r12, #4
            vst1.u16    {q10,q11}, [sp]
            add         r12, sp, r12, LSL #1
            /* Replicate that 64-bit group through q12/q13, write the copies
             * back over the spill from that position onwards, then reload the
             * patched chunk.
             */
            vld1.u64    {d24}, [r12]
            vmov        d25, d24
            vmov        q13, q12
            vst1.u16    {q12,q13}, [r12]
            vld1.u16    {q10,q11}, [sp]
            add         sp, sp, #64
            bx          lr
            /* Fast-out path: the padding value is the last four lanes (d23). */
1:          vmov        d24, d23
            vmov        d25, d23
            vmov        q13, q12
            bx          lr
END(prefill_sweepright4)
1314*e1eccf28SAndroid Build Coastguard Worker
1315*e1eccf28SAndroid Build Coastguard Worker/* The main loop keeps a sliding window of data that has already been convolved
1316*e1eccf28SAndroid Build Coastguard Worker * in the vertical axis for the current line.  This usually stays in the
1317*e1eccf28SAndroid Build Coastguard Worker * register file, but spills to memory for large windows.  The first thing that
1318*e1eccf28SAndroid Build Coastguard Worker * needs to be done at start-up is to fill this window with image data, taking
1319*e1eccf28SAndroid Build Coastguard Worker * into account the padding needed if the left or right edges of the image fall
1320*e1eccf28SAndroid Build Coastguard Worker * within this window.
1321*e1eccf28SAndroid Build Coastguard Worker */
1322*e1eccf28SAndroid Build Coastguard Worker
1323*e1eccf28SAndroid Build Coastguard Worker/* Because the window is in the register file writes to it cannot be indexed
1324*e1eccf28SAndroid Build Coastguard Worker * by another register.  Consequently the fill loops are unrolled to address
1325*e1eccf28SAndroid Build Coastguard Worker * the registers directly.  This macro distinguishes between writes to the
1326*e1eccf28SAndroid Build Coastguard Worker * register file and writes to the spill buffer (indicated by a destination
1327*e1eccf28SAndroid Build Coastguard Worker * register named xx).
1328*e1eccf28SAndroid Build Coastguard Worker */
.macro prefill_out ra, rb, sra, srb, srb_hi
  .ifnc \ra,xx
    /* Destination is a register pair: copy each half unless source and
     * destination are already the same register.
     */
    .ifnc \ra,\sra
            vmov.u16    \ra, \sra
    .endif
    .ifnc \rb,\srb
            vmov.u16    \rb, \srb
    .endif
  .else
    .ifnc \rb,xx
            /* Only the upper destination is a real register.  This case is
             * used only for the last tap of uchar1 r=25: the first source
             * register is discarded and the high half of the second source
             * is kept.
             */
            vmov.u16    \rb, \srb_hi
    .else
            /* Both destinations are xx: store the pair to the spill buffer
             * at r9 (128-bit aligned) and advance r9.
             */
            vst1.u16    {\sra,\srb}, [r9:128]!
    .endif
  .endif
.endm
1347*e1eccf28SAndroid Build Coastguard Worker
1348*e1eccf28SAndroid Build Coastguard Worker/* This macro provides the list of registers representing the window, and the
1349*e1eccf28SAndroid Build Coastguard Worker * cases where the register file is too small and a spill buffer is used
1350*e1eccf28SAndroid Build Coastguard Worker * instead.
1351*e1eccf28SAndroid Build Coastguard Worker * Since several specialisations of each function are generated, this also
1352*e1eccf28SAndroid Build Coastguard Worker * culls superfluous iterations, and sets the variable `i` for subsequent
1353*e1eccf28SAndroid Build Coastguard Worker * macros indicating the current index into the window.
1354*e1eccf28SAndroid Build Coastguard Worker */
.macro prefill_list, macro, nextmacro, max_r, step, label
  /* ifneeded emits one unrolled stage for a single 16-entry window chunk,
   * but only when the current windowsize reaches that chunk.  It sets i to
   * the window index where the chunk starts, defines the entry label for this
   * stage and chunk, and hands the stage macro two targets: the next stage at
   * the same chunk, and the next stage at the following chunk.
   */
  .macro ifneeded macro, nextmacro, line, nextline, ra, rb, step, label
    .if windowsize >= (\line * 16)
      .set i, windowsize - (\line * 16)
\label\macro\line:
            prefill_\macro \label\nextmacro\line, \label\nextmacro\nextline, \ra, \rb, \step
    .endif
  .endm
  .if \step > 1
            /* Chunks 13..4 have no register home (xx, xx): they go to the
             * spill buffer.
             */
            ifneeded \macro, \nextmacro, 13, 12, xx, xx, \step, \label
            ifneeded \macro, \nextmacro, 12, 11, xx, xx, \step, \label
            ifneeded \macro, \nextmacro, 11, 10, xx, xx, \step, \label
            ifneeded \macro, \nextmacro, 10,  9, xx, xx, \step, \label
            ifneeded \macro, \nextmacro,  9,  8, xx, xx, \step, \label
            ifneeded \macro, \nextmacro,  8,  7, xx, xx, \step, \label
            ifneeded \macro, \nextmacro,  7,  6, xx, xx, \step, \label
            ifneeded \macro, \nextmacro,  6,  5, xx, xx, \step, \label
            ifneeded \macro, \nextmacro,  5,  4, xx, xx, \step, \label
            ifneeded \macro, \nextmacro,  4,  3, xx, xx, \step, \label
  .else
            /* q3 normally contains the coefficient table, but it's not fully
             * used.  In the uchar1, r=25 case the other half of q3 (d7) takes
             * the last two window taps to avoid falling out to memory.
             */
            ifneeded \macro, \nextmacro,  4,  3, xx, d7, \step, \label
  .endif
            /* The last three chunks always live in q4-q9. */
            ifneeded \macro, \nextmacro,  3,  2, q4, q5, \step, \label
            ifneeded \macro, \nextmacro,  2,  1, q6, q7, \step, \label
            ifneeded \macro, \nextmacro,  1,  0, q8, q9, \step, \label

            /* Chunk 0 marks the end of the window: leave the prefill body. */
\label\macro\()0:
            b           \label\()_end
  .purgem ifneeded
.endm
1389*e1eccf28SAndroid Build Coastguard Worker
1390*e1eccf28SAndroid Build Coastguard Worker/* These macros represent the possible stages of filling the window.
1391*e1eccf28SAndroid Build Coastguard Worker * Each macro is unrolled enough times that it can fill the entire window
1392*e1eccf28SAndroid Build Coastguard Worker * itself, but normally it will have to hand control to subsequent macros
1393*e1eccf28SAndroid Build Coastguard Worker * part-way through and this is done using labels named \next and \after, where
1394*e1eccf28SAndroid Build Coastguard Worker * \next is the next macro starting at the same window position and \after is
1395*e1eccf28SAndroid Build Coastguard Worker * the next macro starting after the current window position.
1396*e1eccf28SAndroid Build Coastguard Worker */
1397*e1eccf28SAndroid Build Coastguard Worker
/* leftfill: q8 and q9 contain the left padding value.  While the window
 * extends outside of the image on the left-hand side, and at least 16 more
 * padding values are needed in the window, store q8 and q9 into the window.
 * Otherwise skip forward to storing image data.
 */
.macro prefill_leftfill, next, after, ra, rb, step
            /* Fewer than 16 padding values remain before the image data
             * starts (r10 < i + 16, unsigned): hand over to the next stage at
             * this same chunk.  bcc is the same condition as blo.
             */
            cmp         r10, #(i + 16)
            bcc         \next
            /* Otherwise this whole chunk is left padding from q8/q9. */
            prefill_out \ra, \rb, q8, q9, d19
.endm
1408*e1eccf28SAndroid Build Coastguard Worker
1409*e1eccf28SAndroid Build Coastguard Worker/* leftedge: The very first non-fill or partial-fill chunk from the image is
1410*e1eccf28SAndroid Build Coastguard Worker * already loaded (as it was used to calculate the left padding value), so
1411*e1eccf28SAndroid Build Coastguard Worker * store it here, and then drop into the regular load/store cycle in the next
1412*e1eccf28SAndroid Build Coastguard Worker * macro.
1413*e1eccf28SAndroid Build Coastguard Worker */
.macro prefill_leftedge, next, after, ra, rb, step
            /* Store the chunk that is already sitting in q10/q11, then resume
             * with the following stage at the next chunk.  The numeric label
             * is kept in case later code refers back to it.
             */
1:          prefill_out \ra, \rb, q10, q11, d23
            b           \after
.endm
1418*e1eccf28SAndroid Build Coastguard Worker
1419*e1eccf28SAndroid Build Coastguard Worker/* dofetch: Copy chunks of the image into the window without any complications
1420*e1eccf28SAndroid Build Coastguard Worker * from edge conditions.
1421*e1eccf28SAndroid Build Coastguard Worker */
.macro prefill_dofetch, next, after, ra, rb, step
            /* When the stop index is at or before the end of this chunk
             * (r11 <= i + 16, unsigned), hand over to the next stage at this
             * same chunk.
             */
            cmp         r11, #(i + 16)
            bls         \next
            /* Otherwise fetch a full chunk into q10/q11 and store it. */
            bl          fetch_generic_asm
            prefill_out \ra, \rb, q10, q11, d23
.endm
1428*e1eccf28SAndroid Build Coastguard Worker
/* rightedge: The last fetch (currently in q10 and q11) may have gone beyond
 * the right-hand edge of the image.  In that case sweep the last valid pixel
 * across the rest of the chunk, and in either case prepare padding data in q12
 * and q13 for the next macro.  This is done in fetch_clampright.
 * This only happens once before going on to the next macro.
 * Sometimes leftedge also covers the rightedge case, in which case this has
 * to be skipped altogether.
 */
.macro prefill_rightedge, next, after, ra, rb, step
            /* When the stop index is at or before the start of this chunk
             * (r11 <= i, unsigned) there is no image data left for it: hand
             * over to the next stage at this same chunk.
             */
            cmp         r11, #i
            bls         \next
            /* Otherwise fetch the final chunk with right-edge clamping, store
             * it, and resume with the following stage at the next chunk.
             */
            bl          fetch_clampright\step
            prefill_out \ra, \rb, q10, q11, d23
            b           \after
.endm
1444*e1eccf28SAndroid Build Coastguard Worker
/* rightfill: The rest of the window is simply filled with right padding from
 * q12 and q13.
 */
.macro prefill_rightfill, next, after, ra, rb, step
            /* Unconditional: this chunk is right padding from q12/q13.  The
             * next and after targets are not used by this stage.
             */
            prefill_out \ra, \rb, q12, q13, d25
.endm
1451*e1eccf28SAndroid Build Coastguard Worker
1452*e1eccf28SAndroid Build Coastguard Worker/* Here all of the macros above are unrolled and laid out in the proper order.
1453*e1eccf28SAndroid Build Coastguard Worker */
.macro prefill_body, max_r, step, label
            /* One fully unrolled copy of each stage, in execution order.
             * Each list names its own stage and the stage that follows it.
             * The follower named for rightfill (oops) is only a placeholder
             * name for branch targets.
             */
            prefill_list leftfill,  leftedge,  \max_r, \step, \label
            prefill_list leftedge,  dofetch,   \max_r, \step, \label
            prefill_list dofetch,   rightedge, \max_r, \step, \label
            prefill_list rightedge, rightfill, \max_r, \step, \label
            prefill_list rightfill, oops,      \max_r, \step, \label
            /* Every list leaves through its chunk-0 label to here. */
\label\()_end:
.endm
1462*e1eccf28SAndroid Build Coastguard Worker
1463*e1eccf28SAndroid Build Coastguard Worker/* Fill the convolution window with context data.  The aim here is to load
1464*e1eccf28SAndroid Build Coastguard Worker * exactly 2*r columns, and in the main loop to read as many columns as will be
1465*e1eccf28SAndroid Build Coastguard Worker * written.  This is complicated by the window being divided into chunks at
1466*e1eccf28SAndroid Build Coastguard Worker * register boundaries, and the need to handle cases when the input starts very
1467*e1eccf28SAndroid Build Coastguard Worker * close to the left or right (or both) edges of the image and the need to fill
1468*e1eccf28SAndroid Build Coastguard Worker * the spaces that leaves with left and right edge padding values.
1469*e1eccf28SAndroid Build Coastguard Worker *
1470*e1eccf28SAndroid Build Coastguard Worker * Input:
1471*e1eccf28SAndroid Build Coastguard Worker *      r1 -- src
1472*e1eccf28SAndroid Build Coastguard Worker *      r2 -- pitch
1473*e1eccf28SAndroid Build Coastguard Worker *      r3 -- count
1474*e1eccf28SAndroid Build Coastguard Worker *      r4 -- available image data right of src pointer
1475*e1eccf28SAndroid Build Coastguard Worker *      r5 -- r
1476*e1eccf28SAndroid Build Coastguard Worker *      r6 -- rup
1477*e1eccf28SAndroid Build Coastguard Worker *      r7 -- rdn
1478*e1eccf28SAndroid Build Coastguard Worker *      r8 -- available image data left of src pointer
1479*e1eccf28SAndroid Build Coastguard Worker *      r9 -- buffer (if needed)
1480*e1eccf28SAndroid Build Coastguard Worker * Output:
1481*e1eccf28SAndroid Build Coastguard Worker *      r4 -= min(inlen, count + windowsize - centertap)
1482*e1eccf28SAndroid Build Coastguard Worker *      r1 += min(inlen, count + windowsize - centertap)
1483*e1eccf28SAndroid Build Coastguard Worker * Modifies:
1484*e1eccf28SAndroid Build Coastguard Worker *      r10 -- fill start index in the window
1485*e1eccf28SAndroid Build Coastguard Worker *      r11 -- fill stop index in the window
1486*e1eccf28SAndroid Build Coastguard Worker *      r12 -- scratch
1487*e1eccf28SAndroid Build Coastguard Worker */
.macro prefill step=1, max_r=25, label=xx
/* windowsize is 2 * max_r * step rounded up to a multiple of 16; centertap
 * is the window index that lines up with the current source pointer.
 */
.set windowsize, (((\max_r + \max_r) * \step + 15) & ~15)
.set centertap, (windowsize - \max_r * \step)
            /* r10 = centertap - r8, clamped to zero when r8 is larger than
             * centertap.  rsbs yields the same result and flags as loading
             * centertap and subtracting r8 from it.
             */
            rsbs        r10, r8, #centertap
            movlo       r10, #0

            /* r11 = windowsize when r4 covers the window right of centertap,
             * otherwise r4 + centertap.
             */
            subs        r11, r4, #(windowsize - centertap)
            movhs       r11, #0
            add         r11, r11, #windowsize

            /* r10 indicates where in the window legal image data begins.
             * r11 indicates where in the window legal image data ends.
             * When starting near the centre of a large image these would be
             * zero and windowsize respectively, but when starting near the
             * edges this can change.
             * When starting on the leftmost pixel, r10 will be centertap.
             * When starting on the rightmost pixel, r11 will be centertap+1.
             */

            /* r4 indicates how much data there is between the current pointers
             * and the right edge of the image.  The pointers currently point
             * to the data needed at centertap.  The subsequent code will
             * consume (windowsize - r10) data, but only the data from
             * centertap to windowsize comes out of r4's budget.  The result
             * is clamped at zero.
             */
1:          subs        r4, r4, #(windowsize - centertap)
            movlo       r4, #0

            /* Rewind the source pointer to the start of the window... */
            sub         r1, r1, #centertap

            /* ...unless r8 indicated that there wasn't that much data
             * available, in which case skip forward to the first legal entry.
             */
            add         r1, r1, r10


            /* Get the first chunk, and add padding to align it to the window
             * if necessary.
             */
            bl          fetch_clampleft\step

            /* Sometimes the start and the end of the window are in the same
             * chunk: r10 and r11 - 1 then differ only in their low four bits,
             * so their exclusive-or is below 16.  In that case both ends need
             * filler at the outset.
             */
            sub         r12, r11, #1
            eor         r12, r10, r12
            cmp         r12, #16
            bllo        prefill_sweepright\step

            /* Iterate through all the points in the window and fill them in
             * with padding or image data as needed.
             */
            prefill_body \max_r, \step, \label
.endm
1544*e1eccf28SAndroid Build Coastguard Worker
1545*e1eccf28SAndroid Build Coastguard Worker/* The main body of the convolve functions.  Having already pre-filled the
1546*e1eccf28SAndroid Build Coastguard Worker * convolution window with 2*r input values, the logic settles into a regular
1547*e1eccf28SAndroid Build Coastguard Worker * pattern of reading and writing at a 1:1 rate until either input or output
1548*e1eccf28SAndroid Build Coastguard Worker * expires.  The input leads the output by r values, so when processing all the
1549*e1eccf28SAndroid Build Coastguard Worker * way to the right-hand edge, or within r pixels of that edge, the input will
1550*e1eccf28SAndroid Build Coastguard Worker * run out first.  In the case of very narrow images, or sub-windows starting
1551*e1eccf28SAndroid Build Coastguard Worker * near the right edge, the input may already have run out while the
1552*e1eccf28SAndroid Build Coastguard Worker * convolution window was being filled and this loop will start with a
1553*e1eccf28SAndroid Build Coastguard Worker * zero-length input.
1554*e1eccf28SAndroid Build Coastguard Worker *
1555*e1eccf28SAndroid Build Coastguard Worker * Once the input runs out, the rest of the output must be processed by padding
1556*e1eccf28SAndroid Build Coastguard Worker * the remainder of the window with the pad value taken from the last valid
1557*e1eccf28SAndroid Build Coastguard Worker * pixel of the source.
1558*e1eccf28SAndroid Build Coastguard Worker *
1559*e1eccf28SAndroid Build Coastguard Worker * Input:
1560*e1eccf28SAndroid Build Coastguard Worker *      r0 = dst
1561*e1eccf28SAndroid Build Coastguard Worker *      r1 = src
1562*e1eccf28SAndroid Build Coastguard Worker *      r2 = pitch
1563*e1eccf28SAndroid Build Coastguard Worker *      r3 = count (output bytes still to be written)
1564*e1eccf28SAndroid Build Coastguard Worker *      r4 = inlen (input bytes still available)
1565*e1eccf28SAndroid Build Coastguard Worker *      r5 = r
1566*e1eccf28SAndroid Build Coastguard Worker *      r6 = rup
1567*e1eccf28SAndroid Build Coastguard Worker *      r7 = rdn
1568*e1eccf28SAndroid Build Coastguard Worker *      r9 = buffer
1569*e1eccf28SAndroid Build Coastguard Worker * Modifies
1570*e1eccf28SAndroid Build Coastguard Worker *      r8 = fetch code pointer
 *      r0 = advanced by every store, then set to 0 at the end of the macro
 *      r3, r4 = counted down as output is written and input is consumed
 *      r11 = copy of the remaining input length made just before the call
 *            to fetch_clampright (presumably that routine's argument)
 *      r12 = scratch
 *      lr = overwritten by the bl to fetch_clampright; the user of this macro
 *           must have saved it
 * plus whatever the fetch, \core and fetch_clampright code touches, which is
 * defined elsewhere in the file.
 *
 * Numeric labels 3 and 5 are referenced across the fetch and \core
 * expansions, so those macros must not define labels with the same numbers
 * in between.
1571*e1eccf28SAndroid Build Coastguard Worker */
1572*e1eccf28SAndroid Build Coastguard Worker.macro conv_body core, step=1, max_r=25, labelc="", labelnc=""
1573*e1eccf28SAndroid Build Coastguard Worker
1574*e1eccf28SAndroid Build Coastguard Worker            /* If r4 >= r3 then there's no need for clipping.  The main loop
1575*e1eccf28SAndroid Build Coastguard Worker             * needs to exit when either r3 or r4 runs out, so clamp r4 to be
1576*e1eccf28SAndroid Build Coastguard Worker             * no greater than r3 and use r4 for the loop.
1577*e1eccf28SAndroid Build Coastguard Worker             * However, if r4 comes out of the loop with less than 16 bytes
1578*e1eccf28SAndroid Build Coastguard Worker             * left, a partial read would be necessary to avoid reading beyond
1579*e1eccf28SAndroid Build Coastguard Worker             * the end of the image.  To avoid this, clamp r4 to r3 rounded up to
1580*e1eccf28SAndroid Build Coastguard Worker             * the next multiple of 16, which is still sufficient to force it out
1581*e1eccf28SAndroid Build Coastguard Worker             * of the loop but doesn't imply a rewind.
1582*e1eccf28SAndroid Build Coastguard Worker             */
1583*e1eccf28SAndroid Build Coastguard Worker            add         r12, r3, #15
1584*e1eccf28SAndroid Build Coastguard Worker            bic         r12, r12, #15           /* r12 = count rounded up to a multiple of 16 */
1585*e1eccf28SAndroid Build Coastguard Worker            cmp         r4, r12
1586*e1eccf28SAndroid Build Coastguard Worker            movhi       r4, r12                 /* r4 = min(inlen, r12), unsigned */
1587*e1eccf28SAndroid Build Coastguard Worker
1588*e1eccf28SAndroid Build Coastguard Worker            /* First calculate the entry-point into the internal fetch logic.
1589*e1eccf28SAndroid Build Coastguard Worker             * This is done so the same function can service several kernel
1590*e1eccf28SAndroid Build Coastguard Worker             * sizes.
1591*e1eccf28SAndroid Build Coastguard Worker             */
1592*e1eccf28SAndroid Build Coastguard Worker            ldr         r8, 3f                  /* r8 = offset of \labelnc relative to (1b + 8) */
1593*e1eccf28SAndroid Build Coastguard Worker1:          add         r8, r8, pc              /* in ARM state pc reads as this instruction + 8, cancelling the -8 in the literal */
1594*e1eccf28SAndroid Build Coastguard Worker            sub         r8, r5, LSL #5
1595*e1eccf28SAndroid Build Coastguard Worker            sub         r8, r5, LSL #4          /* net effect: r8 = \labelnc - r * 48 */
1596*e1eccf28SAndroid Build Coastguard Worker            cmp         r5, r6
1597*e1eccf28SAndroid Build Coastguard Worker            cmpeq       r5, r7
1598*e1eccf28SAndroid Build Coastguard Worker            beq         5f                      /* r == rup && r == rdn: keep this entry point and go to the loop test */
1599*e1eccf28SAndroid Build Coastguard Worker
1600*e1eccf28SAndroid Build Coastguard Worker            /* if (r != rup || r != rdn) then the address-clamping table should
1601*e1eccf28SAndroid Build Coastguard Worker             * be used rather than the short-cut version.
1602*e1eccf28SAndroid Build Coastguard Worker             */
1603*e1eccf28SAndroid Build Coastguard Worker            ldr         r8, 3f+4                /* second literal: offset of \labelc relative to (2b + 8) */
1604*e1eccf28SAndroid Build Coastguard Worker2:          add         r8, r8, pc
1605*e1eccf28SAndroid Build Coastguard Worker            sub         r8, r5, LSL #6          /* r8 = \labelc - r * 64 */
1606*e1eccf28SAndroid Build Coastguard Worker            b           5f                      /* jump over the literals and the loop body to the loop test */
1607*e1eccf28SAndroid Build Coastguard Worker            .align 3
1608*e1eccf28SAndroid Build Coastguard Worker3:          .word       \labelnc-1b-8
1609*e1eccf28SAndroid Build Coastguard Worker            .word       \labelc-2b-8
1610*e1eccf28SAndroid Build Coastguard Worker
1611*e1eccf28SAndroid Build Coastguard Worker            /* Main loop: ... */
1612*e1eccf28SAndroid Build Coastguard Worker            .align 4
1613*e1eccf28SAndroid Build Coastguard Worker3:          /* first perform a vertical convolution from memory to get the next
1614*e1eccf28SAndroid Build Coastguard Worker             * 16 taps of the horizontal window into the register file...
1615*e1eccf28SAndroid Build Coastguard Worker             */
1616*e1eccf28SAndroid Build Coastguard Worker            fetch max_r=\max_r, labelc=\labelc, labelnc=\labelnc, reg=r8
1617*e1eccf28SAndroid Build Coastguard Worker
1618*e1eccf28SAndroid Build Coastguard Worker            /* ...then perform a horizontal convolution on that window to
1619*e1eccf28SAndroid Build Coastguard Worker             * produce eight output bytes, and slide the window along.
1620*e1eccf28SAndroid Build Coastguard Worker             * This has to be done twice to match the 16-way vertical pass.
1621*e1eccf28SAndroid Build Coastguard Worker             * It would be preferable to have twice the work done in \core, but
1622*e1eccf28SAndroid Build Coastguard Worker             * that would demand yet another variant on those macros and would
1623*e1eccf28SAndroid Build Coastguard Worker             * perturb the register allocation severely.
1624*e1eccf28SAndroid Build Coastguard Worker             */
1625*e1eccf28SAndroid Build Coastguard Worker            \core
1626*e1eccf28SAndroid Build Coastguard Worker            vst1.u8     {d31}, [r0]!
1627*e1eccf28SAndroid Build Coastguard Worker            \core
1628*e1eccf28SAndroid Build Coastguard Worker            vst1.u8     {d31}, [r0]!
1629*e1eccf28SAndroid Build Coastguard Worker
1630*e1eccf28SAndroid Build Coastguard Worker            sub         r3, r3, #16             /* two 8-byte stores were just written */
1631*e1eccf28SAndroid Build Coastguard Worker5:          subs        r4, r4, #16
1632*e1eccf28SAndroid Build Coastguard Worker            bhi         3b                      /* iterate only while more than 16 input bytes were left before the subtract */
1633*e1eccf28SAndroid Build Coastguard Worker            /* Here there's 16 or fewer bytes available before the edge of the
1634*e1eccf28SAndroid Build Coastguard Worker             * source image.  r4 holds that count minus 16 (because it was
1635*e1eccf28SAndroid Build Coastguard Worker             * decremented before the first iteration ran).  The last read may
1636*e1eccf28SAndroid Build Coastguard Worker             * not be a whole chunk, and beyond that a fill value must be used.
1637*e1eccf28SAndroid Build Coastguard Worker             *
1638*e1eccf28SAndroid Build Coastguard Worker             * Of course, none of that matters if there's no more output to
1639*e1eccf28SAndroid Build Coastguard Worker             * produce...
1640*e1eccf28SAndroid Build Coastguard Worker             */
1641*e1eccf28SAndroid Build Coastguard Worker            cmp         r3, #0
1642*e1eccf28SAndroid Build Coastguard Worker            beq         5f                      /* nothing left to write: go straight to the exit */
1643*e1eccf28SAndroid Build Coastguard Worker
1644*e1eccf28SAndroid Build Coastguard Worker            /* Oh well.  Output remains: undo the last decrement so r4 is the input byte count left (0..16). */
1645*e1eccf28SAndroid Build Coastguard Worker            adds        r4, r4, #16
1646*e1eccf28SAndroid Build Coastguard Worker            bne         1f                      /* some input is left: fetch it with right-edge clamping */
            /* No input at all is left, so fill q10 and q11 with padding
             * instead: the top 16-bit lane of q9 replicated (step 1) or copies
             * of d19 (other steps).
             */
1647*e1eccf28SAndroid Build Coastguard Worker  .if \step==1
1648*e1eccf28SAndroid Build Coastguard Worker            vdup.u16    q10, d19[3]
1649*e1eccf28SAndroid Build Coastguard Worker            vdup.u16    q11, d19[3]
1650*e1eccf28SAndroid Build Coastguard Worker  .else
1651*e1eccf28SAndroid Build Coastguard Worker            vmov.u64    d20, d19
1652*e1eccf28SAndroid Build Coastguard Worker            vmov.u64    d21, d19
1653*e1eccf28SAndroid Build Coastguard Worker            vmov.u64    d22, d19
1654*e1eccf28SAndroid Build Coastguard Worker            vmov.u64    d23, d19
1655*e1eccf28SAndroid Build Coastguard Worker  .endif
1656*e1eccf28SAndroid Build Coastguard Worker            b           3f
1657*e1eccf28SAndroid Build Coastguard Worker
1658*e1eccf28SAndroid Build Coastguard Worker            /* To avoid reading past end of input, rewind pointers by (16-r4)
1659*e1eccf28SAndroid Build Coastguard Worker             * to ensure that they're exactly 16 bytes from the edge.
1660*e1eccf28SAndroid Build Coastguard Worker             */
1661*e1eccf28SAndroid Build Coastguard Worker1:          mov         r11, r4                 /* r11 = input bytes left (1..16) */
1662*e1eccf28SAndroid Build Coastguard Worker            bl          fetch_clampright\step   /* defined elsewhere; overwrites lr */
1663*e1eccf28SAndroid Build Coastguard Worker            /* Now to put this padding to use, perform any remaining
1664*e1eccf28SAndroid Build Coastguard Worker             * iterations.  This is done at half the rate of the main loop,
1665*e1eccf28SAndroid Build Coastguard Worker             * because there's no longer pressure from a 16-lane window filler.
1666*e1eccf28SAndroid Build Coastguard Worker             */
1667*e1eccf28SAndroid Build Coastguard Worker3:          \core
1668*e1eccf28SAndroid Build Coastguard Worker  .if \step==1
1669*e1eccf28SAndroid Build Coastguard Worker            vdup.u16    q11, d23[3]             /* refill q11 with its own top lane as padding */
1670*e1eccf28SAndroid Build Coastguard Worker  .else
1671*e1eccf28SAndroid Build Coastguard Worker            vmov.u64    d22, d23                /* refill d22 with a copy of d23 as padding */
1672*e1eccf28SAndroid Build Coastguard Worker  .endif
1673*e1eccf28SAndroid Build Coastguard Worker            subs        r3, r3, #8
1674*e1eccf28SAndroid Build Coastguard Worker            blo         4f                      /* fewer than 8 bytes were wanted: store piecewise */
1675*e1eccf28SAndroid Build Coastguard Worker            vst1.u8     {d31}, [r0]!            /* the store leaves the flags from subs intact for the bne below */
1676*e1eccf28SAndroid Build Coastguard Worker            bne         3b
1677*e1eccf28SAndroid Build Coastguard Worker            b           5f
1678*e1eccf28SAndroid Build Coastguard Worker
1679*e1eccf28SAndroid Build Coastguard Worker            /* If the final iteration contained 0 < l < 8 values, then perform
1680*e1eccf28SAndroid Build Coastguard Worker             * a piecewise store of the final vector.
1681*e1eccf28SAndroid Build Coastguard Worker             */
1682*e1eccf28SAndroid Build Coastguard Worker4:          tst         r3, #4                  /* r3 = l - 8 here, so its low three bits equal l */
1683*e1eccf28SAndroid Build Coastguard Worker            beq         1f
1684*e1eccf28SAndroid Build Coastguard Worker            vst1.u32    {d31[0]}, [r0]!
1685*e1eccf28SAndroid Build Coastguard Worker            vext.u8     d31, d31, d31, #4       /* rotate the stored bytes out of the low lanes */
1686*e1eccf28SAndroid Build Coastguard Worker1:          tst         r3, #2
1687*e1eccf28SAndroid Build Coastguard Worker            beq         1f
1688*e1eccf28SAndroid Build Coastguard Worker            vst1.u16    {d31[0]}, [r0]!
1689*e1eccf28SAndroid Build Coastguard Worker            vext.u8     d31, d31, d31, #2
1690*e1eccf28SAndroid Build Coastguard Worker1:          tst         r3, #1
1691*e1eccf28SAndroid Build Coastguard Worker            beq         5f
1692*e1eccf28SAndroid Build Coastguard Worker            vst1.u8     {d31[0]}, [r0]!
1693*e1eccf28SAndroid Build Coastguard Worker            vext.u8     d31, d31, d31, #1       /* nothing in this macro reads d31 after this point */
1694*e1eccf28SAndroid Build Coastguard Worker5:          mov         r0, #0                  /* common exit: r0 is left as 0 */
1695*e1eccf28SAndroid Build Coastguard Worker.endm
1696*e1eccf28SAndroid Build Coastguard Worker
/* Emit one private worker, convolve1_<r>, for each radius in TUNED_LIST1 and
 * for 25.  Each worker is the prefill macro followed by conv_body, both
 * specialised with step=1 and max_r=<r>.
 *
 * The workers are reached by a plain branch from rsdIntrinsicBlurU1_K, which
 * loads lr with its own return label first; they return through
 * pop {r12,pc}.  Register inputs are those listed for conv_body, and r0 is 0
 * on return because conv_body ends by clearing it.
 */
1697*e1eccf28SAndroid Build Coastguard Worker.irp r, TUNED_LIST1, 25
1698*e1eccf28SAndroid Build Coastguard WorkerPRIVATE(convolve1_\r)
1699*e1eccf28SAndroid Build Coastguard Worker            push        {r12,lr}                /* lr is overwritten by bl inside conv_body; r12 presumably pads the push to 8 bytes */
1700*e1eccf28SAndroid Build Coastguard Worker
1701*e1eccf28SAndroid Build Coastguard Worker            prefill     step=1, max_r=\r, label=.Lcnv1_\r
1702*e1eccf28SAndroid Build Coastguard Worker
1703*e1eccf28SAndroid Build Coastguard Worker            conv_body   core=hconv1_\r, step=1, max_r=\r, labelc=.Lcnv1_\r, labelnc=.Lcnvnc1_\r
1704*e1eccf28SAndroid Build Coastguard Worker
1705*e1eccf28SAndroid Build Coastguard Worker            pop         {r12,pc}                /* return to the address the caller placed in lr */
1706*e1eccf28SAndroid Build Coastguard WorkerEND(convolve1_\r)
1707*e1eccf28SAndroid Build Coastguard Worker.endr
1708*e1eccf28SAndroid Build Coastguard Worker
/* Emit one private worker, convolve4_<r>, for each radius in TUNED_LIST4 and
 * for 25.  Each worker is the prefill macro followed by conv_body, both
 * specialised with step=4 and max_r=<r>, with r9 pointing at a scratch buffer
 * carved out of the stack.
 *
 * The workers are reached by a plain branch from rsdIntrinsicBlurU4_K, which
 * loads lr with its own return label first; they return through
 * pop {r12,pc}.  Register inputs are those listed for conv_body except r9,
 * which is set up here.
 *
 * Stack layout: 0x600 bytes are reserved below the saved {r12,lr}.  r9 starts
 * 0x200 below the saved registers and is rounded down by at most 0x3fc, so
 * the 0x200-byte buffer at r9 always lies inside the reserved area.
 */
1709*e1eccf28SAndroid Build Coastguard Worker.irp r, TUNED_LIST4, 25
1710*e1eccf28SAndroid Build Coastguard WorkerPRIVATE(convolve4_\r)
1711*e1eccf28SAndroid Build Coastguard Worker            push        {r12,lr}
1712*e1eccf28SAndroid Build Coastguard Worker            sub         r9, sp, #0x200          /* highest possible buffer start */
1713*e1eccf28SAndroid Build Coastguard Worker            sub         sp, sp, #0x200 + 0x400  /* buffer size plus room for rounding r9 down */
1714*e1eccf28SAndroid Build Coastguard Worker            bic         r9, r9, #0x3fc          /* clears bits 2-9; bits 0-1 are already clear if sp is word-aligned */
1715*e1eccf28SAndroid Build Coastguard Worker
1716*e1eccf28SAndroid Build Coastguard Worker            /* r9 now points to a 0x200 byte buffer on the stack whose address
1717*e1eccf28SAndroid Build Coastguard Worker             * has the low 10 bits clear.  This allows easy address calculation
1718*e1eccf28SAndroid Build Coastguard Worker             * in the wrap-around cases.
1719*e1eccf28SAndroid Build Coastguard Worker             */
1720*e1eccf28SAndroid Build Coastguard Worker
1721*e1eccf28SAndroid Build Coastguard Worker            prefill     step=4, max_r=\r, label=.Lcnv4_\r
1722*e1eccf28SAndroid Build Coastguard Worker
1723*e1eccf28SAndroid Build Coastguard Worker            conv_body   core=hconv4_\r, step=4, max_r=\r, labelc=.Lcnv4_\r, labelnc=.Lcnvnc4_\r
1724*e1eccf28SAndroid Build Coastguard Worker
1725*e1eccf28SAndroid Build Coastguard Worker            add         sp, sp, #0x200 + 0x400  /* release exactly what was reserved above */
1726*e1eccf28SAndroid Build Coastguard Worker            pop         {r12,pc}                /* return to the address the caller placed in lr */
1727*e1eccf28SAndroid Build Coastguard WorkerEND(convolve4_\r)
1728*e1eccf28SAndroid Build Coastguard Worker.endr
1729*e1eccf28SAndroid Build Coastguard Worker
1730*e1eccf28SAndroid Build Coastguard Worker/* void rsdIntrinsicBlurU1_K(
1731*e1eccf28SAndroid Build Coastguard Worker *                  void *out,      // r0
1732*e1eccf28SAndroid Build Coastguard Worker *                  void *in,       // r1
1733*e1eccf28SAndroid Build Coastguard Worker *                  size_t w,       // r2
1734*e1eccf28SAndroid Build Coastguard Worker *                  size_t h,       // r3
1735*e1eccf28SAndroid Build Coastguard Worker *                  size_t p,       // [sp]
1736*e1eccf28SAndroid Build Coastguard Worker *                  size_t x,       // [sp,#4]
1737*e1eccf28SAndroid Build Coastguard Worker *                  size_t y,       // [sp,#8]
1738*e1eccf28SAndroid Build Coastguard Worker *                  size_t count,   // [sp,#12]
1739*e1eccf28SAndroid Build Coastguard Worker *                  size_t r,       // [sp,#16]
1740*e1eccf28SAndroid Build Coastguard Worker *                  uint16_t *tab); // [sp,#20]
 *
 * Exported entry point for the one-byte-step blur.  It unpacks the C
 * arguments into the register layout the convolve1_* workers expect
 * (r1 = in + x, r2 = pitch p, r3 = count, r4 = w - x, r5 = r,
 * r6 = min(r, y), r7 = min(r, h - y - 1), d0-d6 = the first 28 halfwords of
 * tab) and branches to the worker for the first tuned radius that is >= r,
 * or to convolve1_25 if there is none.
 *
 * The [sp,#N] offsets above are as seen on entry; after the two pushes below
 * each stack argument sits 104 bytes higher.
1741*e1eccf28SAndroid Build Coastguard Worker */
1742*e1eccf28SAndroid Build Coastguard WorkerENTRY(rsdIntrinsicBlurU1_K)
1743*e1eccf28SAndroid Build Coastguard Worker            push        {r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}  // 10 words = 40 bytes
1744*e1eccf28SAndroid Build Coastguard Worker            vpush       {d8-d15}        // 64 more bytes: stack args now start at [sp,#104]
1745*e1eccf28SAndroid Build Coastguard Worker            ldr         r6, [sp,#112]   // y
1746*e1eccf28SAndroid Build Coastguard Worker            ldr         r8, [sp,#108]   // x
1747*e1eccf28SAndroid Build Coastguard Worker            ldr         r5, [sp,#120]   // r
1748*e1eccf28SAndroid Build Coastguard Worker            sub         r4, r2, r8      // inlen = w - x
1749*e1eccf28SAndroid Build Coastguard Worker            sub         r7, r3, r6      // h - y
1750*e1eccf28SAndroid Build Coastguard Worker            ldr         r2, [sp,#104]   // pitch
1751*e1eccf28SAndroid Build Coastguard Worker            ldr         r3, [sp,#116]   // count
1752*e1eccf28SAndroid Build Coastguard Worker            sub         r7, r7, #1      // h - y - 1
1753*e1eccf28SAndroid Build Coastguard Worker
1754*e1eccf28SAndroid Build Coastguard Worker            ldr         r12, [sp,#124]  // tab
1755*e1eccf28SAndroid Build Coastguard Worker
1756*e1eccf28SAndroid Build Coastguard Worker            add         r1, r1, r8      // src += x
1757*e1eccf28SAndroid Build Coastguard Worker
1758*e1eccf28SAndroid Build Coastguard Worker            cmp         r6, r5
1759*e1eccf28SAndroid Build Coastguard Worker            movhi       r6, r5          // rup = min(r, y)
1760*e1eccf28SAndroid Build Coastguard Worker            cmp         r7, r5
1761*e1eccf28SAndroid Build Coastguard Worker            movhi       r7, r5          // rdn = min(r, h - y - 1)
1762*e1eccf28SAndroid Build Coastguard Worker
1763*e1eccf28SAndroid Build Coastguard Worker            vld1.u16    {d0,d1,d2,d3}, [r12]!   // first 16 halfwords of tab
1764*e1eccf28SAndroid Build Coastguard Worker            vld1.u16    {d4,d5,d6}, [r12]!      // next 12 halfwords (28 loaded in total)
1765*e1eccf28SAndroid Build Coastguard Worker
1766*e1eccf28SAndroid Build Coastguard Worker            adr         lr, 1f          // the worker is entered by branch and returns here via its pop {r12,pc}
1767*e1eccf28SAndroid Build Coastguard Worker  .irp r, TUNED_LIST1
1768*e1eccf28SAndroid Build Coastguard Worker            cmp         r5, #\r
1769*e1eccf28SAndroid Build Coastguard Worker            bls         convolve1_\r    // taken for the first listed radius that is >= r (unsigned)
1770*e1eccf28SAndroid Build Coastguard Worker  .endr
1771*e1eccf28SAndroid Build Coastguard Worker            b           convolve1_25    // r exceeds every tuned radius
1772*e1eccf28SAndroid Build Coastguard Worker
1773*e1eccf28SAndroid Build Coastguard Worker1:          vpop        {d8-d15}
1774*e1eccf28SAndroid Build Coastguard Worker            pop         {r4,r5,r6,r7,r8,r9,r10,r11,r12,pc}
1775*e1eccf28SAndroid Build Coastguard WorkerEND(rsdIntrinsicBlurU1_K)
1776*e1eccf28SAndroid Build Coastguard Worker
1777*e1eccf28SAndroid Build Coastguard Worker/* void rsdIntrinsicBlurU4_K(
1778*e1eccf28SAndroid Build Coastguard Worker *                  void *out,      // r0
1779*e1eccf28SAndroid Build Coastguard Worker *                  void *in,       // r1
1780*e1eccf28SAndroid Build Coastguard Worker *                  size_t w,       // r2
1781*e1eccf28SAndroid Build Coastguard Worker *                  size_t h,       // r3
1782*e1eccf28SAndroid Build Coastguard Worker *                  size_t p,       // [sp]
1783*e1eccf28SAndroid Build Coastguard Worker *                  size_t x,       // [sp,#4]
1784*e1eccf28SAndroid Build Coastguard Worker *                  size_t y,       // [sp,#8]
1785*e1eccf28SAndroid Build Coastguard Worker *                  size_t count,   // [sp,#12]
1786*e1eccf28SAndroid Build Coastguard Worker *                  size_t r,       // [sp,#16]
1787*e1eccf28SAndroid Build Coastguard Worker *                  uint16_t *tab); // [sp,#20]
 *
 * Exported entry point for the four-byte-step blur.  Same flow as
 * rsdIntrinsicBlurU1_K, except that x, w - x and count are multiplied by 4
 * before use, so that r1, r4 and r3 are expressed in bytes, and the dispatch
 * goes to the convolve4_* workers selected from TUNED_LIST4.
 *
 * The [sp,#N] offsets above are as seen on entry; after the two pushes below
 * each stack argument sits 104 bytes higher.
1788*e1eccf28SAndroid Build Coastguard Worker */
1789*e1eccf28SAndroid Build Coastguard WorkerENTRY(rsdIntrinsicBlurU4_K)
1790*e1eccf28SAndroid Build Coastguard Worker            push        {r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}  // 10 words = 40 bytes
1791*e1eccf28SAndroid Build Coastguard Worker            vpush       {d8-d15}        // 64 more bytes: stack args now start at [sp,#104]
1792*e1eccf28SAndroid Build Coastguard Worker            ldr         r6, [sp,#112]   // y
1793*e1eccf28SAndroid Build Coastguard Worker            ldr         r8, [sp,#108]   // x
1794*e1eccf28SAndroid Build Coastguard Worker            ldr         r5, [sp,#120]   // r
1795*e1eccf28SAndroid Build Coastguard Worker            lsl         r8, r8, #2      // x * 4 (byte offset)
1796*e1eccf28SAndroid Build Coastguard Worker            rsb         r4, r8, r2, LSL #2 // inlen = (w - x) * 4, i.e. w * 4 - x * 4
1797*e1eccf28SAndroid Build Coastguard Worker            sub         r7, r3, r6      // h - y
1798*e1eccf28SAndroid Build Coastguard Worker            ldr         r2, [sp,#104]   // pitch
1799*e1eccf28SAndroid Build Coastguard Worker            ldr         r3, [sp,#116]   // count
1800*e1eccf28SAndroid Build Coastguard Worker            sub         r7, r7, #1      // h - y - 1
1801*e1eccf28SAndroid Build Coastguard Worker            lsl         r3, r3, #2      // count * 4 (bytes of output)
1802*e1eccf28SAndroid Build Coastguard Worker
1803*e1eccf28SAndroid Build Coastguard Worker            ldr         r12, [sp,#124]  // tab
1804*e1eccf28SAndroid Build Coastguard Worker
1805*e1eccf28SAndroid Build Coastguard Worker            add         r1, r1, r8      // in += x * 4
1806*e1eccf28SAndroid Build Coastguard Worker
1807*e1eccf28SAndroid Build Coastguard Worker            cmp         r6, r5
1808*e1eccf28SAndroid Build Coastguard Worker            movhi       r6, r5          // rup = min(r, y)
1809*e1eccf28SAndroid Build Coastguard Worker            cmp         r7, r5
1810*e1eccf28SAndroid Build Coastguard Worker            movhi       r7, r5          // rdn = min(r, h - y - 1)
1811*e1eccf28SAndroid Build Coastguard Worker
1812*e1eccf28SAndroid Build Coastguard Worker            vld1.u16    {d0,d1,d2,d3}, [r12]!   // first 16 halfwords of tab
1813*e1eccf28SAndroid Build Coastguard Worker            vld1.u16    {d4,d5,d6}, [r12]!      // next 12 halfwords (28 loaded in total)
1814*e1eccf28SAndroid Build Coastguard Worker
1815*e1eccf28SAndroid Build Coastguard Worker            adr         lr, 1f          // the worker is entered by branch and returns here via its pop {r12,pc}
1816*e1eccf28SAndroid Build Coastguard Worker  .irp r, TUNED_LIST4
1817*e1eccf28SAndroid Build Coastguard Worker            cmp         r5, #\r
1818*e1eccf28SAndroid Build Coastguard Worker            bls         convolve4_\r    // taken for the first listed radius that is >= r (unsigned)
1819*e1eccf28SAndroid Build Coastguard Worker  .endr
1820*e1eccf28SAndroid Build Coastguard Worker            b           convolve4_25    // r exceeds every tuned radius
1821*e1eccf28SAndroid Build Coastguard Worker
1822*e1eccf28SAndroid Build Coastguard Worker1:          vpop        {d8-d15}
1823*e1eccf28SAndroid Build Coastguard Worker            pop         {r4,r5,r6,r7,r8,r9,r10,r11,r12,pc}
1824*e1eccf28SAndroid Build Coastguard WorkerEND(rsdIntrinsicBlurU4_K)
1825