xref: /aosp_15_r20/external/libvpx/vpx_dsp/x86/intrapred_ssse3.asm (revision fb1b10ab9aebc7c7068eedab379b749d7e3900be)
1*fb1b10abSAndroid Build Coastguard Worker;
2*fb1b10abSAndroid Build Coastguard Worker;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3*fb1b10abSAndroid Build Coastguard Worker;
4*fb1b10abSAndroid Build Coastguard Worker;  Use of this source code is governed by a BSD-style license
5*fb1b10abSAndroid Build Coastguard Worker;  that can be found in the LICENSE file in the root of the source
6*fb1b10abSAndroid Build Coastguard Worker;  tree. An additional intellectual property rights grant can be found
7*fb1b10abSAndroid Build Coastguard Worker;  in the file PATENTS.  All contributing project authors may
8*fb1b10abSAndroid Build Coastguard Worker;  be found in the AUTHORS file in the root of the source tree.
9*fb1b10abSAndroid Build Coastguard Worker;
10*fb1b10abSAndroid Build Coastguard Worker
11*fb1b10abSAndroid Build Coastguard Worker%include "third_party/x86inc/x86inc.asm"
12*fb1b10abSAndroid Build Coastguard Worker
SECTION_RODATA

; Constant tables, 16 bytes each, used as memory operands of pand/pshufb below.
; The hex digits in a sh_b* label spell out the byte indices held in the table
; (index i of the table selects source byte table[i] when used with pshufb).

; 0x01 in every byte: isolates the low bit of each byte lane.
pb_1:                  times 16 db 1

; Low 8 lanes: source shifted down by 1 / 2 bytes, byte 7 repeated.
sh_b12345677:          db  1,  2,  3,  4,  5,  6,  7,  7
                       times  8 db 0
sh_b23456777:          db  2,  3,  4,  5,  6,  7,  7,  7
                       times  8 db 0

; All 16 lanes: shift by 0 / 1 / 2 bytes, then byte 7 repeated to the end.
sh_b0123456777777777:  db  0,  1,  2,  3,  4,  5,  6
                       times  9 db 7
sh_b1234567777777777:  db  1,  2,  3,  4,  5,  6
                       times 10 db 7
sh_b2345677777777777:  db  2,  3,  4,  5,  6
                       times 11 db 7

; All 16 lanes: shift by 1 / 2 bytes, byte 15 repeated.
sh_b123456789abcdeff:  db  1,  2,  3,  4,  5,  6,  7,  8
                       db  9, 10, 11, 12, 13, 14, 15, 15
sh_b23456789abcdefff:  db  2,  3,  4,  5,  6,  7,  8,  9
                       db 10, 11, 12, 13, 14, 15, 15, 15

; Reverse bytes 0..3, keep bytes 4..7.
sh_b32104567:          db  3,  2,  1,  0,  4,  5,  6,  7
                       times  8 db 0

; Interleave bytes 8..11 with bytes 0..3, followed by bytes 4 and 5.
sh_b8091a2b345:        db  8,  0,  9,  1, 10,  2, 11,  3,  4,  5
                       times  6 db 0

; Bytes 7..0 reversed; the next two drop one / two leading bytes and append
; byte 8 / bytes 8 and 9.
sh_b76543210:          db  7,  6,  5,  4,  3,  2,  1,  0
                       times  8 db 0
sh_b65432108:          db  6,  5,  4,  3,  2,  1,  0,  8
                       times  8 db 0
sh_b54321089:          db  5,  4,  3,  2,  1,  0,  8,  9
                       times  8 db 0

; Upper 8 bytes moved into the low 8 lanes.
sh_b89abcdef:          db  8,  9, 10, 11, 12, 13, 14, 15
                       times  8 db 0

; Full 16-byte reversal.
sh_bfedcba9876543210:  db 15, 14, 13, 12, 11, 10,  9,  8
                       db  7,  6,  5,  4,  3,  2,  1,  0
30*fb1b10abSAndroid Build Coastguard Worker
SECTION .text

; d45_predictor_16x16(dst, stride, above)
;
; Loads 16 bytes from [above] and filters them with (x + 2*y + z + 2) >> 2,
; where y / z are the same bytes shifted down by one / two positions with the
; last byte repeated.  Row 0 is the filtered vector; every following row is the
; previous row shifted down by one byte (last byte repeated).  16 rows of
; 16 bytes are stored, `stride` bytes apart.
INIT_XMM ssse3
cglobal d45_predictor_16x16, 3, 6, 4, dst, stride, above, dst8, cnt, goffset
  GET_GOT     goffsetq

  mova                   m0, [aboveq]           ; x
  ; `above` is not named after this point, so it has to be loaded first.
  DEFINE_ARGS dst, stride, stride3, dst8, cnt
  mova                   m1, [GLOBAL(sh_b123456789abcdeff)] ; shift-by-1 mask
  pshufb                 m2, m0, [GLOBAL(sh_b23456789abcdefff)] ; z
  lea              stride3q, [strideq*3]
  lea                 dst8q, [dstq+strideq*8]   ; row 8
  pavgb                  m3, m0, m2             ; avg(x, z), rounds up
  pxor                   m2, m0
  pand                   m2, [GLOBAL(pb_1)]     ; (x ^ z) & 1
  pshufb                 m0, m1                 ; y
  psubb                  m3, m2                 ; undo the round-up of avg(x, z)
  pavgb                  m0, m3                 ; row 0

  ; Two passes of four rows: full rows 0..7 through dst, and the left 8 bytes
  ; of rows 8..15 through dst8 (they equal the upper half of row r-8).
  mov                  cntd, 2
.four_rows:
  movhps [dst8q           ], m0
  mova   [dstq            ], m0
  pshufb                 m0, m1
  movhps [dst8q+strideq   ], m0
  mova   [dstq +strideq   ], m0
  pshufb                 m0, m1
  movhps [dst8q+strideq*2 ], m0
  mova   [dstq +strideq*2 ], m0
  pshufb                 m0, m1
  movhps [dst8q+stride3q  ], m0
  mova   [dstq +stride3q  ], m0
  pshufb                 m0, m1
  lea                 dst8q, [dst8q+strideq*4]
  lea                  dstq, [dstq +strideq*4]
  dec                  cntd
  jnz .four_rows

  ; Right 8 bytes of rows 8..15: the upper half of m0 after eight shifts,
  ; identical for every row.  dst is at row 8 here; dst8 is re-pointed to
  ; row 12 so both groups of four rows can be addressed without moving dst.
  lea                 dst8q, [dstq+strideq*4]
  movhps [dstq           +8], m0
  movhps [dst8q          +8], m0
  movhps [dstq +strideq  +8], m0
  movhps [dst8q+strideq  +8], m0
  movhps [dstq +strideq*2+8], m0
  movhps [dst8q+strideq*2+8], m0
  movhps [dstq +stride3q +8], m0
  movhps [dst8q+stride3q +8], m0

  RESTORE_GOT
  RET
83*fb1b10abSAndroid Build Coastguard Worker
; d45_predictor_32x32(dst, stride, above)
;
; 32-byte version of the same filter: 32 bytes are read from [above] as two
; 16-byte halves and filtered with (x + 2*y + z + 2) >> 2.  For the low half,
; y / z take their last bytes from the start of the high half; for the high
; half the last byte is repeated.  Each stored row is the previous row shifted
; down by one byte.  32 rows of 32 bytes are written.
INIT_XMM ssse3
cglobal d45_predictor_32x32, 3, 6, 7, dst, stride, above, dst16, cnt, goffset
  GET_GOT     goffsetq

  mova                   m0, [aboveq]           ; low  half, x
  mova                   m4, [aboveq+16]        ; high half, x
  ; `above` is not named after this point, so both loads come first.
  DEFINE_ARGS dst, stride, stride3, dst16, cnt
  mova                   m1, [GLOBAL(sh_b123456789abcdeff)] ; shift-by-1 mask
  lea              stride3q, [strideq*3]
  lea                dst16q, [strideq*8]
  lea                dst16q, [dstq+dst16q*2]    ; row 16

  ; low half: y and z straddle the two input registers
  palignr                m5, m4, m0, 1          ; y
  palignr                m6, m4, m0, 2          ; z
  pavgb                  m3, m0, m6
  pxor                   m0, m6
  pand                   m0, [GLOBAL(pb_1)]
  psubb                  m3, m0
  pavgb                  m5, m3                 ; m5 = filtered low half

  ; high half: last byte repeated by the shuffle masks
  pshufb                 m2, m4, [GLOBAL(sh_b23456789abcdefff)] ; z
  pavgb                  m3, m2, m4
  pxor                   m2, m4
  pand                   m2, [GLOBAL(pb_1)]
  psubb                  m3, m2
  pshufb                 m4, m1                 ; y
  pavgb                  m4, m3                 ; m4 = filtered high half

  ; Four passes of four rows.  Rows 0..15 are written in full through dst;
  ; the left 16 bytes of rows 16..31 (equal to the right half of row r-16)
  ; are written through dst16.  The low-half register alternates m5 / m3.
  mov                  cntd, 4
.four_rows:
  mova [dst16q             ], m4
  mova [dstq            +16], m4
  mova [dstq               ], m5
  palignr                 m3, m4, m5, 1
  pshufb                  m4, m1
  mova [dst16q+strideq     ], m4
  mova [dstq  +strideq  +16], m4
  mova [dstq  +strideq     ], m3
  palignr                 m5, m4, m3, 1
  pshufb                  m4, m1
  mova [dst16q+strideq*2   ], m4
  mova [dstq  +strideq*2+16], m4
  mova [dstq  +strideq*2   ], m5
  palignr                 m3, m4, m5, 1
  pshufb                  m4, m1
  mova [dst16q+stride3q    ], m4
  mova [dstq  +stride3q +16], m4
  mova [dstq  +stride3q    ], m3
  palignr                 m5, m4, m3, 1
  pshufb                  m4, m1
  lea                dst16q, [dst16q+strideq*4]
  lea                  dstq, [dstq  +strideq*4]
  dec                  cntd
  jnz .four_rows

  ; Right 16 bytes of rows 16..31 all receive the same m4.  dst is at row 16;
  ; dst16 is re-pointed to row 24 so two groups of rows advance together.
  lea                dst16q, [dstq+strideq*8]
  mova [dstq            +16], m4
  mova [dst16q          +16], m4
  mova [dstq  +strideq  +16], m4
  mova [dst16q+strideq  +16], m4
  mova [dstq  +strideq*2+16], m4
  mova [dst16q+strideq*2+16], m4
  mova [dstq  +stride3q +16], m4
  mova [dst16q+stride3q +16], m4
  lea                  dstq, [dstq  +strideq*4]
  lea                dst16q, [dst16q+strideq*4]
  mova [dstq            +16], m4
  mova [dst16q          +16], m4
  mova [dstq  +strideq  +16], m4
  mova [dst16q+strideq  +16], m4
  mova [dstq  +strideq*2+16], m4
  mova [dst16q+strideq*2+16], m4
  mova [dstq  +stride3q +16], m4
  mova [dst16q+stride3q +16], m4

  RESTORE_GOT
  RET
161*fb1b10abSAndroid Build Coastguard Worker
; ------------------------------------------
; X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 x, y, z, result
;
; Per byte lane: result = (x + 2*y + z + 2) >> 2, without widening to words.
;
; trick from pascal:
;   result  = avg(x, z)          ; pavgb rounds up
;   result -= xor(x, z) & 1      ; take the round-up back out
;   result  = avg(result, y)
;
; x and y are only read.  z is overwritten (it ends up holding (x ^ z) & 1).
; result must be a register distinct from x, y and z.
; ------------------------------------------
%macro X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 4
  pavgb               %4, %3, %1
  pxor                %3, %1
  pand                %3, [GLOBAL(pb_1)]
  psubb               %4, %3
  pavgb               %4, %2
%endmacro
178*fb1b10abSAndroid Build Coastguard Worker
; d63_predictor_4x4(dst, stride, above)
;
; Reads 8 bytes from [above].  Even rows hold the 2-tap average avg(x, y),
; odd rows the 3-tap filter (x + 2*y + z + 2) >> 2, where y / z are the input
; shifted down by one / two bytes.  Rows 2 and 3 are rows 0 and 1 shifted
; down by one byte.  Four bytes are stored per row.
INIT_XMM ssse3
cglobal d63_predictor_4x4, 3, 4, 5, dst, stride, above, goffset
  GET_GOT     goffsetq

  movq                m3, [aboveq]                      ; x
  pshufb              m2, m3, [GLOBAL(sh_b12345677)]    ; y
  pshufb              m1, m3, [GLOBAL(sh_b23456777)]    ; z

  X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m3, m2, m1, m4          ; m4 = 3-tap
  pavgb               m3, m2                            ; m3 = 2-tap

  ; rows 0 and 1
  movd    [dstq          ], m3
  movd    [dstq+strideq  ], m4
  ; rows 2 and 3: same vectors, one byte further along
  psrldq              m3, 1
  psrldq              m4, 1
  movd    [dstq+strideq*2], m3
  lea               dstq, [dstq+strideq*2]
  movd    [dstq+strideq  ], m4
  RESTORE_GOT
  RET
200*fb1b10abSAndroid Build Coastguard Worker
; d63_predictor_8x8(dst, stride, above)
;
; Reads 8 bytes from [above]; the shuffle masks extend them to 16 lanes by
; repeating byte 7.  Even rows hold avg(x, y), odd rows hold
; (x + 2*y + z + 2) >> 2; each later pair of rows is the previous pair shifted
; down by one byte.  Eight bytes are stored per row.
INIT_XMM ssse3
cglobal d63_predictor_8x8, 3, 4, 5, dst, stride, above, goffset
  GET_GOT     goffsetq

  movq                m3, [aboveq]
  ; `above` is not named after this point, so it has to be loaded first.
  DEFINE_ARGS dst, stride, stride3
  pshufb              m2, m3, [GLOBAL(sh_b1234567777777777)]  ; y
  pshufb              m1, m3, [GLOBAL(sh_b2345677777777777)]  ; z
  pshufb              m3, [GLOBAL(sh_b0123456777777777)]      ; x
  lea           stride3q, [strideq*3]

  ; The macro only reads its first operand, so x can stay in m3 for both the
  ; 3-tap and the 2-tap computation.
  X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m3, m2, m1, m4                ; m4 = 3-tap
  pavgb               m3, m2                                  ; m3 = 2-tap

  ; rows 0..3
  movq  [dstq          ], m3
  psrldq              m3, 1
  movq  [dstq+strideq  ], m4
  psrldq              m4, 1
  movq  [dstq+strideq*2], m3
  psrldq              m3, 1
  movq  [dstq+stride3q ], m4
  psrldq              m4, 1
  lea               dstq, [dstq+strideq*4]

  ; rows 4..7
  movq  [dstq          ], m3
  psrldq              m3, 1
  movq  [dstq+strideq  ], m4
  psrldq              m4, 1
  movq  [dstq+strideq*2], m3
  movq  [dstq+stride3q ], m4
  RESTORE_GOT
  RET
236*fb1b10abSAndroid Build Coastguard Worker
; d63_predictor_16x16(dst, stride, above)
;
; Reads 16 bytes from [above].  Even rows hold avg(x, y), odd rows hold
; (x + 2*y + z + 2) >> 2, with y / z the input shifted down by one / two bytes
; (last byte repeated).  Each later pair of rows is the previous pair shifted
; down by one byte.  16 rows of 16 bytes are stored.
INIT_XMM ssse3
cglobal d63_predictor_16x16, 3, 5, 5, dst, stride, above, cnt, goffset
  GET_GOT     goffsetq

  mova                m0, [aboveq]                            ; x
  ; `above` is not named after this point, so it has to be loaded first.
  DEFINE_ARGS dst, stride, stride3, cnt
  mova                m1, [GLOBAL(sh_b123456789abcdeff)]      ; shift-by-1 mask
  pshufb              m3, m0, m1                              ; y
  pshufb              m2, m0, [GLOBAL(sh_b23456789abcdefff)]  ; z
  lea           stride3q, [strideq*3]

  X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m0, m3, m2, m4                ; m4 = 3-tap
  pavgb               m0, m3                                  ; m0 = 2-tap

  ; four passes, four rows each
  mov               cntd, 4
.four_rows:
  ; even rows come from the 2-tap vector
  mova  [dstq          ], m0
  pshufb              m0, m1
  mova  [dstq+strideq*2], m0
  pshufb              m0, m1
  ; odd rows come from the 3-tap vector
  mova  [dstq+strideq  ], m4
  pshufb              m4, m1
  mova  [dstq+stride3q ], m4
  pshufb              m4, m1
  lea               dstq, [dstq+strideq*4]
  dec               cntd
  jnz .four_rows
  RESTORE_GOT
  REP_RET
266*fb1b10abSAndroid Build Coastguard Worker
; d63_predictor_32x32(dst, stride, above)
;
; Reads 32 bytes from [above] as two 16-byte halves.  Even rows hold avg(x, y),
; odd rows hold (x + 2*y + z + 2) >> 2.  For the low half, y / z borrow their
; last bytes from the high half; for the high half the last byte is repeated.
; Each later pair of rows is the previous pair shifted down by one byte.
; 32 rows of 32 bytes are stored.
INIT_XMM ssse3
cglobal d63_predictor_32x32, 3, 5, 8, dst, stride, above, cnt, goffset
  GET_GOT     goffsetq

  mova                   m0, [aboveq]           ; low  half, x
  mova                   m7, [aboveq+16]        ; high half, x
  ; `above` is not named after this point, so both loads come first.
  DEFINE_ARGS dst, stride, stride3, cnt
  lea              stride3q, [strideq*3]
  mova                   m1, [GLOBAL(sh_b123456789abcdeff)] ; shift-by-1 mask

  ; low half (needs the unmodified high half for the straddling bytes)
  palignr                m6, m7, m0, 1          ; y
  palignr                m5, m7, m0, 2          ; z
  X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m0, m6, m5, m2  ; m2 = 3-tap low
  pavgb                  m0, m6                 ; m0 = 2-tap low

  ; high half (m5 is free again and serves as z)
  pshufb                 m3, m7, m1             ; y
  pshufb                 m5, m7, [GLOBAL(sh_b23456789abcdefff)] ; z
  X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m7, m3, m5, m4  ; m4 = 3-tap high
  pavgb                  m7, m3                 ; m7 = 2-tap high

  ; eight passes, four rows each; m3 / m5 carry the shifted low halves
  mov                  cntd, 8
.four_rows:
  ; 2-tap rows (first and third of the group)
  mova  [dstq             ], m0
  mova  [dstq          +16], m7
  palignr                m3, m7, m0, 1
  pshufb                 m7, m1
  mova  [dstq+strideq*2   ], m3
  mova  [dstq+strideq*2+16], m7
  palignr                m0, m7, m3, 1
  pshufb                 m7, m1

  ; 3-tap rows (second and fourth of the group)
  mova  [dstq+strideq     ], m2
  mova  [dstq+strideq  +16], m4
  palignr                m5, m4, m2, 1
  pshufb                 m4, m1
  mova  [dstq+stride3q    ], m5
  mova  [dstq+stride3q +16], m4
  palignr                m2, m4, m5, 1
  pshufb                 m4, m1

  lea                  dstq, [dstq+strideq*4]
  dec                  cntd
  jnz .four_rows
  RESTORE_GOT
  REP_RET
311*fb1b10abSAndroid Build Coastguard Worker
; d153_predictor_4x4(dst, stride, above, left)
;
; Builds one vector from 4 bytes at [left] (reversed) followed by 4 bytes at
; [above-1], then computes a 2-tap average and a 3-tap
; (x + 2*y + z + 2) >> 2 filter along it.  The output block looks like this,
; with A = 2-tap and B/C/D = 3-tap values:
;   A1 B1 C1 D1
;   A2 B2 A1 B1
;   A3 B3 A2 B2
;   A4 B4 A3 B3
INIT_XMM ssse3
cglobal d153_predictor_4x4, 4, 5, 4, dst, stride, above, left, goffset
  GET_GOT     goffsetq
  movd                m0, [leftq]               ; l1, l2, l3, l4
  movd                m1, [aboveq-1]            ; tl, t1, t2, t3
  ; `above` and `left` are not named after this point; both are loaded above.
  DEFINE_ARGS dst, stride, stride3
  lea           stride3q, [strideq*3]

  punpckldq           m0, m1                    ; l1, l2, l3, l4, tl, t1, t2, t3
  pshufb              m0, [GLOBAL(sh_b32104567)]; l4, l3, l2, l1, tl, t1, t2, t3
  psrldq              m2, m0, 2                 ; l2, l1, tl, t1, t2, t3
  psrldq              m1, m0, 1                 ; l3, l2, l1, tl, t1, t2, t3

  X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m0, m1, m2, m3  ; 3-tap: B4 B3 B2 B1 C1 D1
  pavgb               m1, m0                    ; 2-tap: A4 A3 A2 A1

  ; low qword = 3-tap values, high qword = 2-tap values, then interleave them
  punpcklqdq          m3, m1                    ; B4 B3 B2 B1 C1 D1 x x A4 A3 A2 A1 ..
  pshufb              m3, [GLOBAL(sh_b8091a2b345)] ; A4 B4 A3 B3 A2 B2 A1 B1 C1 D1 ..

  ; stored bottom row first; each row above starts two bytes further along
  movd  [dstq+stride3q ], m3
  psrldq              m3, 2                     ; A3 B3 A2 B2 A1 B1 C1 D1 ..
  movd  [dstq+strideq*2], m3
  psrldq              m3, 2                     ; A2 B2 A1 B1 C1 D1 ..
  movd  [dstq+strideq  ], m3
  psrldq              m3, 2                     ; A1 B1 C1 D1 ..
  movd  [dstq          ], m3
  RESTORE_GOT
  RET
343*fb1b10abSAndroid Build Coastguard Worker
; d153_predictor_8x8(dst, stride, above, left)
;
; Combines 8 bytes at [left] with 8 bytes at [above-1].  Along the (reversed)
; left edge it computes 2-tap averages (A) and 3-tap filtered values (B);
; along the top row it computes 3-tap filtered values (C..H).  Output layout:
;   A1 B1 C1 D1 E1 F1 G1 H1
;   A2 B2 A1 B1 C1 D1 E1 F1
;   A3 B3 A2 B2 A1 B1 C1 D1
;   A4 B4 A3 B3 A2 B2 A1 B1
;   A5 B5 A4 B4 A3 B3 A2 B2
;   A6 B6 A5 B5 A4 B4 A3 B3
;   A7 B7 A6 B6 A5 B5 A4 B4
;   A8 B8 A7 B7 A6 B6 A5 B5
INIT_XMM ssse3
cglobal d153_predictor_8x8, 4, 5, 8, dst, stride, above, left, goffset
  GET_GOT     goffsetq
  movq                m0, [leftq]                     ; bytes 0- 7: l1-8
  movhps              m0, [aboveq-1]                  ; bytes 8-15: tl, t1-7
  ; `above` and `left` are not named after this point; both are loaded above.
  DEFINE_ARGS dst, stride, stride3
  lea           stride3q, [strideq*3]

  ; left edge, bottom to top, running on into tl / t1 (all byte lanes)
  pshufb              m1, m0, [GLOBAL(sh_b76543210)]  ; l8-1
  pshufb              m2, m0, [GLOBAL(sh_b65432108)]  ; l7-1, tl
  pshufb              m3, m0, [GLOBAL(sh_b54321089)]  ; l6-1, tl, t1
  pavgb               m6, m1, m2                      ; 2-tap avg A8-A1
  X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m1, m2, m3, m4        ; 3-tap avg B8-B1
  punpcklbw           m6, m4                          ; A-B8, A-B7 ... A-B2, A-B1

  ; top row (m1 / m2 are free again and hold the shifted copies)
  pshufb              m0, [GLOBAL(sh_b89abcdef)]      ; tl, t1-7
  psrldq              m1, m0, 1                       ; t1-7
  psrldq              m2, m0, 2                       ; t2-7
  X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m0, m1, m2, m7        ; 3-tap avg C-H1

  ; rows 3..0: row 3 is the upper half of the A/B pairs; rows above it slide
  ; two bytes at a time into the C..H values
  movhps [dstq+stride3q], m6                    ; A-B4, A-B3, A-B2, A-B1
  palignr             m0, m7, m6, 10            ; A-B3, A-B2, A-B1, C-H1
  movq  [dstq+strideq*2], m0
  psrldq              m0, 2                     ; A-B2, A-B1, C-H1
  movq  [dstq+strideq  ], m0
  psrldq              m0, 2                     ; A-H1
  movq  [dstq          ], m0

  ; rows 7..4: the A/B pairs alone, sliding two bytes per row
  lea               dstq, [dstq+strideq*4]
  movq  [dstq+stride3q ], m6                    ; A-B8, A-B7, A-B6, A-B5
  psrldq              m6, 2                     ; A-B7, A-B6, A-B5, A-B4
  movq  [dstq+strideq*2], m6
  psrldq              m6, 2                     ; A-B6, A-B5, A-B4, A-B3
  movq  [dstq+strideq  ], m6
  psrldq              m6, 2                     ; A-B5, A-B4, A-B3, A-B2
  movq  [dstq          ], m6
  RESTORE_GOT
  RET
392*fb1b10abSAndroid Build Coastguard Worker
; ---------------------------------------------------------------------------
; d153_predictor_16x16(dst, stride, above, left)
;
; Fills 16 rows of 16 bytes at dst (rows are stride bytes apart) using the
; 16 bytes at left and the 16 bytes starting at above-1.
;   - left is loaded with mova and every row is stored with mova. mova is the
;     aligned move in x86inc, so left, dst and stride are presumably expected
;     to keep 16-byte alignment (TODO confirm against callers).
;   - goffset names an extra GPR that is only handed to GET_GOT/RESTORE_GOT.
;
; Data flow:
;   1. Filter the left edge, extended upwards with above[-1] and above[0],
;      into a 3-tap column (B) and a rounded 2-tap average column (A), and
;      interleave the two columns into byte pairs.
;   2. Filter the above row with the 3-tap filter (C1 .. P1).
;   3. Emit every row as a 16-byte palignr window over
;      [byte-reversed pairs | filtered above row]; the window moves 2 bytes
;      towards the pairs for each successive row.
;
; NOTE(review): X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 and sh_bfedcba9876543210 are
; defined outside this section. Judging by their names and call sites, the
; macro is taken to write (x + 2*y + z + 2) >> 2 into its 4th operand and the
; table is taken to reverse the byte order of a register - confirm against
; their definitions.
; ---------------------------------------------------------------------------
393*fb1b10abSAndroid Build Coastguard WorkerINIT_XMM ssse3
394*fb1b10abSAndroid Build Coastguard Workercglobal d153_predictor_16x16, 4, 5, 8, dst, stride, above, left, goffset
395*fb1b10abSAndroid Build Coastguard Worker  GET_GOT     goffsetq
396*fb1b10abSAndroid Build Coastguard Worker  mova                m0, [leftq]                       ; left[0..15]
397*fb1b10abSAndroid Build Coastguard Worker  movu                m7, [aboveq-1]                    ; above[-1..14]
398*fb1b10abSAndroid Build Coastguard Worker  ; comments below are for a predictor like this
399*fb1b10abSAndroid Build Coastguard Worker  ; A1 B1 C1 D1 E1 F1 G1 H1 I1 J1 K1 L1 M1 N1 O1 P1
400*fb1b10abSAndroid Build Coastguard Worker  ; A2 B2 A1 B1 C1 D1 E1 F1 G1 H1 I1 J1 K1 L1 M1 N1
401*fb1b10abSAndroid Build Coastguard Worker  ; A3 B3 A2 B2 A1 B1 C1 D1 E1 F1 G1 H1 I1 J1 K1 L1
402*fb1b10abSAndroid Build Coastguard Worker  ; A4 B4 A3 B3 A2 B2 A1 B1 C1 D1 E1 F1 G1 H1 I1 J1
403*fb1b10abSAndroid Build Coastguard Worker  ; A5 B5 A4 B4 A3 B3 A2 B2 A1 B1 C1 D1 E1 F1 G1 H1
404*fb1b10abSAndroid Build Coastguard Worker  ; A6 B6 A5 B5 A4 B4 A3 B3 A2 B2 A1 B1 C1 D1 E1 F1
405*fb1b10abSAndroid Build Coastguard Worker  ; A7 B7 A6 B6 A5 B5 A4 B4 A3 B3 A2 B2 A1 B1 C1 D1
406*fb1b10abSAndroid Build Coastguard Worker  ; A8 B8 A7 B7 A6 B6 A5 B5 A4 B4 A3 B3 A2 B2 A1 B1
407*fb1b10abSAndroid Build Coastguard Worker  ; A9 B9 A8 B8 A7 B7 A6 B6 A5 B5 A4 B4 A3 B3 A2 B2
408*fb1b10abSAndroid Build Coastguard Worker  ; Aa Ba A9 B9 A8 B8 A7 B7 A6 B6 A5 B5 A4 B4 A3 B3
409*fb1b10abSAndroid Build Coastguard Worker  ; Ab Bb Aa Ba A9 B9 A8 B8 A7 B7 A6 B6 A5 B5 A4 B4
410*fb1b10abSAndroid Build Coastguard Worker  ; Ac Bc Ab Bb Aa Ba A9 B9 A8 B8 A7 B7 A6 B6 A5 B5
411*fb1b10abSAndroid Build Coastguard Worker  ; Ad Bd Ac Bc Ab Bb Aa Ba A9 B9 A8 B8 A7 B7 A6 B6
412*fb1b10abSAndroid Build Coastguard Worker  ; Ae Be Ad Bd Ac Bc Ab Bb Aa Ba A9 B9 A8 B8 A7 B7
413*fb1b10abSAndroid Build Coastguard Worker  ; Af Bf Ae Be Ad Bd Ac Bc Ab Bb Aa Ba A9 B9 A8 B8
414*fb1b10abSAndroid Build Coastguard Worker  ; Ag Bg Af Bf Ae Be Ad Bd Ac Bc Ab Bb Aa Ba A9 B9
  ; Build the left column shifted down by one and by two entries; the vacated
  ; top entries are taken from the (reversed) above row.
415*fb1b10abSAndroid Build Coastguard Worker  pshufb              m6, m7, [GLOBAL(sh_bfedcba9876543210)] ; m6 = m7 reversed (see NOTE)
416*fb1b10abSAndroid Build Coastguard Worker  palignr             m5, m0, m6, 15                    ; m6[15], m0[0..14]
417*fb1b10abSAndroid Build Coastguard Worker  palignr             m3, m0, m6, 14                    ; m6[14..15], m0[0..13]
418*fb1b10abSAndroid Build Coastguard Worker
419*fb1b10abSAndroid Build Coastguard Worker  X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m0, m5, m3, m4          ; m4 = 3-tap column, B1 - Bg
421*fb1b10abSAndroid Build Coastguard Worker  pavgb               m5, m0                            ; m5 = 2-tap column, A1 - Ag
422*fb1b10abSAndroid Build Coastguard Worker
423*fb1b10abSAndroid Build Coastguard Worker  punpcklbw           m0, m4, m5                        ; A-B8 ... A-B1
424*fb1b10abSAndroid Build Coastguard Worker  punpckhbw           m4, m5                            ; A-B9 ... A-Bg
425*fb1b10abSAndroid Build Coastguard Worker
  ; above row shifted by one and by two entries, last byte replicated
426*fb1b10abSAndroid Build Coastguard Worker  pshufb              m3, m7, [GLOBAL(sh_b123456789abcdeff)]
427*fb1b10abSAndroid Build Coastguard Worker  pshufb              m5, m7, [GLOBAL(sh_b23456789abcdefff)]
428*fb1b10abSAndroid Build Coastguard Worker
429*fb1b10abSAndroid Build Coastguard Worker  X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m7, m3, m5, m1          ; 3-tap avg C1-P1
430*fb1b10abSAndroid Build Coastguard Worker
431*fb1b10abSAndroid Build Coastguard Worker  pshufb              m6, m0, [GLOBAL(sh_bfedcba9876543210)] ; m6 = pairs 1-8, reversed
  ; above is no longer needed: its register is renamed and reused as stride*3
432*fb1b10abSAndroid Build Coastguard Worker  DEFINE_ARGS dst, stride, stride3
433*fb1b10abSAndroid Build Coastguard Worker  lea           stride3q, [strideq*3]
  ; rows 0-7: window over [m6 | m1]
434*fb1b10abSAndroid Build Coastguard Worker  palignr             m2, m1, m6, 14
435*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq          ], m2                            ; row 0
436*fb1b10abSAndroid Build Coastguard Worker  palignr             m2, m1, m6, 12
437*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq+strideq  ], m2                            ; row 1
438*fb1b10abSAndroid Build Coastguard Worker  palignr             m2, m1, m6, 10
439*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq+strideq*2], m2                            ; row 2
440*fb1b10abSAndroid Build Coastguard Worker  palignr             m2, m1, m6, 8
441*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq+stride3q ], m2                            ; row 3
442*fb1b10abSAndroid Build Coastguard Worker  lea               dstq, [dstq+strideq*4]
443*fb1b10abSAndroid Build Coastguard Worker  palignr             m2, m1, m6, 6
444*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq          ], m2                            ; row 4
445*fb1b10abSAndroid Build Coastguard Worker  palignr             m2, m1, m6, 4
446*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq+strideq  ], m2                            ; row 5
447*fb1b10abSAndroid Build Coastguard Worker  palignr             m2, m1, m6, 2
448*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq+strideq*2], m2                            ; row 6
449*fb1b10abSAndroid Build Coastguard Worker  pshufb              m4, [GLOBAL(sh_bfedcba9876543210)] ; m4 = pairs 9-g, reversed
450*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq+stride3q ], m6                            ; row 7
451*fb1b10abSAndroid Build Coastguard Worker  lea               dstq, [dstq+strideq*4]
452*fb1b10abSAndroid Build Coastguard Worker
  ; rows 8-15: window over [m4 | m6]
453*fb1b10abSAndroid Build Coastguard Worker  palignr             m2, m6, m4, 14
454*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq          ], m2                            ; row 8
455*fb1b10abSAndroid Build Coastguard Worker  palignr             m2, m6, m4, 12
456*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq+strideq  ], m2                            ; row 9
457*fb1b10abSAndroid Build Coastguard Worker  palignr             m2, m6, m4, 10
458*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq+strideq*2], m2                            ; row 10
459*fb1b10abSAndroid Build Coastguard Worker  palignr             m2, m6, m4, 8
460*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq+stride3q ], m2                            ; row 11
461*fb1b10abSAndroid Build Coastguard Worker  lea               dstq, [dstq+strideq*4]
462*fb1b10abSAndroid Build Coastguard Worker  palignr             m2, m6, m4, 6
463*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq          ], m2                            ; row 12
464*fb1b10abSAndroid Build Coastguard Worker  palignr             m2, m6, m4, 4
465*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq+strideq  ], m2                            ; row 13
466*fb1b10abSAndroid Build Coastguard Worker  palignr             m2, m6, m4, 2
467*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq+strideq*2], m2                            ; row 14
468*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq+stride3q ], m4                            ; row 15
469*fb1b10abSAndroid Build Coastguard Worker  RESTORE_GOT
470*fb1b10abSAndroid Build Coastguard Worker  RET
471*fb1b10abSAndroid Build Coastguard Worker
; ---------------------------------------------------------------------------
; d153_predictor_32x32(dst, stride, above, left)
;
; Fills 32 rows of 32 bytes at dst (rows are stride bytes apart) using the
; 32 bytes at left and the 32 bytes starting at above-1.
;   - left is loaded with mova and every half row is stored with mova. mova
;     is the aligned move in x86inc, so left, dst and stride are presumably
;     expected to keep 16-byte alignment (TODO confirm against callers).
;   - goffset names an extra GPR that is only handed to GET_GOT/RESTORE_GOT.
;
; Data flow:
;   1. Filter the above row with the 3-tap filter into two registers
;      (m1 = low 16 results, m2 = high 16 results).
;   2. Filter left[0..15], extended upwards with above[-1] and above[0], into
;      3-tap and rounded 2-tap columns, interleave them into byte pairs and
;      reverse the byte order (m6 = pairs for rows 0-7, m4 = rows 8-15).
;   3. Rows 0-15 are 32-byte windows over [m4 | m6 | m1 | m2], moving 2 bytes
;      towards the pairs per row.
;   4. Repeat step 2 for left[14..31] (m3 = pairs for rows 16-23,
;      m2 = rows 24-31) and emit rows 16-31 as windows over
;      [m2 | m3 | m4 | m6].
;
; NOTE(review): X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 and sh_bfedcba9876543210 are
; defined outside this section. Judging by their names and call sites, the
; macro is taken to write (x + 2*y + z + 2) >> 2 into its 4th operand and the
; table is taken to reverse the byte order of a register - confirm against
; their definitions.
; ---------------------------------------------------------------------------
472*fb1b10abSAndroid Build Coastguard WorkerINIT_XMM ssse3
473*fb1b10abSAndroid Build Coastguard Workercglobal d153_predictor_32x32, 4, 5, 8, dst, stride, above, left, goffset
474*fb1b10abSAndroid Build Coastguard Worker  GET_GOT     goffsetq
475*fb1b10abSAndroid Build Coastguard Worker  mova                  m0, [leftq]                     ; left[0..15]
476*fb1b10abSAndroid Build Coastguard Worker  movu                  m7, [aboveq-1]                  ; above[-1..14]
477*fb1b10abSAndroid Build Coastguard Worker  movu                  m1, [aboveq+15]                 ; above[15..30]
478*fb1b10abSAndroid Build Coastguard Worker
  ; high part of above shifted by one and by two entries, last byte replicated
479*fb1b10abSAndroid Build Coastguard Worker  pshufb                m4, m1, [GLOBAL(sh_b123456789abcdeff)]
480*fb1b10abSAndroid Build Coastguard Worker  pshufb                m6, m1, [GLOBAL(sh_b23456789abcdefff)]
481*fb1b10abSAndroid Build Coastguard Worker
482*fb1b10abSAndroid Build Coastguard Worker  X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m1, m4, m6, m2          ; 3-tap avg above [high]
483*fb1b10abSAndroid Build Coastguard Worker
484*fb1b10abSAndroid Build Coastguard Worker  palignr               m3, m1, m7, 1                   ; above[0..15]
485*fb1b10abSAndroid Build Coastguard Worker  palignr               m5, m1, m7, 2                   ; above[1..16]
486*fb1b10abSAndroid Build Coastguard Worker
487*fb1b10abSAndroid Build Coastguard Worker  X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m7, m3, m5, m1          ; 3-tap avg above [low]
488*fb1b10abSAndroid Build Coastguard Worker
  ; left column shifted down by one and by two entries; the vacated top
  ; entries come from the (reversed) low part of above
489*fb1b10abSAndroid Build Coastguard Worker  pshufb                m7, [GLOBAL(sh_bfedcba9876543210)]
490*fb1b10abSAndroid Build Coastguard Worker  palignr               m5, m0, m7, 15                  ; m7[15], m0[0..14]
491*fb1b10abSAndroid Build Coastguard Worker  palignr               m3, m0, m7, 14                  ; m7[14..15], m0[0..13]
492*fb1b10abSAndroid Build Coastguard Worker
493*fb1b10abSAndroid Build Coastguard Worker  X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m0, m5, m3, m4          ; 3-tap avg B3-Bg
494*fb1b10abSAndroid Build Coastguard Worker  pavgb                 m5, m0                            ; A1 - Ag
495*fb1b10abSAndroid Build Coastguard Worker  punpcklbw             m6, m4, m5                        ; A-B8 ... A-B1
496*fb1b10abSAndroid Build Coastguard Worker  punpckhbw             m4, m5                            ; A-B9 ... A-Bg
497*fb1b10abSAndroid Build Coastguard Worker  pshufb                m6, [GLOBAL(sh_bfedcba9876543210)]
498*fb1b10abSAndroid Build Coastguard Worker  pshufb                m4, [GLOBAL(sh_bfedcba9876543210)]
499*fb1b10abSAndroid Build Coastguard Worker
  ; above is no longer needed: its register is renamed and reused as stride*3.
  ; left keeps its name because it is read again below.
  ; NOTE(review): the name line is not referenced anywhere in this function.
500*fb1b10abSAndroid Build Coastguard Worker  DEFINE_ARGS dst, stride, stride3, left, line
501*fb1b10abSAndroid Build Coastguard Worker  lea             stride3q, [strideq*3]
502*fb1b10abSAndroid Build Coastguard Worker
  ; rows 0-7: window over [m6 | m1 | m2]
503*fb1b10abSAndroid Build Coastguard Worker  palignr               m5, m2, m1, 14
504*fb1b10abSAndroid Build Coastguard Worker  palignr               m7, m1, m6, 14
505*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq            ], m7
506*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq+16         ], m5
507*fb1b10abSAndroid Build Coastguard Worker  palignr               m5, m2, m1, 12
508*fb1b10abSAndroid Build Coastguard Worker  palignr               m7, m1, m6, 12
509*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq+strideq    ], m7
510*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq+strideq+16 ], m5
511*fb1b10abSAndroid Build Coastguard Worker  palignr                m5, m2, m1, 10
512*fb1b10abSAndroid Build Coastguard Worker  palignr                m7, m1, m6, 10
513*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq+strideq*2   ], m7
514*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq+strideq*2+16], m5
515*fb1b10abSAndroid Build Coastguard Worker  palignr                m5, m2, m1, 8
516*fb1b10abSAndroid Build Coastguard Worker  palignr                m7, m1, m6, 8
517*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq+stride3q    ], m7
518*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq+stride3q+16 ], m5
519*fb1b10abSAndroid Build Coastguard Worker  lea                  dstq, [dstq+strideq*4]
520*fb1b10abSAndroid Build Coastguard Worker  palignr                m5, m2, m1, 6
521*fb1b10abSAndroid Build Coastguard Worker  palignr                m7, m1, m6, 6
522*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq             ], m7
523*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq+16          ], m5
524*fb1b10abSAndroid Build Coastguard Worker  palignr                m5, m2, m1, 4
525*fb1b10abSAndroid Build Coastguard Worker  palignr                m7, m1, m6, 4
526*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq+strideq     ], m7
527*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq+strideq+16  ], m5
528*fb1b10abSAndroid Build Coastguard Worker  palignr                m5, m2, m1, 2
529*fb1b10abSAndroid Build Coastguard Worker  palignr                m7, m1, m6, 2
530*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq+strideq*2   ], m7
531*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq+strideq*2+16], m5
532*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq+stride3q    ], m6                         ; row 7, low half
533*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq+stride3q+16 ], m1                         ; row 7, high half
534*fb1b10abSAndroid Build Coastguard Worker  lea                  dstq, [dstq+strideq*4]
535*fb1b10abSAndroid Build Coastguard Worker
  ; rows 8-15: window over [m4 | m6 | m1]
536*fb1b10abSAndroid Build Coastguard Worker  palignr                m5, m1, m6, 14
537*fb1b10abSAndroid Build Coastguard Worker  palignr                m3, m6, m4, 14
538*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq             ], m3
539*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq+16          ], m5
540*fb1b10abSAndroid Build Coastguard Worker  palignr                m5, m1, m6, 12
541*fb1b10abSAndroid Build Coastguard Worker  palignr                m3, m6, m4, 12
542*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq+strideq     ], m3
543*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq+strideq+16  ], m5
544*fb1b10abSAndroid Build Coastguard Worker  palignr                m5, m1, m6, 10
545*fb1b10abSAndroid Build Coastguard Worker  palignr                m3, m6, m4, 10
546*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq+strideq*2   ], m3
547*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq+strideq*2+16], m5
548*fb1b10abSAndroid Build Coastguard Worker  palignr                m5, m1, m6, 8
549*fb1b10abSAndroid Build Coastguard Worker  palignr                m3, m6, m4, 8
550*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq+stride3q    ], m3
551*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq+stride3q+16 ], m5
552*fb1b10abSAndroid Build Coastguard Worker  lea                  dstq, [dstq+strideq*4]
553*fb1b10abSAndroid Build Coastguard Worker  palignr                m5, m1, m6, 6
554*fb1b10abSAndroid Build Coastguard Worker  palignr                m3, m6, m4, 6
555*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq             ], m3
556*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq+16          ], m5
557*fb1b10abSAndroid Build Coastguard Worker  palignr                m5, m1, m6, 4
558*fb1b10abSAndroid Build Coastguard Worker  palignr                m3, m6, m4, 4
559*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq+strideq     ], m3
560*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq+strideq+16  ], m5
561*fb1b10abSAndroid Build Coastguard Worker  palignr                m5, m1, m6, 2
562*fb1b10abSAndroid Build Coastguard Worker  palignr                m3, m6, m4, 2
563*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq+strideq*2   ], m3
564*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq+strideq*2+16], m5
565*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq+stride3q    ], m4                         ; row 15, low half
566*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq+stride3q+16 ], m6                         ; row 15, high half
567*fb1b10abSAndroid Build Coastguard Worker  lea               dstq, [dstq+strideq*4]
568*fb1b10abSAndroid Build Coastguard Worker
  ; second half of the left edge: same filtering as above, now for rows 16-31
569*fb1b10abSAndroid Build Coastguard Worker  mova                   m7, [leftq]                    ; left[0..15]
570*fb1b10abSAndroid Build Coastguard Worker  mova                   m3, [leftq+16]                 ; left[16..31]
571*fb1b10abSAndroid Build Coastguard Worker  palignr                m5, m3, m7, 15                 ; left[15..30]
572*fb1b10abSAndroid Build Coastguard Worker  palignr                m0, m3, m7, 14                 ; left[14..29]
573*fb1b10abSAndroid Build Coastguard Worker
574*fb1b10abSAndroid Build Coastguard Worker  X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m3, m5, m0, m2          ; 3-tap column for rows 16-31
575*fb1b10abSAndroid Build Coastguard Worker  pavgb                  m5, m3                            ; 2-tap column for rows 16-31
576*fb1b10abSAndroid Build Coastguard Worker  punpcklbw              m3, m2, m5                        ; pairs for rows 16-23
577*fb1b10abSAndroid Build Coastguard Worker  punpckhbw              m2, m5                            ; pairs for rows 24-31
578*fb1b10abSAndroid Build Coastguard Worker  pshufb                 m3, [GLOBAL(sh_bfedcba9876543210)]
579*fb1b10abSAndroid Build Coastguard Worker  pshufb                 m2, [GLOBAL(sh_bfedcba9876543210)]
580*fb1b10abSAndroid Build Coastguard Worker
  ; rows 16-23: window over [m3 | m4 | m6]
581*fb1b10abSAndroid Build Coastguard Worker  palignr                m7, m6, m4, 14
582*fb1b10abSAndroid Build Coastguard Worker  palignr                m0, m4, m3, 14
583*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq             ], m0
584*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq+16          ], m7
585*fb1b10abSAndroid Build Coastguard Worker  palignr                m7, m6, m4, 12
586*fb1b10abSAndroid Build Coastguard Worker  palignr                m0, m4, m3, 12
587*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq+strideq     ], m0
588*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq+strideq+16  ], m7
589*fb1b10abSAndroid Build Coastguard Worker  palignr                m7, m6, m4, 10
590*fb1b10abSAndroid Build Coastguard Worker  palignr                m0, m4, m3, 10
591*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq+strideq*2   ], m0
592*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq+strideq*2+16], m7
593*fb1b10abSAndroid Build Coastguard Worker  palignr                m7, m6, m4, 8
594*fb1b10abSAndroid Build Coastguard Worker  palignr                m0, m4, m3, 8
595*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq+stride3q    ], m0
596*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq+stride3q+16 ], m7
597*fb1b10abSAndroid Build Coastguard Worker  lea                  dstq, [dstq+strideq*4]
598*fb1b10abSAndroid Build Coastguard Worker  palignr                m7, m6, m4, 6
599*fb1b10abSAndroid Build Coastguard Worker  palignr                m0, m4, m3, 6
600*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq             ], m0
601*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq+16          ], m7
602*fb1b10abSAndroid Build Coastguard Worker  palignr                m7, m6, m4, 4
603*fb1b10abSAndroid Build Coastguard Worker  palignr                m0, m4, m3, 4
604*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq+strideq     ], m0
605*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq+strideq+16  ], m7
606*fb1b10abSAndroid Build Coastguard Worker  palignr                m7, m6, m4, 2
607*fb1b10abSAndroid Build Coastguard Worker  palignr                m0, m4, m3, 2
608*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq+strideq*2   ], m0
609*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq+strideq*2+16], m7
610*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq+stride3q    ], m3                         ; row 23, low half
611*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq+stride3q+16 ], m4                         ; row 23, high half
612*fb1b10abSAndroid Build Coastguard Worker  lea                  dstq, [dstq+strideq*4]
613*fb1b10abSAndroid Build Coastguard Worker
  ; rows 24-31: window over [m2 | m3 | m4]
614*fb1b10abSAndroid Build Coastguard Worker  palignr                m7, m4, m3, 14
615*fb1b10abSAndroid Build Coastguard Worker  palignr                m0, m3, m2, 14
616*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq             ], m0
617*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq+16          ], m7
618*fb1b10abSAndroid Build Coastguard Worker  palignr                m7, m4, m3, 12
619*fb1b10abSAndroid Build Coastguard Worker  palignr                m0, m3, m2, 12
620*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq+strideq     ], m0
621*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq+strideq+16  ], m7
622*fb1b10abSAndroid Build Coastguard Worker  palignr                m7, m4, m3, 10
623*fb1b10abSAndroid Build Coastguard Worker  palignr                m0, m3, m2, 10
624*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq+strideq*2   ], m0
625*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq+strideq*2+16], m7
626*fb1b10abSAndroid Build Coastguard Worker  palignr                m7, m4, m3, 8
627*fb1b10abSAndroid Build Coastguard Worker  palignr                m0, m3, m2, 8
628*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq+stride3q    ], m0
629*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq+stride3q+16 ], m7
630*fb1b10abSAndroid Build Coastguard Worker  lea                  dstq, [dstq+strideq*4]
631*fb1b10abSAndroid Build Coastguard Worker  palignr                m7, m4, m3, 6
632*fb1b10abSAndroid Build Coastguard Worker  palignr                m0, m3, m2, 6
633*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq             ], m0
634*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq+16          ], m7
635*fb1b10abSAndroid Build Coastguard Worker  palignr                m7, m4, m3, 4
636*fb1b10abSAndroid Build Coastguard Worker  palignr                m0, m3, m2, 4
637*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq+strideq     ], m0
638*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq+strideq+16  ], m7
639*fb1b10abSAndroid Build Coastguard Worker  palignr                m7, m4, m3, 2
640*fb1b10abSAndroid Build Coastguard Worker  palignr                m0, m3, m2, 2
641*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq+strideq*2   ], m0
642*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq+strideq*2+16], m7
643*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq+stride3q    ], m2                         ; row 31, low half
644*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq+stride3q+16 ], m3                         ; row 31, high half
645*fb1b10abSAndroid Build Coastguard Worker
646*fb1b10abSAndroid Build Coastguard Worker  RESTORE_GOT
647*fb1b10abSAndroid Build Coastguard Worker  RET
648*fb1b10abSAndroid Build Coastguard Worker
; ---------------------------------------------------------------------------
; d207_predictor_8x8(dst, stride, <third argument>, left)
;
; Fills 8 rows of 8 bytes at dst (rows are stride bytes apart) using only the
; 8 bytes at left (a..h in the comments below).
;   - The third argument register is named stride3 and is overwritten with
;     stride*3 before it is ever read (presumably it carries the unused above
;     pointer - confirm against the C prototype).
;   - goffset names an extra GPR that is only handed to GET_GOT/RESTORE_GOT.
;
; The left column is filtered into a rounded 2-tap average and a 3-tap value
; per entry, with h replicated past the end. The two are interleaved as byte
; pairs, and each row is an 8-byte window over that pair stream that advances
; by one pair (2 bytes) per row.
;
; NOTE(review): X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 is defined outside this section.
; Judging by its name and call sites it is taken to write
; (x + 2*y + z + 2) >> 2 into its 4th operand - confirm against its definition.
; ---------------------------------------------------------------------------
649*fb1b10abSAndroid Build Coastguard WorkerINIT_XMM ssse3
650*fb1b10abSAndroid Build Coastguard Workercglobal d207_predictor_8x8, 4, 5, 4, dst, stride, stride3, left, goffset
651*fb1b10abSAndroid Build Coastguard Worker  GET_GOT     goffsetq
652*fb1b10abSAndroid Build Coastguard Worker  movq                m3, [leftq]            ; abcdefgh [byte]
653*fb1b10abSAndroid Build Coastguard Worker  lea           stride3q, [strideq*3]
654*fb1b10abSAndroid Build Coastguard Worker
655*fb1b10abSAndroid Build Coastguard Worker  pshufb              m1, m3, [GLOBAL(sh_b2345677777777777)] ; cdefghhh, then h
656*fb1b10abSAndroid Build Coastguard Worker  pshufb              m0, m3, [GLOBAL(sh_b0123456777777777)] ; abcdefgh, then h
657*fb1b10abSAndroid Build Coastguard Worker  pshufb              m2, m3, [GLOBAL(sh_b1234567777777777)] ; bcdefghh, then h
658*fb1b10abSAndroid Build Coastguard Worker
659*fb1b10abSAndroid Build Coastguard Worker  X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m0, m2, m1, m3          ; m3 = 3-tap values
660*fb1b10abSAndroid Build Coastguard Worker  pavgb               m0, m2                            ; m0 = 2-tap averages
661*fb1b10abSAndroid Build Coastguard Worker  punpcklbw           m0, m3        ; interleaved output
662*fb1b10abSAndroid Build Coastguard Worker
  ; rows 0-3: low 8 bytes of m0, dropping one pair (2 bytes) per row
663*fb1b10abSAndroid Build Coastguard Worker  movq  [dstq          ], m0
664*fb1b10abSAndroid Build Coastguard Worker  psrldq              m0, 2
665*fb1b10abSAndroid Build Coastguard Worker  movq  [dstq+strideq  ], m0
666*fb1b10abSAndroid Build Coastguard Worker  psrldq              m0, 2
667*fb1b10abSAndroid Build Coastguard Worker  movq  [dstq+strideq*2], m0
668*fb1b10abSAndroid Build Coastguard Worker  psrldq              m0, 2
669*fb1b10abSAndroid Build Coastguard Worker  movq  [dstq+stride3q ], m0
670*fb1b10abSAndroid Build Coastguard Worker  lea               dstq, [dstq+strideq*4]
  ; psrldq shifts in zeros, so refill the upper 8 bytes by broadcasting the
  ; lowest word of the upper half (h, h) before producing rows 4-7
671*fb1b10abSAndroid Build Coastguard Worker  pshufhw             m0, m0, q0000 ; de, d2ef, ef, e2fg, fg, f2gh, gh, g3h, 8xh
672*fb1b10abSAndroid Build Coastguard Worker  psrldq              m0, 2
673*fb1b10abSAndroid Build Coastguard Worker  movq  [dstq          ], m0
674*fb1b10abSAndroid Build Coastguard Worker  psrldq              m0, 2
675*fb1b10abSAndroid Build Coastguard Worker  movq  [dstq+strideq  ], m0
676*fb1b10abSAndroid Build Coastguard Worker  psrldq              m0, 2
677*fb1b10abSAndroid Build Coastguard Worker  movq  [dstq+strideq*2], m0
678*fb1b10abSAndroid Build Coastguard Worker  psrldq              m0, 2
679*fb1b10abSAndroid Build Coastguard Worker  movq  [dstq+stride3q ], m0
680*fb1b10abSAndroid Build Coastguard Worker  RESTORE_GOT
681*fb1b10abSAndroid Build Coastguard Worker  RET
682*fb1b10abSAndroid Build Coastguard Worker
; ---------------------------------------------------------------------------
; d207_predictor_16x16(dst, stride, <third argument>, left)
;
; Fills 16 rows of 16 bytes at dst (rows are stride bytes apart) using only
; the 16 bytes at left (a..p in the comments below).
;   - The third argument register is named stride3 and is overwritten with
;     stride*3 before it is ever read (presumably it carries the unused above
;     pointer - confirm against the C prototype).
;   - left is loaded with mova and every row is stored with mova. mova is the
;     aligned move in x86inc, so left, dst and stride are presumably expected
;     to keep 16-byte alignment (TODO confirm against callers).
;   - goffset names an extra GPR that is only handed to GET_GOT/RESTORE_GOT.
;
; The left column is filtered into a rounded 2-tap average and a 3-tap value
; per entry, with p replicated past the end, and the two are interleaved as
; byte pairs (m1 = first 8 pairs, m4 = last 8 pairs). Rows 0-7 are 16-byte
; windows over [m1 | m4] advancing 2 bytes per row. Rows 8-15 start from m4
; and shift it down 2 bytes per row while replicating its last byte.
;
; NOTE(review): X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 is defined outside this section.
; Judging by its name and call sites it is taken to write
; (x + 2*y + z + 2) >> 2 into its 4th operand - confirm against its definition.
; ---------------------------------------------------------------------------
683*fb1b10abSAndroid Build Coastguard WorkerINIT_XMM ssse3
684*fb1b10abSAndroid Build Coastguard Workercglobal d207_predictor_16x16, 4, 5, 5, dst, stride, stride3, left, goffset
685*fb1b10abSAndroid Build Coastguard Worker  GET_GOT     goffsetq
686*fb1b10abSAndroid Build Coastguard Worker  lea           stride3q, [strideq*3]
687*fb1b10abSAndroid Build Coastguard Worker  mova                m0, [leftq]            ; abcdefghijklmnop [byte]
688*fb1b10abSAndroid Build Coastguard Worker  pshufb              m1, m0, [GLOBAL(sh_b123456789abcdeff)] ; bcdefghijklmnopp
689*fb1b10abSAndroid Build Coastguard Worker  pshufb              m2, m0, [GLOBAL(sh_b23456789abcdefff)] ; cdefghijklmnoppp
690*fb1b10abSAndroid Build Coastguard Worker
691*fb1b10abSAndroid Build Coastguard Worker  X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m0, m1, m2, m3          ; m3 = 3-tap values
692*fb1b10abSAndroid Build Coastguard Worker  pavgb               m1, m0                 ; ab, bc, cd .. no, op, pp [byte]
693*fb1b10abSAndroid Build Coastguard Worker
694*fb1b10abSAndroid Build Coastguard Worker  punpckhbw           m4, m1, m3    ; interleaved output, last 8 pairs
695*fb1b10abSAndroid Build Coastguard Worker  punpcklbw           m1, m3        ; interleaved output, first 8 pairs
  ; rows 0-7: window over [m1 | m4], advancing 2 bytes per row
696*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq          ], m1
697*fb1b10abSAndroid Build Coastguard Worker  palignr             m3, m4, m1, 2
698*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq+strideq  ], m3
699*fb1b10abSAndroid Build Coastguard Worker  palignr             m3, m4, m1, 4
700*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq+strideq*2], m3
701*fb1b10abSAndroid Build Coastguard Worker  palignr             m3, m4, m1, 6
702*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq+stride3q ], m3
703*fb1b10abSAndroid Build Coastguard Worker  lea               dstq, [dstq+strideq*4]
704*fb1b10abSAndroid Build Coastguard Worker  palignr             m3, m4, m1, 8
705*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq          ], m3
706*fb1b10abSAndroid Build Coastguard Worker  palignr             m3, m4, m1, 10
707*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq+strideq  ], m3
708*fb1b10abSAndroid Build Coastguard Worker  palignr             m3, m4, m1, 12
709*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq+strideq*2], m3
710*fb1b10abSAndroid Build Coastguard Worker  palignr             m3, m4, m1, 14
711*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq+stride3q ], m3
  ; left is no longer needed: its register is renamed and reused as the
  ; loop counter
712*fb1b10abSAndroid Build Coastguard Worker  DEFINE_ARGS dst, stride, stride3, line
713*fb1b10abSAndroid Build Coastguard Worker  mov              lined, 2                  ; two passes of 4 rows: rows 8-15
714*fb1b10abSAndroid Build Coastguard Worker  mova                m0, [GLOBAL(sh_b23456789abcdefff)] ; shift by 2, replicate byte 15
715*fb1b10abSAndroid Build Coastguard Worker.loop:
716*fb1b10abSAndroid Build Coastguard Worker  lea               dstq, [dstq+strideq*4]   ; step past the 4 rows written last
717*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq          ], m4
718*fb1b10abSAndroid Build Coastguard Worker  pshufb              m4, m0
719*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq+strideq  ], m4
720*fb1b10abSAndroid Build Coastguard Worker  pshufb              m4, m0
721*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq+strideq*2], m4
722*fb1b10abSAndroid Build Coastguard Worker  pshufb              m4, m0
723*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq+stride3q ], m4
724*fb1b10abSAndroid Build Coastguard Worker  pshufb              m4, m0                 ; first row of the next pass
725*fb1b10abSAndroid Build Coastguard Worker  dec              lined
726*fb1b10abSAndroid Build Coastguard Worker  jnz .loop
727*fb1b10abSAndroid Build Coastguard Worker  RESTORE_GOT
728*fb1b10abSAndroid Build Coastguard Worker  REP_RET
729*fb1b10abSAndroid Build Coastguard Worker
730*fb1b10abSAndroid Build Coastguard WorkerINIT_XMM ssse3
731*fb1b10abSAndroid Build Coastguard Workercglobal d207_predictor_32x32, 4, 5, 8, dst, stride, stride3, left, goffset
732*fb1b10abSAndroid Build Coastguard Worker  GET_GOT     goffsetq
733*fb1b10abSAndroid Build Coastguard Worker  lea           stride3q, [strideq*3]
734*fb1b10abSAndroid Build Coastguard Worker  mova                m1, [leftq]              ;  0-15 [byte]
735*fb1b10abSAndroid Build Coastguard Worker  mova                m2, [leftq+16]           ; 16-31 [byte]
736*fb1b10abSAndroid Build Coastguard Worker  pshufb              m0, m2, [GLOBAL(sh_b23456789abcdefff)]
737*fb1b10abSAndroid Build Coastguard Worker  pshufb              m4, m2, [GLOBAL(sh_b123456789abcdeff)]
738*fb1b10abSAndroid Build Coastguard Worker
739*fb1b10abSAndroid Build Coastguard Worker  X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m2, m4, m0, m3
740*fb1b10abSAndroid Build Coastguard Worker  palignr             m6, m2, m1, 1
741*fb1b10abSAndroid Build Coastguard Worker  palignr             m5, m2, m1, 2
742*fb1b10abSAndroid Build Coastguard Worker  pavgb               m2, m4         ; high 16px even lines
743*fb1b10abSAndroid Build Coastguard Worker
744*fb1b10abSAndroid Build Coastguard Worker  X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m1, m6, m5, m0
745*fb1b10abSAndroid Build Coastguard Worker  pavgb                   m1, m6         ; low 16px even lines
746*fb1b10abSAndroid Build Coastguard Worker
747*fb1b10abSAndroid Build Coastguard Worker  punpckhbw               m6, m1, m0               ; interleaved output 2
748*fb1b10abSAndroid Build Coastguard Worker  punpcklbw               m1, m0                   ; interleaved output 1
749*fb1b10abSAndroid Build Coastguard Worker
750*fb1b10abSAndroid Build Coastguard Worker  punpckhbw               m7, m2, m3               ; interleaved output 4
751*fb1b10abSAndroid Build Coastguard Worker  punpcklbw               m2, m3                   ; interleaved output 3
752*fb1b10abSAndroid Build Coastguard Worker
753*fb1b10abSAndroid Build Coastguard Worker  ; output 1st 8 lines (and half of 2nd 8 lines)
754*fb1b10abSAndroid Build Coastguard Worker  DEFINE_ARGS dst, stride, stride3, dst8
755*fb1b10abSAndroid Build Coastguard Worker  lea                  dst8q, [dstq+strideq*8]
756*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq              ], m1
757*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq           +16], m6
758*fb1b10abSAndroid Build Coastguard Worker  mova  [dst8q             ], m6
759*fb1b10abSAndroid Build Coastguard Worker  palignr             m0, m6, m1, 2
760*fb1b10abSAndroid Build Coastguard Worker  palignr             m4, m2, m6, 2
761*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq +strideq     ], m0
762*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq +strideq  +16], m4
763*fb1b10abSAndroid Build Coastguard Worker  mova  [dst8q+strideq     ], m4
764*fb1b10abSAndroid Build Coastguard Worker  palignr             m0, m6, m1, 4
765*fb1b10abSAndroid Build Coastguard Worker  palignr             m4, m2, m6, 4
766*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq +strideq*2   ], m0
767*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq +strideq*2+16], m4
768*fb1b10abSAndroid Build Coastguard Worker  mova  [dst8q+strideq*2   ], m4
769*fb1b10abSAndroid Build Coastguard Worker  palignr             m0, m6, m1, 6
770*fb1b10abSAndroid Build Coastguard Worker  palignr             m4, m2, m6, 6
771*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq +stride3q    ], m0
772*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq +stride3q +16], m4
773*fb1b10abSAndroid Build Coastguard Worker  mova  [dst8q+stride3q    ], m4
774*fb1b10abSAndroid Build Coastguard Worker  lea               dstq, [dstq +strideq*4]
775*fb1b10abSAndroid Build Coastguard Worker  lea              dst8q, [dst8q+strideq*4]
776*fb1b10abSAndroid Build Coastguard Worker  palignr             m0, m6, m1, 8
777*fb1b10abSAndroid Build Coastguard Worker  palignr             m4, m2, m6, 8
778*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq              ], m0
779*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq           +16], m4
780*fb1b10abSAndroid Build Coastguard Worker  mova  [dst8q             ], m4
781*fb1b10abSAndroid Build Coastguard Worker  palignr             m0, m6, m1, 10
782*fb1b10abSAndroid Build Coastguard Worker  palignr             m4, m2, m6, 10
783*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq +strideq     ], m0
784*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq +strideq  +16], m4
785*fb1b10abSAndroid Build Coastguard Worker  mova  [dst8q+strideq     ], m4
786*fb1b10abSAndroid Build Coastguard Worker  palignr             m0, m6, m1, 12
787*fb1b10abSAndroid Build Coastguard Worker  palignr             m4, m2, m6, 12
788*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq +strideq*2   ], m0
789*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq +strideq*2+16], m4
790*fb1b10abSAndroid Build Coastguard Worker  mova  [dst8q+strideq*2   ], m4
791*fb1b10abSAndroid Build Coastguard Worker  palignr             m0, m6, m1, 14
792*fb1b10abSAndroid Build Coastguard Worker  palignr             m4, m2, m6, 14
793*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq +stride3q    ], m0
794*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq +stride3q +16], m4
795*fb1b10abSAndroid Build Coastguard Worker  mova  [dst8q+stride3q    ], m4
796*fb1b10abSAndroid Build Coastguard Worker  lea               dstq, [dstq+strideq*4]
797*fb1b10abSAndroid Build Coastguard Worker  lea              dst8q, [dst8q+strideq*4]
798*fb1b10abSAndroid Build Coastguard Worker
799*fb1b10abSAndroid Build Coastguard Worker  ; output 2nd half of 2nd 8 lines and half of 3rd 8 lines
800*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq           +16], m2
801*fb1b10abSAndroid Build Coastguard Worker  mova  [dst8q             ], m2
802*fb1b10abSAndroid Build Coastguard Worker  palignr             m4, m7, m2, 2
803*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq +strideq  +16], m4
804*fb1b10abSAndroid Build Coastguard Worker  mova  [dst8q+strideq     ], m4
805*fb1b10abSAndroid Build Coastguard Worker  palignr             m4, m7, m2, 4
806*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq +strideq*2+16], m4
807*fb1b10abSAndroid Build Coastguard Worker  mova  [dst8q+strideq*2   ], m4
808*fb1b10abSAndroid Build Coastguard Worker  palignr             m4, m7, m2, 6
809*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq +stride3q +16], m4
810*fb1b10abSAndroid Build Coastguard Worker  mova  [dst8q+stride3q    ], m4
811*fb1b10abSAndroid Build Coastguard Worker  lea               dstq, [dstq+strideq*4]
812*fb1b10abSAndroid Build Coastguard Worker  lea              dst8q, [dst8q+strideq*4]
813*fb1b10abSAndroid Build Coastguard Worker  palignr             m4, m7, m2, 8
814*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq           +16], m4
815*fb1b10abSAndroid Build Coastguard Worker  mova  [dst8q             ], m4
816*fb1b10abSAndroid Build Coastguard Worker  palignr             m4, m7, m2, 10
817*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq +strideq  +16], m4
818*fb1b10abSAndroid Build Coastguard Worker  mova  [dst8q+strideq     ], m4
819*fb1b10abSAndroid Build Coastguard Worker  palignr             m4, m7, m2, 12
820*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq +strideq*2+16], m4
821*fb1b10abSAndroid Build Coastguard Worker  mova  [dst8q+strideq*2   ], m4
822*fb1b10abSAndroid Build Coastguard Worker  palignr             m4, m7, m2, 14
823*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq +stride3q +16], m4
824*fb1b10abSAndroid Build Coastguard Worker  mova  [dst8q+stride3q    ], m4
825*fb1b10abSAndroid Build Coastguard Worker  lea               dstq, [dstq+strideq*4]
826*fb1b10abSAndroid Build Coastguard Worker  lea              dst8q, [dst8q+strideq*4]
827*fb1b10abSAndroid Build Coastguard Worker
828*fb1b10abSAndroid Build Coastguard Worker  ; output 2nd half of 3rd 8 lines and half of 4th 8 lines
829*fb1b10abSAndroid Build Coastguard Worker  mova                m0, [GLOBAL(sh_b23456789abcdefff)]
830*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq           +16], m7
831*fb1b10abSAndroid Build Coastguard Worker  mova  [dst8q             ], m7
832*fb1b10abSAndroid Build Coastguard Worker  pshufb              m7, m0
833*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq +strideq  +16], m7
834*fb1b10abSAndroid Build Coastguard Worker  mova  [dst8q+strideq     ], m7
835*fb1b10abSAndroid Build Coastguard Worker  pshufb              m7, m0
836*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq +strideq*2+16], m7
837*fb1b10abSAndroid Build Coastguard Worker  mova  [dst8q+strideq*2   ], m7
838*fb1b10abSAndroid Build Coastguard Worker  pshufb              m7, m0
839*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq +stride3q +16], m7
840*fb1b10abSAndroid Build Coastguard Worker  mova  [dst8q+stride3q    ], m7
841*fb1b10abSAndroid Build Coastguard Worker  pshufb              m7, m0
842*fb1b10abSAndroid Build Coastguard Worker  lea               dstq, [dstq+strideq*4]
843*fb1b10abSAndroid Build Coastguard Worker  lea              dst8q, [dst8q+strideq*4]
844*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq           +16], m7
845*fb1b10abSAndroid Build Coastguard Worker  mova  [dst8q             ], m7
846*fb1b10abSAndroid Build Coastguard Worker  pshufb              m7, m0
847*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq +strideq  +16], m7
848*fb1b10abSAndroid Build Coastguard Worker  mova  [dst8q+strideq     ], m7
849*fb1b10abSAndroid Build Coastguard Worker  pshufb              m7, m0
850*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq +strideq*2+16], m7
851*fb1b10abSAndroid Build Coastguard Worker  mova  [dst8q+strideq*2   ], m7
852*fb1b10abSAndroid Build Coastguard Worker  pshufb              m7, m0
853*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq +stride3q +16], m7
854*fb1b10abSAndroid Build Coastguard Worker  mova  [dst8q+stride3q    ], m7
855*fb1b10abSAndroid Build Coastguard Worker  pshufb              m7, m0
856*fb1b10abSAndroid Build Coastguard Worker  lea               dstq, [dstq+strideq*4]
857*fb1b10abSAndroid Build Coastguard Worker
858*fb1b10abSAndroid Build Coastguard Worker  ; output last half of 4th 8 lines
859*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq           +16], m7
860*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq +strideq  +16], m7
861*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq +strideq*2+16], m7
862*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq +stride3q +16], m7
863*fb1b10abSAndroid Build Coastguard Worker  lea               dstq, [dstq+strideq*4]
864*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq           +16], m7
865*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq +strideq  +16], m7
866*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq +strideq*2+16], m7
867*fb1b10abSAndroid Build Coastguard Worker  mova  [dstq +stride3q +16], m7
868*fb1b10abSAndroid Build Coastguard Worker
869*fb1b10abSAndroid Build Coastguard Worker  ; done!
870*fb1b10abSAndroid Build Coastguard Worker  RESTORE_GOT
871*fb1b10abSAndroid Build Coastguard Worker  RET
872