xref: /aosp_15_r20/external/libvpx/vpx_dsp/x86/subtract_sse2.asm (revision fb1b10ab9aebc7c7068eedab379b749d7e3900be)
1*fb1b10abSAndroid Build Coastguard Worker;
2*fb1b10abSAndroid Build Coastguard Worker;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3*fb1b10abSAndroid Build Coastguard Worker;
4*fb1b10abSAndroid Build Coastguard Worker;  Use of this source code is governed by a BSD-style license
5*fb1b10abSAndroid Build Coastguard Worker;  that can be found in the LICENSE file in the root of the source
6*fb1b10abSAndroid Build Coastguard Worker;  tree. An additional intellectual property rights grant can be found
7*fb1b10abSAndroid Build Coastguard Worker;  in the file PATENTS.  All contributing project authors may
8*fb1b10abSAndroid Build Coastguard Worker;  be found in the AUTHORS file in the root of the source tree.
9*fb1b10abSAndroid Build Coastguard Worker;
10*fb1b10abSAndroid Build Coastguard Worker
11*fb1b10abSAndroid Build Coastguard Worker%include "third_party/x86inc/x86inc.asm"
12*fb1b10abSAndroid Build Coastguard Worker
13*fb1b10abSAndroid Build Coastguard WorkerSECTION .text
14*fb1b10abSAndroid Build Coastguard Worker
15*fb1b10abSAndroid Build Coastguard Worker; void vpx_subtract_block(int rows, int cols,
16*fb1b10abSAndroid Build Coastguard Worker;                         int16_t *diff, ptrdiff_t diff_stride,
17*fb1b10abSAndroid Build Coastguard Worker;                         const uint8_t *src, ptrdiff_t src_stride,
18*fb1b10abSAndroid Build Coastguard Worker;                         const uint8_t *pred, ptrdiff_t pred_stride)
19*fb1b10abSAndroid Build Coastguard Worker
; Entry point and width dispatch for the subtract_block routine (see the
; vpx_subtract_block prototype comment above): diff = src - pred, with the
; 8-bit inputs widened to 16-bit words.
;
; cglobal arguments (x86inc convention): 7 arguments loaded into registers,
; 7 general-purpose registers used, 8 vector registers used, followed by
; the argument names. Only 7 of the 8 named arguments are loaded, so
; pred_stride is fetched later through pred_stridemp.
20*fb1b10abSAndroid Build Coastguard WorkerINIT_XMM sse2
21*fb1b10abSAndroid Build Coastguard Workercglobal subtract_block, 7, 7, 8, \
22*fb1b10abSAndroid Build Coastguard Worker                        rows, cols, diff, diff_stride, src, src_stride, \
23*fb1b10abSAndroid Build Coastguard Worker                        pred, pred_stride
; pred_str aliases the cols register. This is safe because cols is only
; read by the comparisons below; each path overwrites it with pred_stride
; afterwards.
24*fb1b10abSAndroid Build Coastguard Worker%define pred_str colsq
; m7 here is the XMM register selected by INIT_XMM above; the unpack
; instructions in the loops interleave against it to zero-extend bytes.
25*fb1b10abSAndroid Build Coastguard Worker  pxor                  m7, m7         ; dedicated zero register
; Dispatch on block width. Any width other than 4, 8, 16 or 32 falls
; through to the code that follows this dispatch sequence.
26*fb1b10abSAndroid Build Coastguard Worker  cmp                colsd, 4
27*fb1b10abSAndroid Build Coastguard Worker  je .case_4
28*fb1b10abSAndroid Build Coastguard Worker  cmp                colsd, 8
29*fb1b10abSAndroid Build Coastguard Worker  je .case_8
30*fb1b10abSAndroid Build Coastguard Worker  cmp                colsd, 16
31*fb1b10abSAndroid Build Coastguard Worker  je .case_16
32*fb1b10abSAndroid Build Coastguard Worker  cmp                colsd, 32
33*fb1b10abSAndroid Build Coastguard Worker  je .case_32
34*fb1b10abSAndroid Build Coastguard Worker
; loop16: subtract two full vectors of pred bytes from two full vectors of
; src bytes and store the results as 16-bit words.
;
; Parameters (all are address offsets added to the base pointers):
;   param 1, param 2 - offsets from srcq of the first and second src vector
;   param 3, param 4 - offsets from predq of the first and second pred vector
;   param 5, param 6 - byte offsets from diffq where the first and second
;                      result pairs are written (each pair is 2*mmsize bytes)
;
; Expects m7 to be zero; clobbers m0-m5. All loads and stores use mova
; (NOTE(review): x86inc maps mova to the aligned move, so src, pred and
; diff addresses are presumably required to be mmsize-aligned - confirm).
35*fb1b10abSAndroid Build Coastguard Worker%macro loop16 6
36*fb1b10abSAndroid Build Coastguard Worker  mova                  m0, [srcq+%1]
37*fb1b10abSAndroid Build Coastguard Worker  mova                  m4, [srcq+%2]
38*fb1b10abSAndroid Build Coastguard Worker  mova                  m1, [predq+%3]
39*fb1b10abSAndroid Build Coastguard Worker  mova                  m5, [predq+%4]
  ; First vector: high bytes widened into m2/m3, low bytes widened in
  ; place in m0/m1, then src - pred per word.
40*fb1b10abSAndroid Build Coastguard Worker  punpckhbw             m2, m0, m7
41*fb1b10abSAndroid Build Coastguard Worker  punpckhbw             m3, m1, m7
42*fb1b10abSAndroid Build Coastguard Worker  punpcklbw             m0, m7
43*fb1b10abSAndroid Build Coastguard Worker  punpcklbw             m1, m7
44*fb1b10abSAndroid Build Coastguard Worker  psubw                 m2, m3
45*fb1b10abSAndroid Build Coastguard Worker  psubw                 m0, m1
  ; Second vector: m1 and m3 are free again, so they are reused for the
  ; high halves; the low halves are widened in place in m4/m5.
46*fb1b10abSAndroid Build Coastguard Worker  punpckhbw             m1, m4, m7
47*fb1b10abSAndroid Build Coastguard Worker  punpckhbw             m3, m5, m7
48*fb1b10abSAndroid Build Coastguard Worker  punpcklbw             m4, m7
49*fb1b10abSAndroid Build Coastguard Worker  punpcklbw             m5, m7
50*fb1b10abSAndroid Build Coastguard Worker  psubw                 m1, m3
51*fb1b10abSAndroid Build Coastguard Worker  psubw                 m4, m5
  ; Store low-half words first, then high-half words, for each vector.
52*fb1b10abSAndroid Build Coastguard Worker  mova [diffq+mmsize*0+%5], m0
53*fb1b10abSAndroid Build Coastguard Worker  mova [diffq+mmsize*1+%5], m2
54*fb1b10abSAndroid Build Coastguard Worker  mova [diffq+mmsize*0+%6], m4
55*fb1b10abSAndroid Build Coastguard Worker  mova [diffq+mmsize*1+%6], m1
56*fb1b10abSAndroid Build Coastguard Worker%endmacro
57*fb1b10abSAndroid Build Coastguard Worker
; Default path, reached by fall-through when cols is none of 4, 8, 16, 32.
; Processes one row per iteration: the two loop16 invocations read src and
; pred at offsets 0..3*mmsize and write 8*mmsize contiguous bytes of diff
; (64 pixels per row with 16-byte XMM registers).
; NOTE(review): nothing here checks that cols is actually 64 - presumably
; callers only pass widths 4/8/16/32/64; confirm.
; pred_stride is loaded into the register that held cols.
58*fb1b10abSAndroid Build Coastguard Worker  mov             pred_str, pred_stridemp
59*fb1b10abSAndroid Build Coastguard Worker.loop_64:
60*fb1b10abSAndroid Build Coastguard Worker  loop16 0*mmsize, 1*mmsize, 0*mmsize, 1*mmsize, 0*mmsize, 2*mmsize
61*fb1b10abSAndroid Build Coastguard Worker  loop16 2*mmsize, 3*mmsize, 2*mmsize, 3*mmsize, 4*mmsize, 6*mmsize
  ; diff holds int16_t elements, so one row is diff_stride*2 bytes.
62*fb1b10abSAndroid Build Coastguard Worker  lea                diffq, [diffq+diff_strideq*2]
63*fb1b10abSAndroid Build Coastguard Worker  add                predq, pred_str
64*fb1b10abSAndroid Build Coastguard Worker  add                 srcq, src_strideq
  ; The body always runs at least once; repeat while rows is still > 0.
65*fb1b10abSAndroid Build Coastguard Worker  dec                rowsd
66*fb1b10abSAndroid Build Coastguard Worker  jg .loop_64
67*fb1b10abSAndroid Build Coastguard Worker  RET
68*fb1b10abSAndroid Build Coastguard Worker
; cols == 32: one row per iteration. A single loop16 reads src and pred at
; offsets 0 and mmsize and writes 4*mmsize contiguous bytes of diff.
69*fb1b10abSAndroid Build Coastguard Worker.case_32:
  ; pred_stride is loaded into the register that held cols.
70*fb1b10abSAndroid Build Coastguard Worker  mov             pred_str, pred_stridemp
71*fb1b10abSAndroid Build Coastguard Worker.loop_32:
72*fb1b10abSAndroid Build Coastguard Worker  loop16 0, mmsize, 0, mmsize, 0, 2*mmsize
  ; diff holds int16_t elements, so one row is diff_stride*2 bytes.
73*fb1b10abSAndroid Build Coastguard Worker  lea                diffq, [diffq+diff_strideq*2]
74*fb1b10abSAndroid Build Coastguard Worker  add                predq, pred_str
75*fb1b10abSAndroid Build Coastguard Worker  add                 srcq, src_strideq
  ; The body always runs at least once; repeat while rows is still > 0.
76*fb1b10abSAndroid Build Coastguard Worker  dec                rowsd
77*fb1b10abSAndroid Build Coastguard Worker  jg .loop_32
78*fb1b10abSAndroid Build Coastguard Worker  RET
79*fb1b10abSAndroid Build Coastguard Worker
; cols == 16: two rows per iteration. loop16's "second vector" offsets are
; the strides, so its second half handles the next row of src, pred and
; diff (diff_strideq*2 is one diff row in bytes).
; Each iteration always processes two rows, so an odd rows value would
; touch one row past the block.
; NOTE(review): presumably callers only pass even heights - confirm.
80*fb1b10abSAndroid Build Coastguard Worker.case_16:
  ; pred_stride is loaded into the register that held cols.
81*fb1b10abSAndroid Build Coastguard Worker  mov             pred_str, pred_stridemp
82*fb1b10abSAndroid Build Coastguard Worker.loop_16:
83*fb1b10abSAndroid Build Coastguard Worker  loop16 0, src_strideq, 0, pred_str, 0, diff_strideq*2
  ; Advance all three pointers by two rows.
84*fb1b10abSAndroid Build Coastguard Worker  lea                diffq, [diffq+diff_strideq*4]
85*fb1b10abSAndroid Build Coastguard Worker  lea                predq, [predq+pred_str*2]
86*fb1b10abSAndroid Build Coastguard Worker  lea                 srcq, [srcq+src_strideq*2]
87*fb1b10abSAndroid Build Coastguard Worker  sub                rowsd, 2
88*fb1b10abSAndroid Build Coastguard Worker  jg .loop_16
89*fb1b10abSAndroid Build Coastguard Worker  RET
90*fb1b10abSAndroid Build Coastguard Worker
; loop_h: subtract two rows of pred from two rows of src using
; half-register loads, storing one full register of 16-bit words per row.
;
; Takes no parameters; reads srcq, src_strideq, predq, pred_str, diffq and
; diff_strideq from the surrounding code, expects m7 to be zero, and
; clobbers m0-m3. The register set it operates on is whichever INIT_*
; mode is active where the macro is expanded.
; NOTE(review): movh is x86inc's half-width load (presumably 8 bytes for
; XMM and 4 bytes for MMX) - confirm against x86inc.asm.
91*fb1b10abSAndroid Build Coastguard Worker%macro loop_h 0
  ; Row 0 in m0 (src) and m1 (pred), row 1 in m2 (src) and m3 (pred).
92*fb1b10abSAndroid Build Coastguard Worker  movh                  m0, [srcq]
93*fb1b10abSAndroid Build Coastguard Worker  movh                  m2, [srcq+src_strideq]
94*fb1b10abSAndroid Build Coastguard Worker  movh                  m1, [predq]
95*fb1b10abSAndroid Build Coastguard Worker  movh                  m3, [predq+pred_str]
  ; Zero-extend the loaded bytes to words by interleaving with m7.
96*fb1b10abSAndroid Build Coastguard Worker  punpcklbw             m0, m7
97*fb1b10abSAndroid Build Coastguard Worker  punpcklbw             m1, m7
98*fb1b10abSAndroid Build Coastguard Worker  punpcklbw             m2, m7
99*fb1b10abSAndroid Build Coastguard Worker  punpcklbw             m3, m7
  ; src - pred per word.
100*fb1b10abSAndroid Build Coastguard Worker  psubw                 m0, m1
101*fb1b10abSAndroid Build Coastguard Worker  psubw                 m2, m3
  ; Row 1 of diff is diff_stride*2 bytes after row 0 (int16_t elements).
102*fb1b10abSAndroid Build Coastguard Worker  mova             [diffq], m0
103*fb1b10abSAndroid Build Coastguard Worker  mova [diffq+diff_strideq*2], m2
104*fb1b10abSAndroid Build Coastguard Worker%endmacro
105*fb1b10abSAndroid Build Coastguard Worker
; cols == 8: two rows per iteration via loop_h, expanded here with the XMM
; register set selected by INIT_XMM at the top of the function.
; Each iteration always processes two rows.
; NOTE(review): presumably callers only pass even heights - confirm.
106*fb1b10abSAndroid Build Coastguard Worker.case_8:
  ; pred_stride is loaded into the register that held cols.
107*fb1b10abSAndroid Build Coastguard Worker  mov             pred_str, pred_stridemp
108*fb1b10abSAndroid Build Coastguard Worker.loop_8:
109*fb1b10abSAndroid Build Coastguard Worker  loop_h
  ; Advance all three pointers by two rows.
110*fb1b10abSAndroid Build Coastguard Worker  lea                diffq, [diffq+diff_strideq*4]
111*fb1b10abSAndroid Build Coastguard Worker  lea                 srcq, [srcq+src_strideq*2]
112*fb1b10abSAndroid Build Coastguard Worker  lea                predq, [predq+pred_str*2]
113*fb1b10abSAndroid Build Coastguard Worker  sub                rowsd, 2
114*fb1b10abSAndroid Build Coastguard Worker  jg .loop_8
115*fb1b10abSAndroid Build Coastguard Worker  RET
116*fb1b10abSAndroid Build Coastguard Worker
; cols == 4: two rows per iteration via loop_h, expanded with the 64-bit
; MMX register set so that each load is 4 bytes and each store is 4 words.
; Each iteration always processes two rows.
; NOTE(review): presumably callers only pass even heights - confirm.
117*fb1b10abSAndroid Build Coastguard WorkerINIT_MMX
118*fb1b10abSAndroid Build Coastguard Worker.case_4:
  ; pred_stride is loaded into the register that held cols.
119*fb1b10abSAndroid Build Coastguard Worker  mov             pred_str, pred_stridemp
  ; INIT_MMX rebinds m0-m7 to the MMX registers, so the m7 that was zeroed
  ; at function entry (under INIT_XMM) is a different physical register
  ; from the m7 that loop_h unpacks against here. Zero the MMX m7
  ; explicitly instead of relying on whatever the MMX/x87 state contains.
  pxor                  m7, m7
120*fb1b10abSAndroid Build Coastguard Worker.loop_4:
121*fb1b10abSAndroid Build Coastguard Worker  loop_h
  ; Advance all three pointers by two rows.
122*fb1b10abSAndroid Build Coastguard Worker  lea                diffq, [diffq+diff_strideq*4]
123*fb1b10abSAndroid Build Coastguard Worker  lea                 srcq, [srcq+src_strideq*2]
124*fb1b10abSAndroid Build Coastguard Worker  lea                predq, [predq+pred_str*2]
125*fb1b10abSAndroid Build Coastguard Worker  sub                rowsd, 2
126*fb1b10abSAndroid Build Coastguard Worker  jg .loop_4
  ; Clear the MMX state before returning so later x87 code is unaffected.
127*fb1b10abSAndroid Build Coastguard Worker  emms
128*fb1b10abSAndroid Build Coastguard Worker  RET
129