xref: /aosp_15_r20/external/libvpx/vp8/encoder/x86/copy_sse3.asm (revision fb1b10ab9aebc7c7068eedab379b749d7e3900be)
;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;
10*fb1b10abSAndroid Build Coastguard Worker
11*fb1b10abSAndroid Build Coastguard Worker%include "vpx_ports/x86_abi_support.asm"
12*fb1b10abSAndroid Build Coastguard Worker
;-----------------------------------------------------------------------
; STACK_FRAME_CREATE_X3
;
; Function prologue for a routine taking five arguments of the form
;   (ptr, int stride, ptr, int stride, <fifth>)
; It binds the symbolic names src_ptr, src_stride, ref_ptr, ref_stride,
; end_ptr, ret_var, result_ptr, max_sad and height to the registers or
; stack slots appropriate for the ABI selected at assembly time.
;
; The two strides are `int` in the C prototype.  Both 64-bit calling
; conventions leave the upper 32 bits of such a register argument
; undefined, so the strides are sign-extended to 64 bits before they are
; used in address arithmetic, and `height` is bound to a 32-bit operand
; on every path.
;
; Must be paired with STACK_FRAME_DESTROY_X3.
;-----------------------------------------------------------------------
%macro STACK_FRAME_CREATE_X3 0
%if ABI_IS_32BIT
  ; 32-bit: every argument lives on the stack and is reached via arg(n).
  %define     src_ptr       rsi
  %define     src_stride    rax
  %define     ref_ptr       rdi
  %define     ref_stride    rdx
  %define     end_ptr       rcx
  %define     ret_var       rbx
  %define     result_ptr    arg(4)
  %define     max_sad       arg(4)
  %define     height        dword ptr arg(4)
    push        rbp
    mov         rbp,        rsp
    push        rsi                             ; saved here, restored by
    push        rdi                             ; STACK_FRAME_DESTROY_X3
    push        rbx

    mov         rsi,        arg(0)              ; src_ptr
    mov         rdi,        arg(2)              ; ref_ptr

    movsxd      rax,        dword ptr arg(1)    ; src_stride
    movsxd      rdx,        dword ptr arg(3)    ; ref_stride
%elif LIBVPX_YASM_WIN64
  ; Win64: first four arguments arrive in rcx, rdx, r8, r9; the fifth is
  ; read from the caller's stack above the return address.
  ; NOTE(review): SAVE_XMM is defined in x86_abi_support.asm; presumably it
  ; preserves callee-saved xmm6-xmm7 and reserves xmm_stack_space bytes.
    SAVE_XMM 7, u
    %define     src_ptr     rcx
    %define     src_stride  rdx
    %define     ref_ptr     r8
    %define     ref_stride  r9
    %define     end_ptr     r10
    %define     ret_var     r11
    %define     result_ptr  [rsp+xmm_stack_space+8+4*8]
    %define     max_sad     [rsp+xmm_stack_space+8+4*8]
    %define     height      dword ptr [rsp+xmm_stack_space+8+4*8]

    movsxd      rdx,        edx                 ; src_stride: int -> 64-bit
    movsxd      r9,         r9d                 ; ref_stride: int -> 64-bit
%else
  ; System V AMD64: arguments arrive in rdi, rsi, rdx, rcx, r8.
    %define     src_ptr     rdi
    %define     src_stride  rsi
    %define     ref_ptr     rdx
    %define     ref_stride  rcx
    %define     end_ptr     r9
    %define     ret_var     r10
    %define     result_ptr  r8
    %define     max_sad     r8
    %define     height      r8d                 ; int: only the low 32 bits are defined

    movsxd      rsi,        esi                 ; src_stride: int -> 64-bit
    movsxd      rcx,        ecx                 ; ref_stride: int -> 64-bit
%endif

%endmacro
61*fb1b10abSAndroid Build Coastguard Worker
;-----------------------------------------------------------------------
; STACK_FRAME_DESTROY_X3
;
; Epilogue matching STACK_FRAME_CREATE_X3.  Redefines every symbolic
; argument name to empty, undoes whatever the prologue pushed or saved
; for the active ABI, and returns to the caller.
;-----------------------------------------------------------------------
%macro STACK_FRAME_DESTROY_X3 0
  ; Blank out the per-function aliases.
  %define     height
  %define     max_sad
  %define     result_ptr
  %define     ret_var
  %define     end_ptr
  %define     ref_stride
  %define     ref_ptr
  %define     src_stride
  %define     src_ptr

%if ABI_IS_32BIT
    ; Pop in the reverse order of the prologue's pushes.
    pop         rbx
    pop         rdi
    pop         rsi
    pop         rbp
%elif LIBVPX_YASM_WIN64
    RESTORE_XMM
%endif
    ret
%endmacro
85*fb1b10abSAndroid Build Coastguard Worker
86*fb1b10abSAndroid Build Coastguard WorkerSECTION .text
87*fb1b10abSAndroid Build Coastguard Worker
;-----------------------------------------------------------------------
;void vp8_copy32xn_sse3(
;    unsigned char *src_ptr,
;    int  src_stride,
;    unsigned char *dst_ptr,
;    int  dst_stride,
;    int height);
;
; Copies `height` rows of 32 bytes each from src_ptr to dst_ptr, stepping
; by src_stride / dst_stride bytes between rows.  Rows are moved four at
; a time while at least four remain, then one at a time.  A height of
; zero or less copies nothing.
;
; Loads use movdqu, so the source may be unaligned.  Stores use movdqa,
; so dst_ptr must be 16-byte aligned and dst_stride a multiple of 16.
;
; Inside the body the destination is addressed through the ref_ptr /
; ref_stride aliases set up by STACK_FRAME_CREATE_X3.  end_ptr is a
; scratch pointer to the third row of the current four-row group.
; Uses xmm0-xmm7.
;-----------------------------------------------------------------------
globalsym(vp8_copy32xn_sse3)
sym(vp8_copy32xn_sse3):

    STACK_FRAME_CREATE_X3

        ; The unrolled loop always moves four rows, so it must not be
        ; entered when fewer than four rows were requested.
        cmp             height,     4
        jl              .block_copy_sse3_tail

.block_copy_sse3_loopx4:
        lea             end_ptr,    [src_ptr+src_stride*2]     ; source row 2

        movdqu          xmm0,       XMMWORD PTR [src_ptr]
        movdqu          xmm1,       XMMWORD PTR [src_ptr + 16]
        movdqu          xmm2,       XMMWORD PTR [src_ptr + src_stride]
        movdqu          xmm3,       XMMWORD PTR [src_ptr + src_stride + 16]
        movdqu          xmm4,       XMMWORD PTR [end_ptr]
        movdqu          xmm5,       XMMWORD PTR [end_ptr + 16]
        movdqu          xmm6,       XMMWORD PTR [end_ptr + src_stride]
        movdqu          xmm7,       XMMWORD PTR [end_ptr + src_stride + 16]

        lea             src_ptr,    [src_ptr+src_stride*4]     ; advance 4 source rows

        lea             end_ptr,    [ref_ptr+ref_stride*2]     ; destination row 2

        movdqa          XMMWORD PTR [ref_ptr], xmm0
        movdqa          XMMWORD PTR [ref_ptr + 16], xmm1
        movdqa          XMMWORD PTR [ref_ptr + ref_stride], xmm2
        movdqa          XMMWORD PTR [ref_ptr + ref_stride + 16], xmm3
        movdqa          XMMWORD PTR [end_ptr], xmm4
        movdqa          XMMWORD PTR [end_ptr + 16], xmm5
        movdqa          XMMWORD PTR [end_ptr + ref_stride], xmm6
        movdqa          XMMWORD PTR [end_ptr + ref_stride + 16], xmm7

        lea             ref_ptr,    [ref_ptr+ref_stride*4]     ; advance 4 destination rows

        sub             height,     4
        cmp             height,     4
        jge             .block_copy_sse3_loopx4

.block_copy_sse3_tail:
        ; Check whether any rows remain to be copied.  A signed test is
        ; used so that a non-positive height never reaches the row loop.
        cmp             height, 0
        jle             .copy_is_done

.block_copy_sse3_loop:
        movdqu          xmm0,       XMMWORD PTR [src_ptr]
        movdqu          xmm1,       XMMWORD PTR [src_ptr + 16]
        lea             src_ptr,    [src_ptr+src_stride]

        movdqa          XMMWORD PTR [ref_ptr], xmm0
        movdqa          XMMWORD PTR [ref_ptr + 16], xmm1
        lea             ref_ptr,    [ref_ptr+ref_stride]

        sub             height,     1
        jne             .block_copy_sse3_loop

.copy_is_done:
    STACK_FRAME_DESTROY_X3
148