; xref: /aosp_15_r20/external/libvpx/vp8/encoder/x86/copy_sse2.asm (revision fb1b10ab9aebc7c7068eedab379b749d7e3900be)
;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;

; The code below relies on helper macros that are not defined in this file
; (globalsym, sym, arg, SHADOW_ARGS_TO_STACK, UNSHADOW_ARGS, SAVE_XMM,
; RESTORE_XMM); they are expected to come from this include.
%include "vpx_ports/x86_abi_support.asm"

SECTION .text

;-----------------------------------------------------------------------
;void vp8_copy32xn_sse2(
;    unsigned char *src_ptr,
;    int  src_stride,
;    unsigned char *dst_ptr,
;    int  dst_stride,
;    int height);
;
; Copies `height` rows of 32 bytes each from src_ptr to dst_ptr, advancing
; by src_stride / dst_stride bytes between consecutive rows.
;
; Alignment: source rows are read with movdqu (no alignment requirement);
; destination rows are written with movdqa, so dst_ptr and dst_stride must
; keep every destination row 16-byte aligned.
;
; Rows are copied four at a time while at least four remain; any leftover
; rows (1..3) are copied one at a time. height <= 0 copies nothing.
;
; Register roles after the prolog:
;   rsi = current source row          rax = src_stride
;   rdi = current destination row     rdx = dst_stride
;   rcx = rows still to copy
;   xmm0-xmm7 = staging registers for the row data
;
; Argument access and register save/restore go through macros that are not
; defined in this file (arg, SHADOW_ARGS_TO_STACK, SAVE_XMM, ...).
;-----------------------------------------------------------------------
globalsym(vp8_copy32xn_sse2)
sym(vp8_copy32xn_sse2):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 5
    SAVE_XMM 7
    push        rsi
    push        rdi
    ; end prolog

        mov             rsi,        arg(0) ;src_ptr
        mov             rdi,        arg(2) ;dst_ptr

        movsxd          rax,        dword ptr arg(1) ;src_stride
        movsxd          rdx,        dword ptr arg(3) ;dst_stride
        movsxd          rcx,        dword ptr arg(4) ;height

        ; The 4-row loop always copies four full rows per pass, so it must
        ; only be entered when at least four rows were requested. Without
        ; this guard a height of 1..3 (or <= 0) would copy past the
        ; requested area and leave rcx negative.
        cmp             rcx,     4
        jl              .block_copy_sse2_tail

.block_copy_sse2_loopx4:
        ; Invariant: rcx >= 4 here.
        ; Load all four source rows before storing any of them.
        movdqu          xmm0,       XMMWORD PTR [rsi]
        movdqu          xmm1,       XMMWORD PTR [rsi + 16]
        movdqu          xmm2,       XMMWORD PTR [rsi + rax]
        movdqu          xmm3,       XMMWORD PTR [rsi + rax + 16]

        lea             rsi,        [rsi+rax*2]     ; src += 2 rows

        movdqu          xmm4,       XMMWORD PTR [rsi]
        movdqu          xmm5,       XMMWORD PTR [rsi + 16]
        movdqu          xmm6,       XMMWORD PTR [rsi + rax]
        movdqu          xmm7,       XMMWORD PTR [rsi + rax + 16]

        lea             rsi,    [rsi+rax*2]         ; src += 2 rows

        movdqa          XMMWORD PTR [rdi], xmm0
        movdqa          XMMWORD PTR [rdi + 16], xmm1
        movdqa          XMMWORD PTR [rdi + rdx], xmm2
        movdqa          XMMWORD PTR [rdi + rdx + 16], xmm3

        lea             rdi,    [rdi+rdx*2]         ; dst += 2 rows

        movdqa          XMMWORD PTR [rdi], xmm4
        movdqa          XMMWORD PTR [rdi + 16], xmm5
        movdqa          XMMWORD PTR [rdi + rdx], xmm6
        movdqa          XMMWORD PTR [rdi + rdx + 16], xmm7

        lea             rdi,    [rdi+rdx*2]         ; dst += 2 rows

        sub             rcx,     4
        cmp             rcx,     4
        jge             .block_copy_sse2_loopx4

.block_copy_sse2_tail:
        ; rcx < 4 here. Signed check: zero or negative row counts skip the
        ; single-row loop, which only terminates when rcx reaches exactly 0.
        test            rcx,     rcx
        jle             .copy_is_done

.block_copy_sse2_loop:
        ; Invariant: rcx > 0 here; copy one 32-byte row per pass.
        movdqu          xmm0,       XMMWORD PTR [rsi]
        movdqu          xmm1,       XMMWORD PTR [rsi + 16]
        lea             rsi,    [rsi+rax]           ; src += 1 row

        movdqa          XMMWORD PTR [rdi], xmm0
        movdqa          XMMWORD PTR [rdi + 16], xmm1
        lea             rdi,    [rdi+rdx]           ; dst += 1 row

        sub             rcx,     1
        jne             .block_copy_sse2_loop

.copy_is_done:
    ; begin epilog
    pop rdi
    pop rsi
    RESTORE_XMM
    UNSHADOW_ARGS
    pop         rbp
    ret
95