xref: /aosp_15_r20/external/libaom/av1/encoder/x86/dct_sse2.asm (revision 77c1e3ccc04c968bd2bc212e87364f250e820521)
;
; Copyright (c) 2016, Alliance for Open Media. All rights reserved.
;
; This source code is subject to the terms of the BSD 2 Clause License and
; the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
; was not distributed with this source code in the LICENSE file, you can
; obtain it at www.aomedia.org/license/software. If the Alliance for Open
; Media Patent License 1.0 was not distributed with this source code in the
; PATENTS file, you can obtain it at www.aomedia.org/license/patent.
;
11*77c1e3ccSAndroid Build Coastguard Worker
12*77c1e3ccSAndroid Build Coastguard Worker%define private_prefix av1
13*77c1e3ccSAndroid Build Coastguard Worker
14*77c1e3ccSAndroid Build Coastguard Worker%include "third_party/x86inc/x86inc.asm"
15*77c1e3ccSAndroid Build Coastguard Worker
16*77c1e3ccSAndroid Build Coastguard WorkerSECTION .text
17*77c1e3ccSAndroid Build Coastguard Worker
;------------------------------------------------------------------------------
; TRANSFORM_COLS
;
; One butterfly pass over four row registers, applied independently to each
; 16-bit lane.
;
;   In:   m0 = a    m1 = b    m2 = c    m3 = d
;   Out:  m0 = a'   m1 = c'   m2 = d'   m3 = b'      (note the row order)
;
;           s  = a + b
;           t  = d - c
;           e  = (s - t) >> 1          (arithmetic shift)
;           b' = e - b
;           c' = e - c
;           a' = s - c'
;           d' = t + b'
;
; m4 and m5 are used as temporaries. The copies into them are movq, i.e. only
; the low 64 bits (four words) are carried over, so only the low four lanes of
; the results are meaningful.
; SWAP is the x86inc register-renaming macro; it is what puts the results under
; the output names listed above.
;------------------------------------------------------------------------------
%macro TRANSFORM_COLS 0
  psubw     m3, m2                      ; m3 = t  = d - c
  paddw     m0, m1                      ; m0 = s  = a + b
  movq      m4, m0
  psubw     m4, m3                      ; m4 = s - t
  psraw     m4, 1                       ; m4 = e
  movq      m5, m4                      ; keep a second copy of e
  psubw     m4, m2                      ; m4 = c' = e - c
  psubw     m0, m4                      ; m0 = a' = s - c'
  psubw     m5, m1                      ; m5 = b' = e - b
  paddw     m3, m5                      ; m3 = d' = t + b'

  ; Rename so the results sit in m0..m3. The {2,3,5} renames are order
  ; dependent; the {1,4} rename is disjoint from them.
  SWAP      2, 3                        ; m2 = d'
  SWAP      3, 5                        ; m3 = b'
  SWAP      1, 4                        ; m1 = c'
%endmacro
34*77c1e3ccSAndroid Build Coastguard Worker
;------------------------------------------------------------------------------
; TRANSPOSE_4X4
;
; Transposes a 4x4 block of 16-bit values.
;
;   In:   low 64 bits of m0..m3 hold rows 0..3 (element "rc" = row r, col c):
;           m0 = 00 01 02 03
;           m1 = 10 11 12 13
;           m2 = 20 21 22 23
;           m3 = 30 31 32 33
;   Out:  two transposed rows packed per register:
;           m0 = 00 10 20 30 | 01 11 21 31
;           m1 = 02 12 22 32 | 03 13 23 33
;         m2 is left holding the interleaved rows 2/3; m3 is not written.
;------------------------------------------------------------------------------
%macro TRANSPOSE_4X4 0
  ; Interleave words of row pairs.
  punpcklwd   m2, m3                    ; m2 = 20 30 21 31  22 32 23 33
  punpcklwd   m0, m1                    ; m0 = 00 10 01 11  02 12 03 13

  ; Interleave dwords of the two pairs. The high half is produced first with
  ; the 3-operand form (copy of m0 into m1, then unpack), so m0 is still intact
  ; when its low half is combined in place.
  punpckhdq   m1, m0, m2                ; m1 = 02 12 22 32  03 13 23 33
  punpckldq   m0, m2                    ; m0 = 00 10 20 30  01 11 21 31
%endmacro
46*77c1e3ccSAndroid Build Coastguard Worker
INIT_XMM sse2
;------------------------------------------------------------------------------
; fwht4x4(input, output, stride)
;
; 4x4 forward transform: two rounds of TRANSFORM_COLS followed by
; TRANSPOSE_4X4, then every coefficient is shifted left by 2 and widened to
; 32 bits.
;
;   input   four rows of four 16-bit values. Row k is read from
;           input + k * stride * 2 bytes, i.e. stride presumably counts
;           16-bit elements - TODO confirm against the C prototype.
;   output  receives 16 sign-extended 32-bit values (64 bytes). The stores are
;           aligned (mova), so output must be 16-byte aligned.
;   stride  row pitch, see input.
;
; Registers: r0-r2 = arguments, r3 = row2 (address of input row 2), m0-m5.
; Only m0-m5 are ever named here or inside the two macros, so six XMM
; registers are declared; declaring more than six makes x86inc save and
; restore xmm6+ on Win64 for no benefit.
;------------------------------------------------------------------------------
cglobal fwht4x4, 3, 4, 6, input, output, stride, row2
  ; stride is used below in 64-bit address arithmetic. NOTE(review): the C
  ; prototype is not visible in this file; if stride is passed as a 32-bit int
  ; the upper half of the register is unspecified on x86-64, so widen it first.
  ; This expands to nothing where strideq and strided are the same register.
  movsxdifnidn    strideq,   strided

  lea             row2q,     [inputq + strideq*4]   ; address of row 2
  movq            m0,        [inputq]               ; a1 = row 0
  movq            m1,        [inputq + strideq*2]   ; b1 = row 1
  movq            m2,        [row2q]                ; c1 = row 2
  movq            m3,        [row2q + strideq*2]    ; d1 = row 3

  ; ---- pass 1 ---------------------------------------------------------------
  TRANSFORM_COLS
  TRANSPOSE_4X4                         ; m0 = rows 0|1, m1 = rows 2|3 (packed)

  ; TRANSFORM_COLS wants one row per register in m0..m3, so unpack the pairs:
  ; move the 2|3 pair to m2, then shift the upper rows down into m1 and m3.
  SWAP            1,         2          ; m2 = rows 2|3
  psrldq          m1,        m0, 8      ; m1 = row 1
  psrldq          m3,        m2, 8      ; m3 = row 3

  ; ---- pass 2 ---------------------------------------------------------------
  TRANSFORM_COLS
  TRANSPOSE_4X4                         ; m0 = rows 0|1, m1 = rows 2|3 (packed)

  ; Scale every 16-bit coefficient by 4.
  psllw           m0,        2
  psllw           m1,        2

  ; Sign-extend 16 -> 32 bits: duplicating each word into both halves of a
  ; dword and shifting right arithmetically by 16 leaves the sign-extended
  ; value. m2/m3 take the upper four words of m0/m1.
  mova            m2,             m0
  mova            m3,             m1
  punpcklwd       m0,             m0
  punpcklwd       m1,             m1
  punpckhwd       m2,             m2
  punpckhwd       m3,             m3
  psrad           m0,             16
  psrad           m1,             16
  psrad           m2,             16
  psrad           m3,             16

  ; Four results per store, in row order.
  mova            [outputq],      m0    ; row 0
  mova            [outputq + 16], m2    ; row 1
  mova            [outputq + 32], m1    ; row 2
  mova            [outputq + 48], m3    ; row 3

  RET
83