xref: /aosp_15_r20/external/libxaac/decoder/armv7/ixheaacd_esbr_cos_sin_mod_loop2.s (revision 15dc779a375ca8b5125643b829a8aa4b70d7f451)
1@/******************************************************************************
2@ *
3@ * Copyright (C) 2018 The Android Open Source Project
4@ *
5@ * Licensed under the Apache License, Version 2.0 (the "License");
6@ * you may not use this file except in compliance with the License.
7@ * You may obtain a copy of the License at:
8@ *
9@ * http://www.apache.org/licenses/LICENSE-2.0
10@ *
11@ * Unless required by applicable law or agreed to in writing, software
12@ * distributed under the License is distributed on an "AS IS" BASIS,
13@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14@ * See the License for the specific language governing permissions and
15@ * limitations under the License.
16@ *
17@ *****************************************************************************
18@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19@*/
20
21
22.text
23.p2align 2
24
@ -----------------------------------------------------------------------------
@ ixheaacd_esbr_cos_sin_mod_loop2
@
@ In-place multiply/accumulate pass over two blocks of 32-bit words: one that
@ starts at R0 and one that starts 256 bytes (64 words) after it. Each block
@ is walked with an ascending and a descending pointer; data words are
@ combined with coefficient pairs fetched sequentially through R1.
@
@ Inputs, as used by the code below:
@   R0 - base of the word buffer (called "subband" in the inline comments)
@   R1 - pointer to 32-bit coefficient pairs, consumed in order
@   R2 - M; the descending pointers start at word index 2*M - 1 of a block
@ NOTE(review): presumably called from C as (WORD32 *subband,
@ const WORD32 *p_sin, WORD32 M) with sine/cosine twiddles in R1, going by
@ the function name - confirm against the caller.
@
@ Register roles:
@   R0  - ascending pointer, first block
@   R3  - descending pointer, first block
@   R10 - ascending pointer, second block (R0 + 256)
@   R11 - descending pointer, second block
@   R8  - constant -4, post-index step for the descending NEON stores
@   R4  - loop counter, (M >> 1) - 1
@   R5, R6, R7, R9 - scalar temporaries
@   D0, D1 - current coefficient pair, each broadcast to both lanes
@   D2, D3 - data operands; lane 0 = first block, lane 1 = second block
@   Q2-Q5  - 64-bit products, Q6-Q8 - 64-bit sum / differences
@
@ Arithmetic: products are signed 32x32->64 (VMULL.S32); the sum uses a
@ wrapping 64-bit add, the differences use saturating VQSUB.S64; results keep
@ bits 63:32 of each 64-bit lane (VSHRN #32).
@
@ r4-r12, lr and D8-D15 are saved on entry and restored on exit; the function
@ returns by popping the saved lr into pc.
@
@ NOTE(review): LOOP1 is tested at the bottom, so its body executes once even
@ when (M >> 1) - 1 is zero or negative - confirm callers never pass such M.
@ -----------------------------------------------------------------------------
25    .global ixheaacd_esbr_cos_sin_mod_loop2
26    .type ixheaacd_esbr_cos_sin_mod_loop2, %function
27ixheaacd_esbr_cos_sin_mod_loop2:
28
29    STMFD           sp!, {r4-r12, r14}  @ save r4-r12 and lr
30    VPUSH           {D8-D15}            @ save D8-D15 (Q4-Q7 are used as scratch below)
31    @ set up the four working pointers
32    ADD             R3, R0, R2, LSL #3  @psubband1 = &subband[2 * M - 1];
33    SUB             R3, R3, #4          @ R3 = R0 + 8*M - 4
34    ADD             R10, R0, #256       @ R10 = R0 + 256 bytes: start of the second block
35    ADD             R11, R10, R2, LSL #3
36    SUB             R11, R11, #4        @ R11 = R10 + 8*M - 4: last word used in the second block
37    MOV             R8, #-4             @ post-index step for descending NEON stores
38    LDR             R6, [R0]            @ R6 = *psubband
39    MOV             R4, R2, ASR #1      @M_2 = ixheaac_shr32(M, 1);
40    SUB             R4, R4, #1          @ loop counter = M_2 - 1
41
42    ASR             R6, R6, #1          @*psubband = *psubband >> 1;
43    VLD1.32         {D2[0]}, [R3]       @ D2[0] = *psubband1, read before the STR below overwrites it
44
45    STR             R6, [R0], #4        @psubband++;
46    LDR             R7, [R0]            @ R7 = next word of the first block
47    ASR             R7, R7, #1
48    RSB             R6, R7, #0          @ R6 = -(R7), plain (non-saturating) negate
49    STR             R6, [R3], #-4       @ *psubband1 = R6, then step psubband1 down one word
50    VLD1.32         {D3[0]}, [R3]       @  im = *psubband1;
51
52    VLD2.32         {D0[0], D1[0]}, [R1]!  @ fetch coefficient pair: 1st word -> D0[0], 2nd -> D1[0]; R1 += 8
53    VDUP.32         D0, D0[0]           @ broadcast 1st coefficient to both lanes
54    VDUP.32         D1, D1[0]           @ broadcast 2nd coefficient to both lanes
55
56    VLD1.32         {D2[1]}, [R11]      @re = *psubband12;
57
58    LDR             R6, [R10]           @ R6 = first word of the second block
59    ASR             R7, R6, #1
60    MOV             R9, #0
61    QSUB            R7, R9, R7          @ R7 = saturating negate of (R6 >> 1)
62
63    STR             R7, [R11], #-4      @ store at the top of the second block, step R11 down
64
65    LDR             R6, [R10, #4]
66    ASR             R6, R6, #1
67    STR             R6, [R10], #4       @ word at R10 = (following word >> 1); advance R10
68
69    VLD1.32         {D3[1]}, [R11]      @ D3[1] = word at the stepped-down R11 (second-block "im")
70
71    VMULL.S32       q2, d0, d2          @ q2 = D0 * D2 (2nd operand of the VQSUB into Q7)
72    VMULL.S32       q3, d0, d3          @ q3 = D0 * D3 (2nd operand of the add)
73    VMULL.S32       q4, d1, d2          @ q4 = D1 * D2 (1st operand of the add)
74    VMULL.S32       q5, d1, d3          @ q5 = D1 * D3 (1st operand of the VQSUB into Q7)
75
76    vadd.I64        q6, q4, q3          @ q6 = D1*D2 + D0*D3 (wrapping add)
77    VQSUB.S64       Q7, Q5, Q2          @ q7 = D1*D3 - D0*D2 (saturating)
78    VQSUB.S64       Q8, Q2, Q5          @ q8 = D0*D2 - D1*D3 (saturating)
79
80    VSHRN.I64       D12, Q6, #32        @ keep bits 63:32 of each sum
81    VSHRN.I64       D14, Q7, #32        @ keep bits 63:32 of each q7 lane
82    VSHRN.I64       D16, Q8, #32        @ keep bits 63:32 of each q8 lane
83
84    VST1.32         {D12[0]}, [R3], R8  @ first block: sum -> [R3], step R3 down
85
86    VST1.32         {D14[0]}, [R0]!     @ first block: q7 result -> [R0], advance R0
87
88    VQNEG.S32       D12, D12            @ saturating negate of both sums
89
90
91    VST1.32         {D12[1]}, [R10]!    @ second block: negated sum -> [R10], advance R10
92
93    VST1.32         {D16[1]}, [R11], R8 @ second block: q8 result -> [R11], step R11 down
94
@ Each pass handles two words at every one of the four pointers and consumes
@ two coefficient pairs: the first half reuses the pair already held in D0/D1,
@ the second half fetches a fresh pair that is also carried into the next pass.
95LOOP1:
96    VLD1.32         {D2}, [R0]          @ two consecutive words from the first block
97    VLD1.32         {D3}, [R10]         @ two consecutive words from the second block
98    LDR             R5, [R3]            @RE2
99    LDR             R6, [R11]           @RE3
100    VTRN.32         D2, D3              @ D2 = {first[0], second[0]}, D3 = {first[1], second[1]}
101
102    VMULL.S32       q2, d0, d2          @ q2 = D0 * D2
103    VMULL.S32       q3, d0, d3          @ q3 = D0 * D3
104    VMULL.S32       q4, d1, d2          @ q4 = D1 * D2
105    VMULL.S32       q5, d1, d3          @ q5 = D1 * D3
106
107    vadd.I64        q6, q4, q3          @ q6 = D1*D2 + D0*D3 (wrapping add)
108    VQSUB.S64       Q7, Q2, Q5          @ q7 = D0*D2 - D1*D3 (saturating; operands swapped vs. the pre-loop code)
109    VQSUB.S64       Q8, Q5, Q2          @ q8 = D1*D3 - D0*D2 (saturating)
110
111    VSHRN.I64       D12, Q6, #32        @ keep bits 63:32 of each result
112    VSHRN.I64       D14, Q7, #32
113    VSHRN.I64       D16, Q8, #32
114
115    VST1.32         {D12[0]}, [R0]!     @ first block: sum -> [R0], advance R0
116    VST1.32         {D14[0]}, [R3], R8  @ first block: q7 result -> [R3], step R3 down
117    VQNEG.S32       D12, D12            @ saturating negate of both sums
118
119    VST1.32         {D12[1]}, [R11], R8 @ second block: negated sum -> [R11], step R11 down
120    VST1.32         {D16[1]}, [R10]!    @ second block: q8 result -> [R10], advance R10
121
122    @ second part
123    VLD2.32         {D0[0], D1[0]}, [R1]!  @ fetch the next coefficient pair; R1 += 8
124    VDUP.32         D0, D0[0]
125    VDUP.32         D1, D1[0]
126
127    VMOV            D3, R5, R6          @ D3 = {RE2, RE3}: values read at the loop top, before the stores above
128    VLD1.32         {D2[0]}, [R3]       @ D2[0] = word at the stepped-down R3
129    VLD1.32         {D2[1]}, [R11]      @ D2[1] = word at the stepped-down R11
130
131    VMULL.S32       q2, d0, d2          @ q2 = D0 * D2
132    VMULL.S32       q3, d0, d3          @ q3 = D0 * D3
133    VMULL.S32       q4, d1, d2          @ q4 = D1 * D2
134    VMULL.S32       q5, d1, d3          @ q5 = D1 * D3
135
136    vadd.I64        q6, q2, q5          @ q6 = D0*D2 + D1*D3 (wrapping add)
137    VQSUB.S64       Q7, Q4, Q3          @ q7 = D1*D2 - D0*D3 (saturating)
138    VQSUB.S64       Q8, Q3, Q4          @ q8 = D0*D3 - D1*D2 (saturating)
139
140    VSHRN.I64       D12, Q6, #32        @ keep bits 63:32 of each result
141    VSHRN.I64       D14, Q7, #32
142    VSHRN.I64       D16, Q8, #32
143
144    VST1.32         {D12[0]}, [R3], R8  @ first block: sum -> [R3], step R3 down
145    VST1.32         {D14[0]}, [R0]!     @ first block: q7 result -> [R0], advance R0
146
147    VQNEG.S32       D12, D12            @ saturating negate of both sums
148
149    subs            r4, r4, #1          @ decrement loop counter and set flags for the BGT below
150    VST1.32         {D12[1]}, [R10]!    @ second block: negated sum -> [R10], advance R10
151    VST1.32         {D16[1]}, [R11], R8 @ second block: q8 result -> [R11], step R11 down
152
153    BGT             LOOP1               @ repeat while the counter is still > 0 (flags from the subs above)
154    VPOP            {D8-D15}            @ restore D8-D15
155    LDMFD           sp!, {r4-r12, r15}  @ restore r4-r12 and return (saved lr is loaded into pc)
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182