@/******************************************************************************
@ *
@ * Copyright (C) 2018 The Android Open Source Project
@ *
@ * Licensed under the Apache License, Version 2.0 (the "License");
@ * you may not use this file except in compliance with the License.
@ * You may obtain a copy of the License at:
@ *
@ * http://www.apache.org/licenses/LICENSE-2.0
@ *
@ * Unless required by applicable law or agreed to in writing, software
@ * distributed under the License is distributed on an "AS IS" BASIS,
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ * See the License for the specific language governing permissions and
@ * limitations under the License.
@ *
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/

@ ------------------------------------------------------------------------------
@ ixheaacd_esbr_cos_sin_mod_loop2
@
@ Syntax : GNU as, ARM (A32) with NEON; '@' starts a comment.
@
@ In     : R0 = subband - base of a buffer of 32-bit words, updated in place
@          R1 = pointer to consecutive pairs of 32-bit coefficients; one pair
@               (w0, w1) is consumed before the loop and one more per loop
@               iteration (presumably the sin/cos twiddle table - confirm
@               against the caller)
@          R2 = M
@ Out    : no return value is prepared; all results are stored into subband.
@ Saved  : r4-r12 and lr are pushed on entry and popped (lr into pc) on exit;
@          d8-d15 (q4-q7) are pushed/popped as well. Q8 (d16-d17) is used
@          without being saved.
@
@ Walking pointers (all step by one 32-bit word per store):
@          R0  : upward   from &subband[0]
@          R3  : downward from &subband[2 * M - 1]
@          R10 : upward   from &subband[64]            (R0 + 256 bytes)
@          R11 : downward from &subband[64 + 2 * M - 1]
@
@ Arithmetic: every stored result is bits [63:32] of a 64-bit sum or difference
@ of two signed 32x32 products. Sums use the wrapping VADD.I64, differences use
@ the saturating VQSUB.S64. NEON lane 0 serves the R0/R3 pair, lane 1 serves
@ the R10/R11 pair.
@
@ Loop count: R4 = (M >> 1) - 1, tested at the bottom of the loop.
@ NOTE(review): because of the bottom test the loop body executes once even
@ when (M >> 1) - 1 <= 0; callers presumably always pass M >= 4 - confirm.
@ ------------------------------------------------------------------------------

.text
.p2align 2

    .global ixheaacd_esbr_cos_sin_mod_loop2
    .type   ixheaacd_esbr_cos_sin_mod_loop2, %function
ixheaacd_esbr_cos_sin_mod_loop2:

    STMFD       sp!, {r4-r12, r14}
    VPUSH       {D8-D15}

    @ ---- generate the load/store addresses ----
    ADD         R3, R0, R2, LSL #3          @ R3 = subband + 8 * M bytes
    SUB         R3, R3, #4                  @ psubband1 = &subband[2 * M - 1]
    ADD         R10, R0, #256               @ R10 = &subband[64]
    ADD         R11, R10, R2, LSL #3
    SUB         R11, R11, #4                @ R11 = &subband[64 + 2 * M - 1]
    MOV         R8, #-4                     @ post-index step for downward NEON stores
    LDR         R6, [R0]                    @ R6 = subband[0]
    MOV         R4, R2, ASR #1              @ M_2 = M >> 1
    SUB         R4, R4, #1                  @ loop counter = M_2 - 1

    @ ---- first output pair, lane 0 (R0 / R3) ----
    ASR         R6, R6, #1                  @ *psubband >> 1
    VLD1.32     {D2[0]}, [R3]               @ D2[0] = *psubband1, read before it is overwritten

    STR         R6, [R0], #4                @ *psubband = halved value; psubband++
    LDR         R7, [R0]
    ASR         R7, R7, #1
    RSB         R6, R7, #0                  @ R6 = -(subband[1] >> 1); cannot overflow after the shift
    STR         R6, [R3], #-4               @ *psubband1 = R6; psubband1--
    VLD1.32     {D3[0]}, [R3]               @ D3[0] = *psubband1 (next lower word)

    VLD2.32     {D0[0], D1[0]}, [R1]!       @ D0[0] = w0, D1[0] = w1; R1 += 8
    VDUP.32     D0, D0[0]                   @ broadcast w0 to both lanes
    VDUP.32     D1, D1[0]                   @ broadcast w1 to both lanes

    @ ---- first output pair, lane 1 (R10 / R11) ----
    VLD1.32     {D2[1]}, [R11]              @ D2[1] = *psubband12, read before it is overwritten

    LDR         R6, [R10]
    ASR         R7, R6, #1
    MOV         R9, #0
    QSUB        R7, R9, R7                  @ R7 = saturating 0 - (subband[64] >> 1)

    STR         R7, [R11], #-4              @ *psubband12 = R7; psubband12--

    LDR         R6, [R10, #4]
    ASR         R6, R6, #1
    STR         R6, [R10], #4               @ subband[64] = subband[65] >> 1; R10 += 4

    VLD1.32     {D3[1]}, [R11]              @ D3[1] = *psubband12 (next lower word)

    @ With a = D2 and b = D3 (per lane):
    VMULL.S32   q2, d0, d2                  @ q2 = w0 * a
    VMULL.S32   q3, d0, d3                  @ q3 = w0 * b
    VMULL.S32   q4, d1, d2                  @ q4 = w1 * a
    VMULL.S32   q5, d1, d3                  @ q5 = w1 * b

    vadd.I64    q6, q4, q3                  @ q6 = w1 * a + w0 * b
    VQSUB.S64   Q7, Q5, Q2                  @ Q7 = sat(w1 * b - w0 * a)
    VQSUB.S64   Q8, Q2, Q5                  @ Q8 = sat(w0 * a - w1 * b)

    VSHRN.I64   D12, Q6, #32                @ keep bits [63:32] of each 64-bit lane
    VSHRN.I64   D14, Q7, #32
    VSHRN.I64   D16, Q8, #32

    VST1.32     {D12[0]}, [R3], R8          @ lane 0 sum        -> *R3, R3 -= 4

    VST1.32     {D14[0]}, [R0]!             @ lane 0 difference -> *R0, R0 += 4

    VQNEG.S32   D12, D12                    @ saturating negate of the sums


    VST1.32     {D12[1]}, [R10]!            @ lane 1 negated sum -> *R10, R10 += 4

    VST1.32     {D16[1]}, [R11], R8         @ lane 1 difference  -> *R11, R11 -= 4

    @ ---- main loop: each pass writes two words through each of the four
    @      pointers and consumes one new (w0, w1) pair ----
LOOP1:
    VLD1.32     {D2}, [R0]                  @ D2 = { R0[0],  R0[1]  }
    VLD1.32     {D3}, [R10]                 @ D3 = { R10[0], R10[1] }
    LDR         R5, [R3]                    @ RE2: saved before *R3 is overwritten below
    LDR         R6, [R11]                   @ RE3: saved before *R11 is overwritten below
    VTRN.32     D2, D3                      @ D2 = { R0[0], R10[0] }, D3 = { R0[1], R10[1] }

    @ First part, still using the previous (w0, w1); a = D2, b = D3:
    VMULL.S32   q2, d0, d2                  @ q2 = w0 * a
    VMULL.S32   q3, d0, d3                  @ q3 = w0 * b
    VMULL.S32   q4, d1, d2                  @ q4 = w1 * a
    VMULL.S32   q5, d1, d3                  @ q5 = w1 * b

    vadd.I64    q6, q4, q3                  @ q6 = w1 * a + w0 * b
    VQSUB.S64   Q7, Q2, Q5                  @ Q7 = sat(w0 * a - w1 * b)
    VQSUB.S64   Q8, Q5, Q2                  @ Q8 = sat(w1 * b - w0 * a)

    VSHRN.I64   D12, Q6, #32                @ keep bits [63:32]
    VSHRN.I64   D14, Q7, #32
    VSHRN.I64   D16, Q8, #32

    VST1.32     {D12[0]}, [R0]!             @ lane 0 sum        -> *R0, R0 += 4
    VST1.32     {D14[0]}, [R3], R8          @ lane 0 difference -> *R3, R3 -= 4
    VQNEG.S32   D12, D12                    @ saturating negate of the sums

    VST1.32     {D12[1]}, [R11], R8         @ lane 1 negated sum -> *R11, R11 -= 4
    VST1.32     {D16[1]}, [R10]!            @ lane 1 difference  -> *R10, R10 += 4

    @ second part: fetch the next coefficient pair
    VLD2.32     {D0[0], D1[0]}, [R1]!       @ D0[0] = w0, D1[0] = w1; R1 += 8
    VDUP.32     D0, D0[0]
    VDUP.32     D1, D1[0]

    VMOV        D3, R5, R6                  @ D3 = { RE2, RE3 } saved at the loop top
    VLD1.32     {D2[0]}, [R3]               @ D2[0] = *R3  (after the decrement above)
    VLD1.32     {D2[1]}, [R11]              @ D2[1] = *R11 (after the decrement above)

    @ a = D2, b = D3:
    VMULL.S32   q2, d0, d2                  @ q2 = w0 * a
    VMULL.S32   q3, d0, d3                  @ q3 = w0 * b
    VMULL.S32   q4, d1, d2                  @ q4 = w1 * a
    VMULL.S32   q5, d1, d3                  @ q5 = w1 * b

    vadd.I64    q6, q2, q5                  @ q6 = w0 * a + w1 * b
    VQSUB.S64   Q7, Q4, Q3                  @ Q7 = sat(w1 * a - w0 * b)
    VQSUB.S64   Q8, Q3, Q4                  @ Q8 = sat(w0 * b - w1 * a)

    VSHRN.I64   D12, Q6, #32                @ keep bits [63:32]
    VSHRN.I64   D14, Q7, #32
    VSHRN.I64   D16, Q8, #32

    VST1.32     {D12[0]}, [R3], R8          @ lane 0 sum        -> *R3, R3 -= 4
    VST1.32     {D14[0]}, [R0]!             @ lane 0 difference -> *R0, R0 += 4

    VQNEG.S32   D12, D12                    @ saturating negate of the sums

    subs        r4, r4, #1                  @ count down; the NEON stores below leave the flags intact
    VST1.32     {D12[1]}, [R10]!            @ lane 1 negated sum -> *R10, R10 += 4
    VST1.32     {D16[1]}, [R11], R8         @ lane 1 difference  -> *R11, R11 -= 4

    BGT         LOOP1                       @ repeat while the counter is still > 0
    VPOP        {D8-D15}
    LDMFD       sp!, {r4-r12, r15}          @ restore registers and return (saved lr -> pc)