@-----------------------------------------------------------------------
@ ixheaacd_fft32x32_ld2_armv7
@
@ Syntax : GNU as, ARM (A32) state, '@' comments.
@ Needs  : MOVW/MOVT and SMULW<y>/SMLAW<y>.
@
@ Purpose: Two radix-4 butterfly stages (labelled "DIT Radix-4 FFT" in
@          the original comments) over 32 consecutive 32-bit words,
@          treated as 16 interleaved (real, imag) pairs.
@
@ In     : r2 = x, base of the 32-word input buffer.  The first stage
@               rewrites it in place.
@          r3 = y, base of the 32-word output buffer, written by the
@               second stage.
@          r0, r1 are overwritten on entry without being read.
@          NOTE(review): the C prototype is not visible here; the first
@          two arguments are presumably unused -- confirm with callers.
@ Out    : y[0..31] written; x[0..31] holds first-stage results.
@          r0 = x and r1 = y on return (no explicit return value is set).
@ Saves  : r4-r12 and lr are pushed on entry; r4-r12 are restored and
@          the saved lr is popped straight into pc on exit (40 bytes).
@ Clobber: r0-r3.  No instruction here updates N, Z, C or V.  SMLAW<y>
@          sets the sticky Q flag if its accumulate overflows.
@ Notes  : * All ADD/SUB are plain 32-bit operations (no saturation).
@          * SMULW<y>/SMLAW<y> keep bits [47:16] of the 32x16 product,
@            i.e. (a * c) >> 16.  With Q15 constants that is half of the
@            true product; the "lsl #1" operands in the second stage
@            compensate for this.
@          * Second-stage butterfly 1 stores to y[8..] before later
@            butterflies load x[8..], so x and y must not be the same
@            buffer.
@
@ Register roles after the prologue:
@          r0 = x, r1 = y, r2-r10 = scratch,
@          r11/r12/r14 = scratch in stage 1 and stage-2 butterfly 1,
@                        packed twiddle constants afterwards.
@-----------------------------------------------------------------------
.text
.p2align 2
.global ixheaacd_fft32x32_ld2_armv7

ixheaacd_fft32x32_ld2_armv7:

    STMFD           sp!, {r4-r12, r14}      @ save callee-saved regs and lr

    @-------------------------------------------------------------------
    @ DIT Radix-4 FFT First Stage (in place on x)
    @ Each butterfly combines x[k], x[k+8], x[k+16], x[k+24] (real parts)
    @ and the matching +1 words (imaginary parts), for k = 0, 2, 4, 6.
    @-------------------------------------------------------------------

    @ First Butterfly
    MOV             r0, r2                  @ r0 = x
    MOV             r1, r3                  @ r1 = y
    LDR             r2, [r0]                @ x_0 = x[0 ]
    LDR             r3, [r0, #32]           @ x_2 = x[8 ]
    LDR             r4, [r0, #64]           @ x_4 = x[16]
    LDR             r5, [r0, #96]           @ x_6 = x[24]
    ADD             r6, r2, r4              @ xh0_0 = x_0 + x_4
    SUB             r7, r2, r4              @ xl0_0 = x_0 - x_4
    ADD             r8, r3, r5              @ xh0_1 = x_2 + x_6
    SUB             r9, r3, r5              @ xl0_1 = x_2 - x_6

    LDR             r2, [r0, #4]            @ x_1 = x[0 +1]
    LDR             r3, [r0, #36]           @ x_3 = x[8 +1]
    LDR             r4, [r0, #68]           @ x_5 = x[16+1]
    LDR             r5, [r0, #100]          @ x_7 = x[24+1]
    ADD             r10, r2, r4             @ xh1_0 = x_1 + x_5
    SUB             r11, r2, r4             @ xl1_0 = x_1 - x_5
    ADD             r12, r3, r5             @ xh1_1 = x_3 + x_7
    SUB             r14, r3, r5             @ xl1_1 = x_3 - x_7

    ADD             r2, r6, r8              @ n00 = xh0_0 + xh0_1
    ADD             r3, r7, r14             @ n10 = xl0_0 + xl1_1
    SUB             r4, r6, r8              @ n20 = xh0_0 - xh0_1
    SUB             r5, r7, r14             @ n30 = xl0_0 - xl1_1
    STR             r2, [r0]                @ x[0 ] = n00
    STR             r3, [r0, #32]           @ x[8 ] = n10
    STR             r4, [r0, #64]           @ x[16] = n20
    STR             r5, [r0, #96]           @ x[24] = n30

    ADD             r2, r10, r12            @ n01 = xh1_0 + xh1_1
    SUB             r3, r11, r9             @ n11 = xl1_0 - xl0_1
    SUB             r4, r10, r12            @ n21 = xh1_0 - xh1_1
    ADD             r5, r11, r9             @ n31 = xl1_0 + xl0_1
    STR             r2, [r0, #4]            @ x[0 +1] = n01
    STR             r3, [r0, #36]           @ x[8 +1] = n11
    STR             r4, [r0, #68]           @ x[16+1] = n21
    STR             r5, [r0, #100]          @ x[24+1] = n31

    @ Second Butterfly
    LDR             r2, [r0, #8]            @ x_0 = x[2 ]
    LDR             r3, [r0, #40]           @ x_2 = x[10]
    LDR             r4, [r0, #72]           @ x_4 = x[18]
    LDR             r5, [r0, #104]          @ x_6 = x[26]
    ADD             r6, r2, r4              @ xh0_0 = x_0 + x_4
    SUB             r7, r2, r4              @ xl0_0 = x_0 - x_4
    ADD             r8, r3, r5              @ xh0_1 = x_2 + x_6
    SUB             r9, r3, r5              @ xl0_1 = x_2 - x_6

    LDR             r2, [r0, #12]           @ x_1 = x[2 +1]
    LDR             r3, [r0, #44]           @ x_3 = x[10+1]
    LDR             r4, [r0, #76]           @ x_5 = x[18+1]
    LDR             r5, [r0, #108]          @ x_7 = x[26+1]
    ADD             r10, r2, r4             @ xh1_0 = x_1 + x_5
    SUB             r11, r2, r4             @ xl1_0 = x_1 - x_5
    ADD             r12, r3, r5             @ xh1_1 = x_3 + x_7
    SUB             r14, r3, r5             @ xl1_1 = x_3 - x_7

    ADD             r2, r6, r8              @ n00 = xh0_0 + xh0_1
    ADD             r3, r7, r14             @ n10 = xl0_0 + xl1_1
    SUB             r4, r6, r8              @ n20 = xh0_0 - xh0_1
    SUB             r5, r7, r14             @ n30 = xl0_0 - xl1_1
    STR             r2, [r0, #8]            @ x[2 ] = n00
    STR             r3, [r0, #40]           @ x[10] = n10
    STR             r4, [r0, #72]           @ x[18] = n20
    STR             r5, [r0, #104]          @ x[26] = n30

    ADD             r2, r10, r12            @ n01 = xh1_0 + xh1_1
    SUB             r3, r11, r9             @ n11 = xl1_0 - xl0_1
    SUB             r4, r10, r12            @ n21 = xh1_0 - xh1_1
    ADD             r5, r11, r9             @ n31 = xl1_0 + xl0_1
    STR             r2, [r0, #12]           @ x[2 +1] = n01
    STR             r3, [r0, #44]           @ x[10+1] = n11
    STR             r4, [r0, #76]           @ x[18+1] = n21
    STR             r5, [r0, #108]          @ x[26+1] = n31

    @ Third Butterfly
    LDR             r2, [r0, #16]           @ x_0 = x[4 ]
    LDR             r3, [r0, #48]           @ x_2 = x[12]
    LDR             r4, [r0, #80]           @ x_4 = x[20]
    LDR             r5, [r0, #112]          @ x_6 = x[28]
    ADD             r6, r2, r4              @ xh0_0 = x_0 + x_4
    SUB             r7, r2, r4              @ xl0_0 = x_0 - x_4
    ADD             r8, r3, r5              @ xh0_1 = x_2 + x_6
    SUB             r9, r3, r5              @ xl0_1 = x_2 - x_6

    LDR             r2, [r0, #20]           @ x_1 = x[4 +1]
    LDR             r3, [r0, #52]           @ x_3 = x[12+1]
    LDR             r4, [r0, #84]           @ x_5 = x[20+1]
    LDR             r5, [r0, #116]          @ x_7 = x[28+1]
    ADD             r10, r2, r4             @ xh1_0 = x_1 + x_5
    SUB             r11, r2, r4             @ xl1_0 = x_1 - x_5
    ADD             r12, r3, r5             @ xh1_1 = x_3 + x_7
    SUB             r14, r3, r5             @ xl1_1 = x_3 - x_7

    ADD             r2, r6, r8              @ n00 = xh0_0 + xh0_1
    ADD             r3, r7, r14             @ n10 = xl0_0 + xl1_1
    SUB             r4, r6, r8              @ n20 = xh0_0 - xh0_1
    SUB             r5, r7, r14             @ n30 = xl0_0 - xl1_1
    STR             r2, [r0, #16]           @ x[4 ] = n00
    STR             r3, [r0, #48]           @ x[12] = n10
    STR             r4, [r0, #80]           @ x[20] = n20
    STR             r5, [r0, #112]          @ x[28] = n30

    ADD             r2, r10, r12            @ n01 = xh1_0 + xh1_1
    SUB             r3, r11, r9             @ n11 = xl1_0 - xl0_1
    SUB             r4, r10, r12            @ n21 = xh1_0 - xh1_1
    ADD             r5, r11, r9             @ n31 = xl1_0 + xl0_1
    STR             r2, [r0, #20]           @ x[4 +1] = n01
    STR             r3, [r0, #52]           @ x[12+1] = n11
    STR             r4, [r0, #84]           @ x[20+1] = n21
    STR             r5, [r0, #116]          @ x[28+1] = n31

    @ Fourth Butterfly
    LDR             r2, [r0, #24]           @ x_0 = x[6 ]
    LDR             r3, [r0, #56]           @ x_2 = x[14]
    LDR             r4, [r0, #88]           @ x_4 = x[22]
    LDR             r5, [r0, #120]          @ x_6 = x[30]
    ADD             r6, r2, r4              @ xh0_0 = x_0 + x_4
    SUB             r7, r2, r4              @ xl0_0 = x_0 - x_4
    ADD             r8, r3, r5              @ xh0_1 = x_2 + x_6
    SUB             r9, r3, r5              @ xl0_1 = x_2 - x_6

    LDR             r2, [r0, #28]           @ x_1 = x[6 +1]
    LDR             r3, [r0, #60]           @ x_3 = x[14+1]
    LDR             r4, [r0, #92]           @ x_5 = x[22+1]
    LDR             r5, [r0, #124]          @ x_7 = x[30+1]
    ADD             r10, r2, r4             @ xh1_0 = x_1 + x_5
    SUB             r11, r2, r4             @ xl1_0 = x_1 - x_5
    ADD             r12, r3, r5             @ xh1_1 = x_3 + x_7
    SUB             r14, r3, r5             @ xl1_1 = x_3 - x_7

    ADD             r2, r6, r8              @ n00 = xh0_0 + xh0_1
    ADD             r3, r7, r14             @ n10 = xl0_0 + xl1_1
    SUB             r4, r6, r8              @ n20 = xh0_0 - xh0_1
    SUB             r5, r7, r14             @ n30 = xl0_0 - xl1_1
    STR             r2, [r0, #24]           @ x[6 ] = n00
    STR             r3, [r0, #56]           @ x[14] = n10
    STR             r4, [r0, #88]           @ x[22] = n20
    STR             r5, [r0, #120]          @ x[30] = n30

    ADD             r2, r10, r12            @ n01 = xh1_0 + xh1_1
    SUB             r3, r11, r9             @ n11 = xl1_0 - xl0_1
    SUB             r4, r10, r12            @ n21 = xh1_0 - xh1_1
    ADD             r5, r11, r9             @ n31 = xl1_0 + xl0_1
    STR             r2, [r0, #28]           @ x[6 +1] = n01
    STR             r3, [r0, #60]           @ x[14+1] = n11
    STR             r4, [r0, #92]           @ x[22+1] = n21
    STR             r5, [r0, #124]          @ x[30+1] = n31


    @-------------------------------------------------------------------
    @ DIT Radix-4 FFT Second Stage (reads x, writes y)
    @ Butterfly b (b = 0..3) reads the four pairs starting at x[8*b]
    @ and writes the pairs y[2*b], y[2*b+8], y[2*b+16], y[2*b+24].
    @-------------------------------------------------------------------

    @ First Butterfly (no twiddle multiplies)
    LDR             r2, [r0]                @ inp_0qr = x[0]
    LDR             r3, [r0, #8]            @ inp_1qr = x[2]
    LDR             r4, [r0, #16]           @ inp_2qr = x[4]
    LDR             r5, [r0, #24]           @ inp_3qr = x[6]
    ADD             r6, r2, r4              @ sum_0qr = inp_0qr + inp_2qr
    SUB             r7, r2, r4              @ sum_1qr = inp_0qr - inp_2qr
    ADD             r8, r3, r5              @ sum_2qr = inp_1qr + inp_3qr
    SUB             r9, r3, r5              @ sum_3qr = inp_1qr - inp_3qr

    LDR             r2, [r0, #4]            @ inp_0qi = x[1]
    LDR             r3, [r0, #12]           @ inp_1qi = x[3]
    LDR             r4, [r0, #20]           @ inp_2qi = x[5]
    LDR             r5, [r0, #28]           @ inp_3qi = x[7]
    ADD             r10, r2, r4             @ sum_0qi = inp_0qi + inp_2qi
    SUB             r11, r2, r4             @ sum_1qi = inp_0qi - inp_2qi
    ADD             r12, r3, r5             @ sum_2qi = inp_1qi + inp_3qi
    SUB             r14, r3, r5             @ sum_3qi = inp_1qi - inp_3qi

    ADD             r2, r6, r8              @ sum_0qr + sum_2qr
    ADD             r3, r7, r14             @ sum_1qr + sum_3qi
    SUB             r4, r6, r8              @ sum_0qr - sum_2qr
    SUB             r5, r7, r14             @ sum_1qr - sum_3qi
    STR             r2, [r1]                @ y[0 ] = sum_0qr + sum_2qr
    STR             r3, [r1, #32]           @ y[8 ] = sum_1qr + sum_3qi
    STR             r4, [r1, #64]           @ y[16] = sum_0qr - sum_2qr
    STR             r5, [r1, #96]           @ y[24] = sum_1qr - sum_3qi

    ADD             r2, r10, r12            @ sum_0qi + sum_2qi
    SUB             r3, r11, r9             @ sum_1qi - sum_3qr
    SUB             r4, r10, r12            @ sum_0qi - sum_2qi
    ADD             r5, r11, r9             @ sum_1qi + sum_3qr
    STR             r2, [r1, #4]            @ y[0 +1] = sum_0qi + sum_2qi
    STR             r3, [r1, #36]           @ y[8 +1] = sum_1qi - sum_3qr
    STR             r4, [r1, #68]           @ y[16+1] = sum_0qi - sum_2qi
    STR             r5, [r1, #100]          @ y[24+1] = sum_1qi + sum_3qr


    @ Load twiddle factors.  Each register packs +c in the bottom
    @ halfword and -c in the top halfword, so the B/T suffix of
    @ SMULW<y>/SMLAW<y> selects the sign of the multiplier:
    @   r11 = 0x89BE7642 : B = +0x7642, T = -0x7642  (~cos(pi/8) in Q15)
    @   r12 = 0xCF0430FC : B = +0x30FC, T = -0x30FC  (~sin(pi/8) in Q15)
    @   r14 = 0xA57D5A83 : B = +0x5A83, T = -0x5A83  (~cos(pi/4) in Q15)
    MOVW            r11, #0x7642
    MOVT            r11, #0x89BE
    MOVW            r12, #0x30FC
    MOVT            r12, #0xCF04
    MOVW            r14, #0x5A83
    MOVT            r14, #0xA57D

    @ Second Butterfly
    LDR             r2, [r0, #32]           @ mul_0qr = inp_0qr = x[8]
    LDR             r3, [r0, #36]           @ mul_0qi = inp_0qi = x[9]

    LDR             r5, [r0, #40]           @ inp_1qr = x[10]
    LDR             r6, [r0, #44]           @ inp_1qi = x[11]
    SMULWB          r4, r5, r11             @ mul_1qr  = (inp_1qr *  0x7642) >> 16
    SMLAWB          r4, r6, r12, r4         @ mul_1qr += (inp_1qi *  0x30FC) >> 16
    SMULWT          r5, r5, r12             @ mul_1qi  = (inp_1qr * -0x30FC) >> 16

    LDR             r7, [r0, #48]           @ inp_2qr = x[12]
    LDR             r8, [r0, #52]           @ inp_2qi = x[13]

    @ Placed after the loads (original note: "moved for delay slot")
    SMLAWB          r5, r6, r11, r5         @ mul_1qi += (inp_1qi *  0x7642) >> 16

    ADD             r6, r7, r8              @ (inp_2qr + inp_2qi)
    SMULWB          r6, r6, r14             @ mul_2qr = ((inp_2qr + inp_2qi) * 0x5A83) >> 16
    SUB             r7, r8, r7              @ (-inp_2qr + inp_2qi)
    SMULWB          r7, r7, r14             @ mul_2qi = ((-inp_2qr + inp_2qi) * 0x5A83) >> 16

    LDR             r9 , [r0, #56]          @ inp_3qr = x[14]
    LDR             r10, [r0, #60]          @ inp_3qi = x[15]
    SMULWB          r8, r9 , r12            @ mul_3qr  = (inp_3qr *  0x30FC) >> 16
    SMLAWB          r8, r10, r11, r8        @ mul_3qr += (inp_3qi *  0x7642) >> 16
    SMULWT          r9, r9 , r11            @ mul_3qi  = (inp_3qr * -0x7642) >> 16
    SMLAWB          r9, r10, r12, r9        @ mul_3qi += (inp_3qi *  0x30FC) >> 16

    ADD             r10, r2, r6, lsl #1     @ sum_0qr = mul_0qr + (mul_2qr << 1)
    SUB             r2 , r2, r6, lsl #1     @ sum_1qr = mul_0qr - (mul_2qr << 1)
    ADD             r6 , r4, r8             @ sum_2qr = mul_1qr + mul_3qr
    SUB             r4 , r4, r8             @ sum_3qr = mul_1qr - mul_3qr

    ADD             r8 , r3, r7, lsl #1     @ sum_0qi = mul_0qi + (mul_2qi << 1)
    SUB             r3 , r3, r7, lsl #1     @ sum_1qi = mul_0qi - (mul_2qi << 1)
    ADD             r7 , r5, r9             @ sum_2qi = mul_1qi + mul_3qi
    SUB             r5 , r5, r9             @ sum_3qi = mul_1qi - mul_3qi

    ADD             r9 , r10, r6, lsl #1    @ sum_0qr + (sum_2qr << 1)
    SUB             r10, r10, r6, lsl #1    @ sum_0qr - (sum_2qr << 1)
    ADD             r6 , r2 , r5, lsl #1    @ sum_1qr + (sum_3qi << 1)
    SUB             r2 , r2 , r5, lsl #1    @ sum_1qr - (sum_3qi << 1)
    STR             r9 , [r1, #8]           @ y[2 ] = sum_0qr + (sum_2qr << 1)
    STR             r10, [r1, #72]          @ y[18] = sum_0qr - (sum_2qr << 1)
    STR             r6 , [r1, #40]          @ y[10] = sum_1qr + (sum_3qi << 1)
    STR             r2 , [r1, #104]         @ y[26] = sum_1qr - (sum_3qi << 1)

    ADD             r5 , r8 , r7, lsl #1    @ sum_0qi + (sum_2qi << 1)
    SUB             r8 , r8 , r7, lsl #1    @ sum_0qi - (sum_2qi << 1)
    SUB             r7 , r3 , r4, lsl #1    @ sum_1qi - (sum_3qr << 1)
    ADD             r3 , r3 , r4, lsl #1    @ sum_1qi + (sum_3qr << 1)
    STR             r5 , [r1, #12]          @ y[2 +1] = sum_0qi + (sum_2qi << 1)
    STR             r8 , [r1, #76]          @ y[18+1] = sum_0qi - (sum_2qi << 1)
    STR             r7 , [r1, #44]          @ y[10+1] = sum_1qi - (sum_3qr << 1)
    STR             r3 , [r1, #108]         @ y[26+1] = sum_1qi + (sum_3qr << 1)

    @ Third Butterfly
    LDR             r2, [r0, #64]           @ mul_0qr = inp_0qr = x[16]

    LDR             r5, [r0, #72]           @ inp_1qr = x[18]
    LDR             r6, [r0, #76]           @ inp_1qi = x[19]

    @ Placed after the loads (original note: "moved for delay slot")
    LDR             r3, [r0, #68]           @ mul_0qi = inp_0qi = x[17]

    ADD             r4, r5, r6              @ (inp_1qr + inp_1qi)
    SMULWB          r4, r4, r14             @ mul_1qr = ((inp_1qr + inp_1qi) * 0x5A83) >> 16
    SUB             r5, r6, r5              @ (-inp_1qr + inp_1qi)
    SMULWB          r5, r5, r14             @ mul_1qi = ((-inp_1qr + inp_1qi) * 0x5A83) >> 16

    LDR             r6, [r0, #84]           @ mul_2qr = inp_2qi = x[21]

    LDR             r9 , [r0, #88]          @ inp_3qr = x[22]
    LDR             r10, [r0, #92]          @ inp_3qi = x[23]

    @ Placed after the loads (original note: "moved for delay slot")
    LDR             r7, [r0, #80]           @ r7 = inp_2qr = x[20]; mul_2qi = -inp_2qr,
                                            @ negation is folded into the SUB/ADD below

    SUB             r8 , r10, r9            @ (-inp_3qr + inp_3qi)
    SMULWB          r8 , r8 , r14           @ mul_3qr = ((-inp_3qr + inp_3qi) *  0x5A83) >> 16
    ADD             r9 , r9 , r10           @ (inp_3qr + inp_3qi)
    SMULWT          r9 , r9 , r14           @ mul_3qi = ((inp_3qr + inp_3qi)  * -0x5A83) >> 16

    ADD             r10, r2, r6             @ sum_0qr = mul_0qr + mul_2qr
    SUB             r2 , r2, r6             @ sum_1qr = mul_0qr - mul_2qr
    ADD             r6 , r4, r8             @ sum_2qr = mul_1qr + mul_3qr
    SUB             r4 , r4, r8             @ sum_3qr = mul_1qr - mul_3qr

    SUB             r8 , r3, r7             @ sum_0qi = mul_0qi + mul_2qi = mul_0qi - inp_2qr
    ADD             r3 , r3, r7             @ sum_1qi = mul_0qi - mul_2qi = mul_0qi + inp_2qr
    ADD             r7 , r5, r9             @ sum_2qi = mul_1qi + mul_3qi
    SUB             r5 , r5, r9             @ sum_3qi = mul_1qi - mul_3qi

    ADD             r9 , r10, r6, lsl #1    @ sum_0qr + (sum_2qr << 1)
    SUB             r10, r10, r6, lsl #1    @ sum_0qr - (sum_2qr << 1)
    ADD             r6 , r2 , r5, lsl #1    @ sum_1qr + (sum_3qi << 1)
    SUB             r2 , r2 , r5, lsl #1    @ sum_1qr - (sum_3qi << 1)
    STR             r9 , [r1, #16]          @ y[4 ] = sum_0qr + (sum_2qr << 1)
    STR             r10, [r1, #80]          @ y[20] = sum_0qr - (sum_2qr << 1)
    STR             r6 , [r1, #48]          @ y[12] = sum_1qr + (sum_3qi << 1)
    STR             r2 , [r1, #112]         @ y[28] = sum_1qr - (sum_3qi << 1)

    ADD             r5, r8, r7, lsl #1      @ sum_0qi + (sum_2qi << 1)
    SUB             r8, r8, r7, lsl #1      @ sum_0qi - (sum_2qi << 1)
    SUB             r7, r3, r4, lsl #1      @ sum_1qi - (sum_3qr << 1)
    ADD             r3, r3, r4, lsl #1      @ sum_1qi + (sum_3qr << 1)
    STR             r5 , [r1, #20]          @ y[4 +1] = sum_0qi + (sum_2qi << 1)
    STR             r8 , [r1, #84]          @ y[20+1] = sum_0qi - (sum_2qi << 1)
    STR             r7 , [r1, #52]          @ y[12+1] = sum_1qi - (sum_3qr << 1)
    STR             r3 , [r1, #116]         @ y[28+1] = sum_1qi + (sum_3qr << 1)

    @ Fourth Butterfly
    LDR             r2, [r0, #96]           @ mul_0qr = inp_0qr = x[24]
    LDR             r3, [r0, #100]          @ mul_0qi = inp_0qi = x[25]

    LDR             r5, [r0, #104]          @ inp_1qr = x[26]
    LDR             r6, [r0, #108]          @ inp_1qi = x[27]
    SMULWB          r4, r5, r12             @ mul_1qr  = (inp_1qr *  0x30FC) >> 16
    SMLAWB          r4, r6, r11, r4         @ mul_1qr += (inp_1qi *  0x7642) >> 16
    SMULWT          r5, r5, r11             @ mul_1qi  = (inp_1qr * -0x7642) >> 16

    LDR             r7, [r0, #112]          @ inp_2qr = x[28]
    LDR             r8, [r0, #116]          @ inp_2qi = x[29]

    @ Placed after the loads (original note: "moved for delay slot")
    SMLAWB          r5, r6, r12, r5         @ mul_1qi += (inp_1qi *  0x30FC) >> 16

    SUB             r6, r8, r7              @ (-inp_2qr + inp_2qi)
    SMULWB          r6, r6, r14             @ mul_2qr = ((-inp_2qr + inp_2qi) *  0x5A83) >> 16
    ADD             r7, r8, r7              @ (inp_2qr + inp_2qi)
    SMULWT          r7, r7, r14             @ mul_2qi = ((inp_2qr + inp_2qi)  * -0x5A83) >> 16

    LDR             r9 , [r0, #120]         @ inp_3qr = x[30]
    LDR             r10, [r0, #124]         @ inp_3qi = x[31]
    SMULWT          r8, r9 , r11            @ mul_3qr  = (inp_3qr * -0x7642) >> 16
    SMLAWT          r8, r10, r12, r8        @ mul_3qr += (inp_3qi * -0x30FC) >> 16
    SMULWB          r9, r9 , r12            @ mul_3qi  = (inp_3qr *  0x30FC) >> 16
    SMLAWT          r9, r10, r11, r9        @ mul_3qi += (inp_3qi * -0x7642) >> 16

    ADD             r10, r2, r6, lsl #1     @ sum_0qr = mul_0qr + (mul_2qr << 1)
    SUB             r2 , r2, r6, lsl #1     @ sum_1qr = mul_0qr - (mul_2qr << 1)
    ADD             r6 , r4, r8             @ sum_2qr = mul_1qr + mul_3qr
    SUB             r4 , r4, r8             @ sum_3qr = mul_1qr - mul_3qr

    ADD             r8 , r3, r7, lsl #1     @ sum_0qi = mul_0qi + (mul_2qi << 1)
    SUB             r3 , r3, r7, lsl #1     @ sum_1qi = mul_0qi - (mul_2qi << 1)
    ADD             r7 , r5, r9             @ sum_2qi = mul_1qi + mul_3qi
    SUB             r5 , r5, r9             @ sum_3qi = mul_1qi - mul_3qi

    ADD             r9 , r10, r6, lsl #1    @ sum_0qr + (sum_2qr << 1)
    SUB             r10, r10, r6, lsl #1    @ sum_0qr - (sum_2qr << 1)
    ADD             r6 , r2 , r5, lsl #1    @ sum_1qr + (sum_3qi << 1)
    SUB             r2 , r2 , r5, lsl #1    @ sum_1qr - (sum_3qi << 1)
    STR             r9 , [r1, #24]          @ y[6 ] = sum_0qr + (sum_2qr << 1)
    STR             r10, [r1, #88]          @ y[22] = sum_0qr - (sum_2qr << 1)
    STR             r6 , [r1, #56]          @ y[14] = sum_1qr + (sum_3qi << 1)
    STR             r2 , [r1, #120]         @ y[30] = sum_1qr - (sum_3qi << 1)

    ADD             r5 , r8 , r7, lsl #1    @ sum_0qi + (sum_2qi << 1)
    SUB             r8 , r8 , r7, lsl #1    @ sum_0qi - (sum_2qi << 1)
    SUB             r7 , r3 , r4, lsl #1    @ sum_1qi - (sum_3qr << 1)
    ADD             r3 , r3 , r4, lsl #1    @ sum_1qi + (sum_3qr << 1)
    STR             r5 , [r1, #28]          @ y[6 +1] = sum_0qi + (sum_2qi << 1)
    STR             r8 , [r1, #92]          @ y[22+1] = sum_0qi - (sum_2qi << 1)
    STR             r7 , [r1, #60]          @ y[14+1] = sum_1qi - (sum_3qr << 1)
    STR             r3 , [r1, #124]         @ y[30+1] = sum_1qi + (sum_3qr << 1)

    LDMFD           sp!, {r4-r12, r15}      @ restore regs; saved lr -> pc (return)
