xref: /aosp_15_r20/external/libvpx/config/arm-neon/vpx_dsp/arm/idct4x4_1_add_neon.asm.S (revision fb1b10ab9aebc7c7068eedab379b749d7e3900be)
1@ This file was created from a .asm file
2@  using the ads2gas.pl script.
3.syntax unified
4@
5@  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
6@
7@  Use of this source code is governed by a BSD-style license and patent
8@  grant that can be found in the LICENSE file in the root of the source
9@  tree. All contributing project authors may be found in the AUTHORS
10@  file in the root of the source tree.
11@
12
13
14    .global vpx_idct4x4_1_add_neon           @ make the entry point visible to the linker
15    .type vpx_idct4x4_1_add_neon, function   @ tag the symbol as a function in the ELF symbol table
16    .arm                                     @ assemble what follows as ARM (not Thumb) instructions
17    .eabi_attribute 24, 1 @ Tag_ABI_align_needed = 1: code may rely on 8-byte alignment of 8-byte data
18    .eabi_attribute 25, 1 @ Tag_ABI_align_preserved = 1: code preserves 8-byte alignment of 8-byte data
19
20    .text                                    @ emit the code into the .text section
21    .p2align 2                               @ align the next location to 2^2 = 4 bytes
22
@ void vpx_idct4x4_1_add_neon(int16_t *input, uint8_t *dest, int stride)
@
@ Derives a single offset a1 from input[0] and adds it to every pixel of the
@ 4x4 block at dest. Each sum is saturated to an unsigned byte and written
@ back in place.
@
@ Arguments:
@   r0  int16_t *input   (only input[0] is read)
@   r1  uint8_t *dest    (four rows of four bytes, rows `stride` bytes apart)
@   r2  int      stride
@
@ Registers written: r0, r1, r12, q0 (d0-d1), d2, d4, d6, d7, q8, q9.
@ The stack is not used and no condition flags are set.

    .equ    COSPI_16_64,        11585           @ 0x2d41
    .equ    DCT_CONST_BITS,     14
    .equ    DCT_CONST_ROUNDING, 0x2000          @ 1 << (DCT_CONST_BITS - 1)
    .equ    FINAL_SHIFT,        4
    .equ    FINAL_ROUNDING,     8               @ 1 << (FINAL_SHIFT - 1)

vpx_idct4x4_1_add_neon: @ PROC
    ldrsh       r0, [r0]                        @ r0 = input[0], sign-extended to 32 bits
    movw        r12, #COSPI_16_64               @ r12 = multiplier for both passes

    @ Pass 1: out = dct_const_round_shift(input[0] * cospi_16_64)
    mul         r0, r0, r12
    add         r0, r0, #DCT_CONST_ROUNDING
    asr         r0, r0, #DCT_CONST_BITS

    @ Pass 2: out = dct_const_round_shift(out * cospi_16_64)
    mul         r0, r0, r12
    add         r0, r0, #DCT_CONST_ROUNDING
    asr         r0, r0, #DCT_CONST_BITS

    @ a1 = ROUND_POWER_OF_TWO(out, 4)
    add         r0, r0, #FINAL_ROUNDING
    asr         r0, r0, #FINAL_SHIFT

    @ The multiplier is dead from here on, so r12 is reused as the read
    @ cursor. r1 still holds the original dest for the stores further down.
    mov         r12, r1
    vdup.16     q0, r0                          @ a1 in all eight 16-bit lanes

    @ Gather the block: two 4-byte rows per D register.
    vld1.32     {d2[0]}, [r12], r2              @ row 0
    vld1.32     {d2[1]}, [r12], r2              @ row 1
    vld1.32     {d4[0]}, [r12], r2              @ row 2
    vld1.32     {d4[1]}, [r12]                  @ row 3 (no write-back)

    @ Widen each pixel to 16 bits and add a1.
    vaddw.u8    q8, q0, d2                      @ rows 0-1: dest[x] + a1
    vaddw.u8    q9, q0, d4                      @ rows 2-3: dest[x] + a1

    @ clip_pixel: narrow with signed -> unsigned 8-bit saturation.
    vqmovun.s16 d6, q8
    vqmovun.s16 d7, q9

    @ Scatter the rows back, walking dest with the same stride.
    vst1.32     {d6[0]}, [r1], r2               @ row 0
    vst1.32     {d6[1]}, [r1], r2               @ row 1
    vst1.32     {d7[0]}, [r1], r2               @ row 2
    vst1.32     {d7[1]}, [r1]                   @ row 3 (no write-back)

    bx          lr
.size vpx_idct4x4_1_add_neon, .-vpx_idct4x4_1_add_neon    @ ENDP             @ |vpx_idct4x4_1_add_neon|
70
71    .section .note.GNU-stack,"",%progbits    @ empty marker section: tells the linker this object does not need an executable stack
72