xref: /aosp_15_r20/external/libaom/aom_dsp/arm/aom_neon_sve_bridge.h (revision 77c1e3ccc04c968bd2bc212e87364f250e820521)
1 /*
2  * Copyright (c) 2023, Alliance for Open Media. All rights reserved.
3  *
4  * This source code is subject to the terms of the BSD 2 Clause License and
5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6  * was not distributed with this source code in the LICENSE file, you can
7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8  * Media Patent License 1.0 was not distributed with this source code in the
9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10  */
11 
12 #ifndef AOM_AOM_DSP_ARM_AOM_NEON_SVE_BRIDGE_H_
13 #define AOM_AOM_DSP_ARM_AOM_NEON_SVE_BRIDGE_H_
14 
15 #include <arm_neon_sve_bridge.h>
16 
17 #include "config/aom_dsp_rtcd.h"
18 #include "config/aom_config.h"
19 
20 // We can access instructions exclusive to the SVE instruction set from a
21 // predominantly Neon context by making use of the Neon-SVE bridge intrinsics
22 // to reinterpret Neon vectors as SVE vectors - with the high part of the SVE
23 // vector (if it's longer than 128 bits) being "don't care".
24 
25 // While sub-optimal on machines that have SVE vector length > 128-bit - as the
26 // remainder of the vector is unused - this approach is still beneficial when
27 // compared to a Neon-only solution.
28 
// Unsigned widening dot product on Neon vectors, implemented with the SVE
// svdot_u64 intrinsic. Each 64-bit lane of the result is the matching lane of
// acc plus the sum of the four 16-bit products x[i] * y[i] taken from the
// corresponding group of four elements.
static inline uint64x2_t aom_udotq_u16(uint64x2_t acc, uint16x8_t x,
                                       uint16x8_t y) {
  // Move each Neon operand into the low 128 bits of an SVE vector. Anything
  // above 128 bits is left undefined; only the low part is extracted below.
  svuint64_t acc_sve = svset_neonq_u64(svundef_u64(), acc);
  svuint16_t x_sve = svset_neonq_u16(svundef_u16(), x);
  svuint16_t y_sve = svset_neonq_u16(svundef_u16(), y);

  svuint64_t dot_sve = svdot_u64(acc_sve, x_sve, y_sve);

  // Hand the low 128 bits back as a Neon vector.
  return svget_neonq_u64(dot_sve);
}
35 
// Signed widening dot product on Neon vectors, implemented with the SVE
// svdot_s64 intrinsic. Each 64-bit lane of the result is the matching lane of
// acc plus the sum of the four 16-bit products x[i] * y[i] taken from the
// corresponding group of four elements.
static inline int64x2_t aom_sdotq_s16(int64x2_t acc, int16x8_t x, int16x8_t y) {
  // Move each Neon operand into the low 128 bits of an SVE vector. Anything
  // above 128 bits is left undefined; only the low part is extracted below.
  svint64_t acc_sve = svset_neonq_s64(svundef_s64(), acc);
  svint16_t x_sve = svset_neonq_s16(svundef_s16(), x);
  svint16_t y_sve = svset_neonq_s16(svundef_s16(), y);

  svint64_t dot_sve = svdot_s64(acc_sve, x_sve, y_sve);

  // Hand the low 128 bits back as a Neon vector.
  return svget_neonq_s64(dot_sve);
}
41 
// Signed 16-bit dot product against one indexed group of the second
// multiplicand, accumulated into the 64-bit lanes of acc, via the SVE
// svdot_lane_s64 intrinsic. The int64x2_t accumulator and both int16x8_t
// operands are Neon vectors; the result is an int64x2_t.
// This stays a macro rather than an inline function because the underlying
// intrinsic takes the lane index as an immediate, so lane must be an integer
// constant expression at the point of use.
#define aom_svdot_lane_s16(acc, x, y, lane)                   \
  svget_neonq_s64(                                            \
      svdot_lane_s64(svset_neonq_s64(svundef_s64(), (acc)),   \
                     svset_neonq_s16(svundef_s16(), (x)),     \
                     svset_neonq_s16(svundef_s16(), (y)), (lane)))
46 
// Permute the 16-bit elements of s using the SVE svtbl_u16 intrinsic: element
// i of the result is s[tbl[i]].
// NOTE: the index range of the underlying instruction follows the hardware
// SVE vector length, not the 128-bit Neon width, so callers should keep every
// entry of tbl within 0..7; a larger index would address the undefined upper
// part of the vector (or yield zero when the vector is exactly 128 bits).
static inline uint16x8_t aom_tbl_u16(uint16x8_t s, uint16x8_t tbl) {
  // Both operands go in the low 128 bits of an SVE vector; the rest is left
  // undefined.
  svuint16_t data_sve = svset_neonq_u16(svundef_u16(), s);
  svuint16_t index_sve = svset_neonq_u16(svundef_u16(), tbl);

  svuint16_t permuted_sve = svtbl_u16(data_sve, index_sve);

  return svget_neonq_u16(permuted_sve);
}
51 
// Permute the signed 16-bit elements of s using the SVE svtbl_s16 intrinsic:
// element i of the result is s[tbl[i]]. The indices are unsigned.
// NOTE: the index range of the underlying instruction follows the hardware
// SVE vector length, not the 128-bit Neon width, so callers should keep every
// entry of tbl within 0..7; a larger index would address the undefined upper
// part of the vector (or yield zero when the vector is exactly 128 bits).
static inline int16x8_t aom_tbl_s16(int16x8_t s, uint16x8_t tbl) {
  // Both operands go in the low 128 bits of an SVE vector; the rest is left
  // undefined.
  svint16_t data_sve = svset_neonq_s16(svundef_s16(), s);
  svuint16_t index_sve = svset_neonq_u16(svundef_u16(), tbl);

  svint16_t permuted_sve = svtbl_s16(data_sve, index_sve);

  return svget_neonq_s16(permuted_sve);
}
56 
57 #endif  // AOM_AOM_DSP_ARM_AOM_NEON_SVE_BRIDGE_H_
58