1 /*
2 * Copyright (c) 2023, Alliance for Open Media. All rights reserved.
3 *
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 */
11
12 #ifndef AOM_AOM_DSP_ARM_AOM_NEON_SVE_BRIDGE_H_
13 #define AOM_AOM_DSP_ARM_AOM_NEON_SVE_BRIDGE_H_
14
15 #include <arm_neon_sve_bridge.h>
16
17 #include "config/aom_dsp_rtcd.h"
18 #include "config/aom_config.h"
19
20 // We can access instructions exclusive to the SVE instruction set from a
21 // predominantly Neon context by making use of the Neon-SVE bridge intrinsics
22 // to reinterpret Neon vectors as SVE vectors - with the high part of the SVE
23 // vector (if it's longer than 128 bits) being "don't care".
24
25 // While sub-optimal on machines that have SVE vector length > 128-bit - as the
26 // remainder of the vector is unused - this approach is still beneficial when
27 // compared to a Neon-only solution.
28
// Unsigned 16-bit dot product with 64-bit accumulation, using the SVE UDOT
// instruction on Neon vectors. Each 64-bit lane of the result is the matching
// lane of acc plus the sum of the four products formed from the corresponding
// group of four 16-bit elements of x and y.
static inline uint64x2_t aom_udotq_u16(uint64x2_t acc, uint16x8_t x,
                                       uint16x8_t y) {
  // Place each Neon operand in the low 128 bits of an SVE vector. The upper
  // part (if any) is left undefined; it is never read back into the result.
  svuint64_t acc_sve = svset_neonq_u64(svundef_u64(), acc);
  svuint16_t x_sve = svset_neonq_u16(svundef_u16(), x);
  svuint16_t y_sve = svset_neonq_u16(svundef_u16(), y);

  svuint64_t dot_sve = svdot_u64(acc_sve, x_sve, y_sve);

  // Only the low 128 bits carry meaningful data.
  return svget_neonq_u64(dot_sve);
}
35
// Signed 16-bit dot product with 64-bit accumulation, using the SVE SDOT
// instruction on Neon vectors. Each 64-bit lane of the result is the matching
// lane of acc plus the sum of the four products formed from the corresponding
// group of four 16-bit elements of x and y.
static inline int64x2_t aom_sdotq_s16(int64x2_t acc, int16x8_t x, int16x8_t y) {
  // Place each Neon operand in the low 128 bits of an SVE vector. The upper
  // part (if any) is left undefined; it is never read back into the result.
  svint64_t acc_sve = svset_neonq_s64(svundef_s64(), acc);
  svint16_t x_sve = svset_neonq_s16(svundef_s16(), x);
  svint16_t y_sve = svset_neonq_s16(svundef_s16(), y);

  svint64_t dot_sve = svdot_s64(acc_sve, x_sve, y_sve);

  // Only the low 128 bits carry meaningful data.
  return svget_neonq_s64(dot_sve);
}
41
// Signed 16-bit indexed dot product with 64-bit accumulation on Neon vectors:
// sum (int64x2_t) is accumulated with the dot products of the groups of four
// 16-bit elements of s0 (int16x8_t) against the group of four 16-bit elements
// of f (int16x8_t) selected by lane.
//
// This is a macro rather than an inline function because the lane argument of
// svdot_lane_s64 must be an integer constant expression (0 or 1 for the
// 64-bit form, per the ACLE specification).
#define aom_svdot_lane_s16(sum, s0, f, lane)                 \
  svget_neonq_s64(                                           \
      svdot_lane_s64(svset_neonq_s64(svundef_s64(), (sum)),  \
                     svset_neonq_s16(svundef_s16(), (s0)),   \
                     svset_neonq_s16(svundef_s16(), (f)),    \
                     (lane)))
46
// Permute the 16-bit elements of s using the SVE TBL instruction: element i of
// the result is s[tbl[i]].
//
// NOTE(review): indices are presumably expected to be in the range 0..7.
// Larger indices would fall outside the Neon part of the vector and, per the
// ACLE description of svtbl, yield either zero or data from the undefined
// upper part of the SVE register depending on the hardware vector length.
static inline uint16x8_t aom_tbl_u16(uint16x8_t s, uint16x8_t tbl) {
  // Widen both operands to SVE vectors; the upper part is left undefined.
  svuint16_t data_sve = svset_neonq_u16(svundef_u16(), s);
  svuint16_t index_sve = svset_neonq_u16(svundef_u16(), tbl);

  svuint16_t permuted_sve = svtbl_u16(data_sve, index_sve);

  // Return only the low 128 bits.
  return svget_neonq_u16(permuted_sve);
}
51
// Permute the signed 16-bit elements of s using the SVE TBL instruction:
// element i of the result is s[tbl[i]]. The index vector is unsigned.
//
// NOTE(review): indices are presumably expected to be in the range 0..7.
// Larger indices would fall outside the Neon part of the vector and, per the
// ACLE description of svtbl, yield either zero or data from the undefined
// upper part of the SVE register depending on the hardware vector length.
static inline int16x8_t aom_tbl_s16(int16x8_t s, uint16x8_t tbl) {
  // Widen both operands to SVE vectors; the upper part is left undefined.
  svint16_t data_sve = svset_neonq_s16(svundef_s16(), s);
  svuint16_t index_sve = svset_neonq_u16(svundef_u16(), tbl);

  svint16_t permuted_sve = svtbl_s16(data_sve, index_sve);

  // Return only the low 128 bits.
  return svget_neonq_s16(permuted_sve);
}
56
57 #endif // AOM_AOM_DSP_ARM_AOM_NEON_SVE_BRIDGE_H_
58