1 /*
2 * Copyright (c) 2024 The WebM project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #ifndef VPX_VPX_DSP_ARM_VPX_NEON_SVE_BRIDGE_H_
12 #define VPX_VPX_DSP_ARM_VPX_NEON_SVE_BRIDGE_H_
13
14 #include <arm_neon.h>
15 #include <arm_sve.h>
16 #include <arm_neon_sve_bridge.h>
17
18 // Dot product instructions operating on 16-bit input elements are exclusive to
19 // the SVE instruction set. However, we can access these instructions from a
20 // predominantly Neon context by making use of the Neon-SVE bridge intrinsics
21 // to reinterpret Neon vectors as SVE vectors - with the high part of the SVE
22 // vector (if it's longer than 128 bits) being "don't care".
23
24 // While sub-optimal on machines that have SVE vector length > 128-bit - as the
25 // remainder of the vector is unused - this approach is still beneficial when
26 // compared to a Neon-only solution.
27
// Unsigned 16-bit dot product with 64-bit accumulation, routed through SVE.
// The eight 16-bit elements of x and y are multiplied pairwise and each group
// of four adjacent products is added to the matching 64-bit lane of acc.
// Each Neon operand is placed in the low 128 bits of an SVE vector whose
// remaining bits are left undefined; only the low 128 bits of the SVE result
// are handed back to the caller.
static INLINE uint64x2_t vpx_dotq_u16(uint64x2_t acc, uint16x8_t x,
                                      uint16x8_t y) {
  svuint64_t sve_acc = svset_neonq_u64(svundef_u64(), acc);
  svuint16_t sve_x = svset_neonq_u16(svundef_u16(), x);
  svuint16_t sve_y = svset_neonq_u16(svundef_u16(), y);
  return svget_neonq_u64(svdot_u64(sve_acc, sve_x, sve_y));
}
34
// Signed 16-bit dot product with 64-bit accumulation, routed through SVE.
// The eight 16-bit elements of x and y are multiplied pairwise and each group
// of four adjacent products is added to the matching 64-bit lane of acc.
// Each Neon operand is placed in the low 128 bits of an SVE vector whose
// remaining bits are left undefined; only the low 128 bits of the SVE result
// are handed back to the caller.
static INLINE int64x2_t vpx_dotq_s16(int64x2_t acc, int16x8_t x, int16x8_t y) {
  svint64_t sve_acc = svset_neonq_s64(svundef_s64(), acc);
  svint16_t sve_x = svset_neonq_s16(svundef_s16(), x);
  svint16_t sve_y = svset_neonq_s16(svundef_s16(), y);
  return svget_neonq_s64(svdot_s64(sve_acc, sve_x, sve_y));
}
40
// Signed 16-bit dot product by lane with 64-bit accumulation, bridging the
// Neon operands acc, x and y through SVE vectors whose upper bits are left
// undefined. This expands to a single expression and forwards `lane`
// unchanged to svdot_lane_s64.
// NOTE(review): presumably a macro rather than a function because the
// intrinsic needs `lane` as a compile-time constant (expected range [0, 1],
// each value selecting a group of four 16-bit elements of y) - confirm
// against the ACLE.
#define vpx_dotq_lane_s16(acc, x, y, lane)                     \
  svget_neonq_s64(                                             \
      svdot_lane_s64(svset_neonq_s64(svundef_s64(), (acc)),    \
                     svset_neonq_s16(svundef_s16(), (x)),      \
                     svset_neonq_s16(svundef_s16(), (y)), (lane)))
45
// 16-bit table lookup routed through the SVE TBL intrinsic: each element of
// the result is the element of data selected by the matching element of
// indices. Both Neon operands are placed in the low 128 bits of SVE vectors
// whose remaining bits are left undefined.
// NOTE(review): indices are presumably expected to lie in [0, 7]; on hardware
// with SVE vectors wider than 128 bits a larger index could select from the
// undefined upper part of the table - confirm with callers.
static INLINE uint16x8_t vpx_tbl_u16(uint16x8_t data, uint16x8_t indices) {
  svuint16_t sve_table = svset_neonq_u16(svundef_u16(), data);
  svuint16_t sve_indices = svset_neonq_u16(svundef_u16(), indices);
  return svget_neonq_u16(svtbl_u16(sve_table, sve_indices));
}
50
51 #endif // VPX_VPX_DSP_ARM_VPX_NEON_SVE_BRIDGE_H_
52