/*
 * Copyright (c) 2023, Alliance for Open Media. All rights reserved.
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */
11
12 #include "config/aom_config.h"
13
14 #include "arm_cpudetect.h"
15
16 #include "aom_ports/arm.h"
17
18 #if defined(__APPLE__)
19 #include <sys/sysctl.h>
20 #endif
21
22 #if !CONFIG_RUNTIME_CPU_DETECT
23
// Capability query for builds where CONFIG_RUNTIME_CPU_DETECT is disabled.
// Nothing is probed at run time; the result is decided entirely by the
// build-time HAVE_NEON setting.
//
// Returns HAS_NEON when HAVE_NEON is enabled, otherwise 0.
static int arm_get_cpu_caps(void) {
  // This function should actually be a no-op. There is no way to adjust any of
  // these because the RTCD tables do not exist: the functions are called
  // statically.
  int flags = 0;
#if HAVE_NEON
  flags |= HAS_NEON;
#endif  // HAVE_NEON
  return flags;
}
34
35 #elif defined(__APPLE__) // end !CONFIG_RUNTIME_CPU_DETECT
36
// sysctlbyname() parameter documentation for instruction set characteristics:
// https://developer.apple.com/documentation/kernel/1387446-sysctlbyname/determining_instruction_set_characteristics
//
// Queries the sysctl entry named by `feature` and reports whether the value
// read back is non-zero.
//
// feature: name of the sysctl entry to query (e.g. "hw.optional.armv8_crc32").
//
// Returns false if sysctlbyname() reports failure, otherwise true exactly when
// the retrieved value is non-zero.
static inline bool have_feature(const char *feature) {
  int64_t feature_present = 0;
  size_t size = sizeof(feature_present);
  if (sysctlbyname(feature, &feature_present, &size, NULL, 0) != 0) {
    // A failed lookup is treated as "feature not present".
    return false;
  }
  return feature_present != 0;
}
47
// Capability query for Apple platforms.
//
// HAS_NEON is set unconditionally when HAVE_NEON is enabled. Each remaining
// feature is reported only if it is enabled in the build (HAVE_*) and
// have_feature() confirms the matching "hw.optional.*" sysctl entry.
//
// Returns a bitmask of HAS_* flags.
static int arm_get_cpu_caps(void) {
  int flags = 0;
#if HAVE_NEON
  flags |= HAS_NEON;
#endif  // HAVE_NEON
#if HAVE_ARM_CRC32
  if (have_feature("hw.optional.armv8_crc32")) flags |= HAS_ARM_CRC32;
#endif  // HAVE_ARM_CRC32
#if HAVE_NEON_DOTPROD
  if (have_feature("hw.optional.arm.FEAT_DotProd")) flags |= HAS_NEON_DOTPROD;
#endif  // HAVE_NEON_DOTPROD
#if HAVE_NEON_I8MM
  if (have_feature("hw.optional.arm.FEAT_I8MM")) flags |= HAS_NEON_I8MM;
#endif  // HAVE_NEON_I8MM
  return flags;
}
64
65 #elif defined(_WIN32) // end __APPLE__
66
// Capability query for Windows, based on IsProcessorFeaturePresent().
//
// A feature is reported only if it is enabled in the build (HAVE_*) and, for
// everything except Neon, the corresponding PF_* query succeeds. PF_*
// constants that the Windows SDK in use does not define are skipped, so the
// matching feature is simply never reported with such an SDK.
//
// Returns a bitmask of HAS_* flags.
static int arm_get_cpu_caps(void) {
  int flags = 0;
  // IsProcessorFeaturePresent() parameter documentation:
  // https://learn.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-isprocessorfeaturepresent#parameters
#if HAVE_NEON
  flags |= HAS_NEON;  // Neon is mandatory in Armv8.0-A.
#endif  // HAVE_NEON
#if HAVE_ARM_CRC32
  if (IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE)) {
    flags |= HAS_ARM_CRC32;
  }
#endif  // HAVE_ARM_CRC32
#if HAVE_NEON_DOTPROD
  // Support for PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE was added in Windows SDK
  // 20348, supported by Windows 11 and Windows Server 2022.
#if defined(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE)
  if (IsProcessorFeaturePresent(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE)) {
    flags |= HAS_NEON_DOTPROD;
  }
#endif  // defined(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE)
#endif  // HAVE_NEON_DOTPROD
#if HAVE_NEON_I8MM
  // Support for PF_ARM_SVE_I8MM_INSTRUCTIONS_AVAILABLE was added in Windows SDK
  // 26100.
#if defined(PF_ARM_SVE_I8MM_INSTRUCTIONS_AVAILABLE)
  // There's no PF_* flag that indicates whether plain I8MM is available
  // or not. But if SVE_I8MM is available, that also implies that
  // regular I8MM is available.
  if (IsProcessorFeaturePresent(PF_ARM_SVE_I8MM_INSTRUCTIONS_AVAILABLE)) {
    flags |= HAS_NEON_I8MM;
  }
#endif  // defined(PF_ARM_SVE_I8MM_INSTRUCTIONS_AVAILABLE)
#endif  // HAVE_NEON_I8MM
#if HAVE_SVE
  // Support for PF_ARM_SVE_INSTRUCTIONS_AVAILABLE was added in Windows SDK
  // 26100.
#if defined(PF_ARM_SVE_INSTRUCTIONS_AVAILABLE)
  if (IsProcessorFeaturePresent(PF_ARM_SVE_INSTRUCTIONS_AVAILABLE)) {
    flags |= HAS_SVE;
  }
#endif  // defined(PF_ARM_SVE_INSTRUCTIONS_AVAILABLE)
#endif  // HAVE_SVE
#if HAVE_SVE2
  // Support for PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE was added in Windows SDK
  // 26100.
#if defined(PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE)
  if (IsProcessorFeaturePresent(PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE)) {
    flags |= HAS_SVE2;
  }
#endif  // defined(PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE)
#endif  // HAVE_SVE2
  return flags;
}
119
120 #elif defined(AOM_USE_ANDROID_CPU_FEATURES)
121
// Capability query used when AOM_USE_ANDROID_CPU_FEATURES is defined.
// Only Neon is reported on this path; no other feature is probed.
//
// Returns HAS_NEON when HAVE_NEON is enabled, otherwise 0.
static int arm_get_cpu_caps(void) {
  int flags = 0;
#if HAVE_NEON
  flags |= HAS_NEON;  // Neon is mandatory in Armv8.0-A.
#endif  // HAVE_NEON
  return flags;
}
129
130 #elif defined(__linux__) // end defined(AOM_USE_ANDROID_CPU_FEATURES)
131
132 #include <sys/auxv.h>
133
// Define hwcap values ourselves: building with an old auxv header where these
// hwcap values are not defined should not prevent features from being enabled.
#define AOM_AARCH64_HWCAP_CRC32 (1 << 7)
#define AOM_AARCH64_HWCAP_ASIMDDP (1 << 20)
#define AOM_AARCH64_HWCAP_SVE (1 << 22)
#define AOM_AARCH64_HWCAP2_SVE2 (1 << 1)
#define AOM_AARCH64_HWCAP2_I8MM (1 << 13)

// Capability query for Linux, based on the AT_HWCAP / AT_HWCAP2 auxiliary
// vector entries read through getauxval().
//
// HAS_NEON is set unconditionally when HAVE_NEON is enabled. Every other
// feature is reported only if it is enabled in the build (HAVE_*) and its bit
// is set in the relevant hwcap word.
//
// Returns a bitmask of HAS_* flags.
static int arm_get_cpu_caps(void) {
  int flags = 0;
  // Each hwcap word is only fetched when at least one feature that reads it is
  // enabled, so a build without those features declares no unused variable.
#if HAVE_ARM_CRC32 || HAVE_NEON_DOTPROD || HAVE_SVE
  unsigned long hwcap = getauxval(AT_HWCAP);
#endif
#if HAVE_NEON_I8MM || HAVE_SVE2
  unsigned long hwcap2 = getauxval(AT_HWCAP2);
#endif

#if HAVE_NEON
  flags |= HAS_NEON;  // Neon is mandatory in Armv8.0-A.
#endif  // HAVE_NEON
#if HAVE_ARM_CRC32
  if (hwcap & AOM_AARCH64_HWCAP_CRC32) flags |= HAS_ARM_CRC32;
#endif  // HAVE_ARM_CRC32
#if HAVE_NEON_DOTPROD
  if (hwcap & AOM_AARCH64_HWCAP_ASIMDDP) flags |= HAS_NEON_DOTPROD;
#endif  // HAVE_NEON_DOTPROD
#if HAVE_NEON_I8MM
  if (hwcap2 & AOM_AARCH64_HWCAP2_I8MM) flags |= HAS_NEON_I8MM;
#endif  // HAVE_NEON_I8MM
#if HAVE_SVE
  if (hwcap & AOM_AARCH64_HWCAP_SVE) flags |= HAS_SVE;
#endif  // HAVE_SVE
#if HAVE_SVE2
  if (hwcap2 & AOM_AARCH64_HWCAP2_SVE2) flags |= HAS_SVE2;
#endif  // HAVE_SVE2
  return flags;
}
171
172 #elif defined(__Fuchsia__) // end __linux__
173
174 #include <zircon/features.h>
175 #include <zircon/syscalls.h>
176
177 // Added in https://fuchsia-review.googlesource.com/c/fuchsia/+/894282.
178 #ifndef ZX_ARM64_FEATURE_ISA_I8MM
179 #define ZX_ARM64_FEATURE_ISA_I8MM ((uint32_t)(1u << 19))
180 #endif
181 // Added in https://fuchsia-review.googlesource.com/c/fuchsia/+/895083.
182 #ifndef ZX_ARM64_FEATURE_ISA_SVE
183 #define ZX_ARM64_FEATURE_ISA_SVE ((uint32_t)(1u << 20))
184 #endif
185
arm_get_cpu_caps(void)186 static int arm_get_cpu_caps(void) {
187 int flags = 0;
188 #if HAVE_NEON
189 flags |= HAS_NEON; // Neon is mandatory in Armv8.0-A.
190 #endif // HAVE_NEON
191 uint32_t features;
192 zx_status_t status = zx_system_get_features(ZX_FEATURE_KIND_CPU, &features);
193 if (status != ZX_OK) return flags;
194 #if HAVE_ARM_CRC32
195 if (features & ZX_ARM64_FEATURE_ISA_CRC32) flags |= HAS_ARM_CRC32;
196 #endif // HAVE_ARM_CRC32
197 #if HAVE_NEON_DOTPROD
198 if (features & ZX_ARM64_FEATURE_ISA_DP) flags |= HAS_NEON_DOTPROD;
199 #endif // HAVE_NEON_DOTPROD
200 #if HAVE_NEON_I8MM
201 if (features & ZX_ARM64_FEATURE_ISA_I8MM) flags |= HAS_NEON_I8MM;
202 #endif // HAVE_NEON_I8MM
203 #if HAVE_SVE
204 if (features & ZX_ARM64_FEATURE_ISA_SVE) flags |= HAS_SVE;
205 #endif // HAVE_SVE
206 return flags;
207 }
208
209 #else // end __Fuchsia__
210 #error \
211 "Runtime CPU detection selected, but no CPU detection method " \
212 "available for your platform. Rerun cmake with -DCONFIG_RUNTIME_CPU_DETECT=0."
213 #endif
214
aom_arm_cpu_caps(void)215 int aom_arm_cpu_caps(void) {
216 int flags = 0;
217 if (!arm_cpu_env_flags(&flags)) {
218 flags = arm_get_cpu_caps() & arm_cpu_env_mask();
219 }
220
221 // Restrict flags: FEAT_I8MM assumes that FEAT_DotProd is available.
222 if (!(flags & HAS_NEON_DOTPROD)) flags &= ~HAS_NEON_I8MM;
223
224 // Restrict flags: SVE assumes that FEAT_{DotProd,I8MM} are available.
225 if (!(flags & HAS_NEON_DOTPROD)) flags &= ~HAS_SVE;
226 if (!(flags & HAS_NEON_I8MM)) flags &= ~HAS_SVE;
227
228 // Restrict flags: SVE2 assumes that FEAT_SVE is available.
229 if (!(flags & HAS_SVE)) flags &= ~HAS_SVE2;
230
231 return flags;
232 }
233