xref: /aosp_15_r20/external/libaom/aom_ports/aarch64_cpudetect.c (revision 77c1e3ccc04c968bd2bc212e87364f250e820521)
1 /*
2  * Copyright (c) 2023, Alliance for Open Media. All rights reserved.
3  *
4  * This source code is subject to the terms of the BSD 2 Clause License and
5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6  * was not distributed with this source code in the LICENSE file, you can
7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8  * Media Patent License 1.0 was not distributed with this source code in the
9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10  */
11 
12 #include "config/aom_config.h"
13 
14 #include "arm_cpudetect.h"
15 
16 #include "aom_ports/arm.h"
17 
18 #if defined(__APPLE__)
19 #include <sys/sysctl.h>
20 #endif
21 
22 #if !CONFIG_RUNTIME_CPU_DETECT
23 
// Capability mask used when runtime CPU detection is compiled out.
//
// Without runtime detection there are no RTCD tables and every function is
// called statically, so nothing returned here can change which code runs.
// The only bit that can be set is HAS_NEON, and only when the build enables
// Neon (HAVE_NEON).
static int arm_get_cpu_caps(void) {
#if HAVE_NEON
  return HAS_NEON;
#else
  return 0;
#endif  // HAVE_NEON
}
34 
35 #elif defined(__APPLE__)  // end !CONFIG_RUNTIME_CPU_DETECT
36 
37 // sysctlbyname() parameter documentation for instruction set characteristics:
38 // https://developer.apple.com/documentation/kernel/1387446-sysctlbyname/determining_instruction_set_characteristics
// Queries the sysctl entry named by `feature` and reports whether it holds a
// non-zero value.
//
// Returns false when sysctlbyname() itself fails (non-zero return) or when
// the value read back is zero; returns true otherwise.
static inline bool have_feature(const char *feature) {
  // Pre-zeroed so that a read which fills fewer than sizeof(value) bytes
  // leaves the remaining bytes at zero.
  int64_t value = 0;
  size_t value_size = sizeof(value);
  const int rc = sysctlbyname(feature, &value, &value_size, NULL, 0);
  return rc == 0 && value != 0;
}
47 
// Builds the HAS_* capability mask on Apple platforms.
//
// Neon is reported whenever the build enables it. CRC32, DotProd and I8MM
// are each reported only when the build enables them and the corresponding
// "hw.optional.*" sysctl entry reads as present via have_feature().
static int arm_get_cpu_caps(void) {
  int caps = 0;
#if HAVE_NEON
  caps |= HAS_NEON;
#endif  // HAVE_NEON
#if HAVE_ARM_CRC32
  if (have_feature("hw.optional.armv8_crc32")) {
    caps |= HAS_ARM_CRC32;
  }
#endif  // HAVE_ARM_CRC32
#if HAVE_NEON_DOTPROD
  if (have_feature("hw.optional.arm.FEAT_DotProd")) {
    caps |= HAS_NEON_DOTPROD;
  }
#endif  // HAVE_NEON_DOTPROD
#if HAVE_NEON_I8MM
  if (have_feature("hw.optional.arm.FEAT_I8MM")) {
    caps |= HAS_NEON_I8MM;
  }
#endif  // HAVE_NEON_I8MM
  return caps;
}
64 
65 #elif defined(_WIN32)  // end __APPLE__
66 
// Builds the HAS_* capability mask on Windows.
//
// Optional features are probed with IsProcessorFeaturePresent(); parameter
// documentation:
// https://learn.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-isprocessorfeaturepresent#parameters
//
// A probe is compiled in only when the feature is enabled in the build and
// the Windows SDK being used defines the matching PF_* constant.
static int arm_get_cpu_caps(void) {
  int caps = 0;

#if HAVE_NEON
  // Neon is mandatory in Armv8.0-A, so it needs no runtime probe.
  caps |= HAS_NEON;
#endif  // HAVE_NEON

#if HAVE_ARM_CRC32
  if (IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE))
    caps |= HAS_ARM_CRC32;
#endif  // HAVE_ARM_CRC32

// PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE was added in Windows SDK 20348,
// supported by Windows 11 and Windows Server 2022.
#if HAVE_NEON_DOTPROD && defined(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE)
  if (IsProcessorFeaturePresent(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE))
    caps |= HAS_NEON_DOTPROD;
#endif  // HAVE_NEON_DOTPROD && defined(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE)

// PF_ARM_SVE_I8MM_INSTRUCTIONS_AVAILABLE was added in Windows SDK 26100.
#if HAVE_NEON_I8MM && defined(PF_ARM_SVE_I8MM_INSTRUCTIONS_AVAILABLE)
  // No PF_* flag reports plain I8MM on its own. SVE_I8MM being available
  // also implies that regular I8MM is available, so it is used as the probe.
  if (IsProcessorFeaturePresent(PF_ARM_SVE_I8MM_INSTRUCTIONS_AVAILABLE))
    caps |= HAS_NEON_I8MM;
#endif  // HAVE_NEON_I8MM && defined(PF_ARM_SVE_I8MM_INSTRUCTIONS_AVAILABLE)

// PF_ARM_SVE_INSTRUCTIONS_AVAILABLE was added in Windows SDK 26100.
#if HAVE_SVE && defined(PF_ARM_SVE_INSTRUCTIONS_AVAILABLE)
  if (IsProcessorFeaturePresent(PF_ARM_SVE_INSTRUCTIONS_AVAILABLE))
    caps |= HAS_SVE;
#endif  // HAVE_SVE && defined(PF_ARM_SVE_INSTRUCTIONS_AVAILABLE)

// PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE was added in Windows SDK 26100.
#if HAVE_SVE2 && defined(PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE)
  if (IsProcessorFeaturePresent(PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE))
    caps |= HAS_SVE2;
#endif  // HAVE_SVE2 && defined(PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE)

  return caps;
}
119 
120 #elif defined(AOM_USE_ANDROID_CPU_FEATURES)
121 
// Capability mask for builds that define AOM_USE_ANDROID_CPU_FEATURES.
//
// No runtime probing is done on this path: the only bit ever reported is
// HAS_NEON, which is set unconditionally when the build enables Neon because
// Neon is mandatory in Armv8.0-A.
static int arm_get_cpu_caps(void) {
#if HAVE_NEON
  return HAS_NEON;
#else
  return 0;
#endif  // HAVE_NEON
}
129 
130 #elif defined(__linux__)  // end defined(AOM_USE_ANDROID_CPU_FEATURES)
131 
132 #include <sys/auxv.h>
133 
// The hwcap bit values are spelled out here rather than taken from the system
// headers: building against an old auxv header that lacks them must not
// prevent a feature from being enabled.
#define AOM_AARCH64_HWCAP_CRC32 (1 << 7)
#define AOM_AARCH64_HWCAP_ASIMDDP (1 << 20)
#define AOM_AARCH64_HWCAP_SVE (1 << 22)
#define AOM_AARCH64_HWCAP2_SVE2 (1 << 1)
#define AOM_AARCH64_HWCAP2_I8MM (1 << 13)

// Builds the HAS_* capability mask on Linux from the AT_HWCAP and AT_HWCAP2
// values returned by getauxval().
//
// Each word is fetched only when at least one feature that reads it is
// enabled in the build, which also avoids declaring an unused variable.
static int arm_get_cpu_caps(void) {
#if HAVE_ARM_CRC32 || HAVE_NEON_DOTPROD || HAVE_SVE
  const unsigned long hwcap_bits = getauxval(AT_HWCAP);
#endif
#if HAVE_NEON_I8MM || HAVE_SVE2
  const unsigned long hwcap2_bits = getauxval(AT_HWCAP2);
#endif
  int caps = 0;

#if HAVE_NEON
  // Neon is mandatory in Armv8.0-A, so it needs no runtime probe.
  caps |= HAS_NEON;
#endif  // HAVE_NEON
#if HAVE_ARM_CRC32
  caps |= (hwcap_bits & AOM_AARCH64_HWCAP_CRC32) ? HAS_ARM_CRC32 : 0;
#endif  // HAVE_ARM_CRC32
#if HAVE_NEON_DOTPROD
  caps |= (hwcap_bits & AOM_AARCH64_HWCAP_ASIMDDP) ? HAS_NEON_DOTPROD : 0;
#endif  // HAVE_NEON_DOTPROD
#if HAVE_NEON_I8MM
  caps |= (hwcap2_bits & AOM_AARCH64_HWCAP2_I8MM) ? HAS_NEON_I8MM : 0;
#endif  // HAVE_NEON_I8MM
#if HAVE_SVE
  caps |= (hwcap_bits & AOM_AARCH64_HWCAP_SVE) ? HAS_SVE : 0;
#endif  // HAVE_SVE
#if HAVE_SVE2
  caps |= (hwcap2_bits & AOM_AARCH64_HWCAP2_SVE2) ? HAS_SVE2 : 0;
#endif  // HAVE_SVE2
  return caps;
}
171 
172 #elif defined(__Fuchsia__)  // end __linux__
173 
174 #include <zircon/features.h>
175 #include <zircon/syscalls.h>
176 
177 // Added in https://fuchsia-review.googlesource.com/c/fuchsia/+/894282.
178 #ifndef ZX_ARM64_FEATURE_ISA_I8MM
179 #define ZX_ARM64_FEATURE_ISA_I8MM ((uint32_t)(1u << 19))
180 #endif
181 // Added in https://fuchsia-review.googlesource.com/c/fuchsia/+/895083.
182 #ifndef ZX_ARM64_FEATURE_ISA_SVE
183 #define ZX_ARM64_FEATURE_ISA_SVE ((uint32_t)(1u << 20))
184 #endif
185 
arm_get_cpu_caps(void)186 static int arm_get_cpu_caps(void) {
187   int flags = 0;
188 #if HAVE_NEON
189   flags |= HAS_NEON;  // Neon is mandatory in Armv8.0-A.
190 #endif  // HAVE_NEON
191   uint32_t features;
192   zx_status_t status = zx_system_get_features(ZX_FEATURE_KIND_CPU, &features);
193   if (status != ZX_OK) return flags;
194 #if HAVE_ARM_CRC32
195   if (features & ZX_ARM64_FEATURE_ISA_CRC32) flags |= HAS_ARM_CRC32;
196 #endif  // HAVE_ARM_CRC32
197 #if HAVE_NEON_DOTPROD
198   if (features & ZX_ARM64_FEATURE_ISA_DP) flags |= HAS_NEON_DOTPROD;
199 #endif  // HAVE_NEON_DOTPROD
200 #if HAVE_NEON_I8MM
201   if (features & ZX_ARM64_FEATURE_ISA_I8MM) flags |= HAS_NEON_I8MM;
202 #endif  // HAVE_NEON_I8MM
203 #if HAVE_SVE
204   if (features & ZX_ARM64_FEATURE_ISA_SVE) flags |= HAS_SVE;
205 #endif  // HAVE_SVE
206   return flags;
207 }
208 
209 #else  // end __Fuchsia__
210 #error \
211     "Runtime CPU detection selected, but no CPU detection method " \
212 "available for your platform. Rerun cmake with -DCONFIG_RUNTIME_CPU_DETECT=0."
213 #endif
214 
aom_arm_cpu_caps(void)215 int aom_arm_cpu_caps(void) {
216   int flags = 0;
217   if (!arm_cpu_env_flags(&flags)) {
218     flags = arm_get_cpu_caps() & arm_cpu_env_mask();
219   }
220 
221   // Restrict flags: FEAT_I8MM assumes that FEAT_DotProd is available.
222   if (!(flags & HAS_NEON_DOTPROD)) flags &= ~HAS_NEON_I8MM;
223 
224   // Restrict flags: SVE assumes that FEAT_{DotProd,I8MM} are available.
225   if (!(flags & HAS_NEON_DOTPROD)) flags &= ~HAS_SVE;
226   if (!(flags & HAS_NEON_I8MM)) flags &= ~HAS_SVE;
227 
228   // Restrict flags: SVE2 assumes that FEAT_SVE is available.
229   if (!(flags & HAS_SVE)) flags &= ~HAS_SVE2;
230 
231   return flags;
232 }
233