xref: /aosp_15_r20/external/libvpx/vpx_ports/aarch64_cpudetect.c (revision fb1b10ab9aebc7c7068eedab379b749d7e3900be)
1 /*
2  *  Copyright (c) 2023 The WebM project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "./vpx_config.h"
12 #include "vpx_ports/arm.h"
13 #include "vpx_ports/arm_cpudetect.h"
14 
15 #if defined(__APPLE__)
16 #include <sys/sysctl.h>
17 #endif
18 
19 #if !CONFIG_RUNTIME_CPU_DETECT
20 
// Returns the CPU capability flags for builds without runtime CPU detection.
// The result is fixed at compile time: HAS_NEON when the build enables
// HAVE_NEON, otherwise 0.
static int arm_get_cpu_caps(void) {
  // This function should actually be a no-op. There is no way to adjust any of
  // these because the RTCD tables do not exist: the functions are called
  // statically.
  int flags = 0;
#if HAVE_NEON
  flags |= HAS_NEON;
#endif  // HAVE_NEON
  return flags;
}
31 
32 #elif defined(__APPLE__)  // end !CONFIG_RUNTIME_CPU_DETECT
33 
34 // sysctlbyname() parameter documentation for instruction set characteristics:
35 // https://developer.apple.com/documentation/kernel/1387446-sysctlbyname/determining_instruction_set_characteristics
have_feature(const char * feature)36 static INLINE int64_t have_feature(const char *feature) {
37   int64_t feature_present = 0;
38   size_t size = sizeof(feature_present);
39   if (sysctlbyname(feature, &feature_present, &size, NULL, 0) != 0) {
40     return 0;
41   }
42   return feature_present;
43 }
44 
// Returns the CPU capability flags detected on Apple platforms.
//
// HAS_NEON is set unconditionally when the build enables HAVE_NEON. The
// DotProd and I8MM flags are set only when the build enables them and the
// matching "hw.optional.arm.FEAT_*" sysctl reports a non-zero value.
static int arm_get_cpu_caps(void) {
  int flags = 0;
#if HAVE_NEON
  flags |= HAS_NEON;
#endif  // HAVE_NEON
#if HAVE_NEON_DOTPROD
  if (have_feature("hw.optional.arm.FEAT_DotProd")) {
    flags |= HAS_NEON_DOTPROD;
  }
#endif  // HAVE_NEON_DOTPROD
#if HAVE_NEON_I8MM
  if (have_feature("hw.optional.arm.FEAT_I8MM")) {
    flags |= HAS_NEON_I8MM;
  }
#endif  // HAVE_NEON_I8MM
  return flags;
}
62 
63 #elif defined(_WIN32)  // end __APPLE__
64 
// Returns the CPU capability flags detected on Windows.
//
// Each optional feature is probed with IsProcessorFeaturePresent(), but only
// when the build enables the feature AND the Windows SDK in use defines the
// corresponding PF_* constant. With an older SDK the probe is compiled out and
// the flag is never set.
static int arm_get_cpu_caps(void) {
  int flags = 0;
// IsProcessorFeaturePresent() parameter documentation:
// https://learn.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-isprocessorfeaturepresent#parameters
#if HAVE_NEON
  flags |= HAS_NEON;  // Neon is mandatory in Armv8.0-A.
#endif  // HAVE_NEON
#if HAVE_NEON_DOTPROD
// Support for PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE was added in Windows SDK
// 20348, supported by Windows 11 and Windows Server 2022.
#if defined(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE)
  if (IsProcessorFeaturePresent(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE)) {
    flags |= HAS_NEON_DOTPROD;
  }
#endif  // defined(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE)
#endif  // HAVE_NEON_DOTPROD
#if HAVE_NEON_I8MM
// Support for PF_ARM_SVE_I8MM_INSTRUCTIONS_AVAILABLE was added in Windows SDK
// 26100.
#if defined(PF_ARM_SVE_I8MM_INSTRUCTIONS_AVAILABLE)
  // There's no PF_* flag that indicates whether plain I8MM is available
  // or not. But if SVE_I8MM is available, that also implies that
  // regular I8MM is available.
  if (IsProcessorFeaturePresent(PF_ARM_SVE_I8MM_INSTRUCTIONS_AVAILABLE)) {
    flags |= HAS_NEON_I8MM;
  }
#endif  // defined(PF_ARM_SVE_I8MM_INSTRUCTIONS_AVAILABLE)
#endif  // HAVE_NEON_I8MM
#if HAVE_SVE
// Support for PF_ARM_SVE_INSTRUCTIONS_AVAILABLE was added in Windows SDK 26100.
#if defined(PF_ARM_SVE_INSTRUCTIONS_AVAILABLE)
  if (IsProcessorFeaturePresent(PF_ARM_SVE_INSTRUCTIONS_AVAILABLE)) {
    flags |= HAS_SVE;
  }
#endif  // defined(PF_ARM_SVE_INSTRUCTIONS_AVAILABLE)
#endif  // HAVE_SVE
#if HAVE_SVE2
// Support for PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE was added in Windows SDK
// 26100.
#if defined(PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE)
  if (IsProcessorFeaturePresent(PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE)) {
    flags |= HAS_SVE2;
  }
#endif  // defined(PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE)
#endif  // HAVE_SVE2
  return flags;
}
112 
113 #elif defined(VPX_USE_ANDROID_CPU_FEATURES)
114 
// Returns the CPU capability flags for builds that define
// VPX_USE_ANDROID_CPU_FEATURES. Only HAS_NEON is reported (when the build
// enables HAVE_NEON); no optional extension is probed in this branch.
static int arm_get_cpu_caps(void) {
  int flags = 0;
#if HAVE_NEON
  flags |= HAS_NEON;  // Neon is mandatory in Armv8.0-A.
#endif  // HAVE_NEON
  return flags;
}
122 
123 #elif defined(__linux__)  // end defined(VPX_USE_ANDROID_CPU_FEATURES)
124 
125 #include <sys/auxv.h>
126 
127 // Define hwcap values ourselves: building with an old auxv header where these
128 // hwcap values are not defined should not prevent features from being enabled.
129 #define VPX_AARCH64_HWCAP_ASIMDDP (1 << 20)
130 #define VPX_AARCH64_HWCAP_SVE (1 << 22)
131 #define VPX_AARCH64_HWCAP2_SVE2 (1 << 1)
132 #define VPX_AARCH64_HWCAP2_I8MM (1 << 13)
133 
// Returns the CPU capability flags detected on Linux.
//
// Optional features are read from the auxiliary vector: DotProd and SVE from
// AT_HWCAP, I8MM and SVE2 from AT_HWCAP2, using the VPX_AARCH64_HWCAP* bit
// masks defined in this file. Each getauxval() call is compiled in only when
// at least one feature that needs it is enabled, which avoids unused-variable
// warnings in reduced builds.
static int arm_get_cpu_caps(void) {
  int flags = 0;
#if HAVE_NEON_DOTPROD || HAVE_SVE
  unsigned long hwcap = getauxval(AT_HWCAP);
#endif  // HAVE_NEON_DOTPROD || HAVE_SVE
#if HAVE_NEON_I8MM || HAVE_SVE2
  unsigned long hwcap2 = getauxval(AT_HWCAP2);
#endif  // HAVE_NEON_I8MM || HAVE_SVE2
#if HAVE_NEON
  flags |= HAS_NEON;  // Neon is mandatory in Armv8.0-A.
#endif  // HAVE_NEON
#if HAVE_NEON_DOTPROD
  if (hwcap & VPX_AARCH64_HWCAP_ASIMDDP) {
    flags |= HAS_NEON_DOTPROD;
  }
#endif  // HAVE_NEON_DOTPROD
#if HAVE_NEON_I8MM
  if (hwcap2 & VPX_AARCH64_HWCAP2_I8MM) {
    flags |= HAS_NEON_I8MM;
  }
#endif  // HAVE_NEON_I8MM
#if HAVE_SVE
  if (hwcap & VPX_AARCH64_HWCAP_SVE) {
    flags |= HAS_SVE;
  }
#endif  // HAVE_SVE
#if HAVE_SVE2
  if (hwcap2 & VPX_AARCH64_HWCAP2_SVE2) {
    flags |= HAS_SVE2;
  }
#endif  // HAVE_SVE2
  return flags;
}
167 
168 #elif defined(__Fuchsia__)  // end __linux__
169 
170 #include <zircon/features.h>
171 #include <zircon/syscalls.h>
172 
173 // Added in https://fuchsia-review.googlesource.com/c/fuchsia/+/894282.
174 #ifndef ZX_ARM64_FEATURE_ISA_I8MM
175 #define ZX_ARM64_FEATURE_ISA_I8MM ((uint32_t)(1u << 19))
176 #endif
177 // Added in https://fuchsia-review.googlesource.com/c/fuchsia/+/895083.
178 #ifndef ZX_ARM64_FEATURE_ISA_SVE
179 #define ZX_ARM64_FEATURE_ISA_SVE ((uint32_t)(1u << 20))
180 #endif
181 
arm_get_cpu_caps(void)182 static int arm_get_cpu_caps(void) {
183   int flags = 0;
184 #if HAVE_NEON
185   flags |= HAS_NEON;  // Neon is mandatory in Armv8.0-A.
186 #endif  // HAVE_NEON
187   uint32_t features;
188   zx_status_t status = zx_system_get_features(ZX_FEATURE_KIND_CPU, &features);
189   if (status != ZX_OK) {
190     return flags;
191   }
192 #if HAVE_NEON_DOTPROD
193   if (features & ZX_ARM64_FEATURE_ISA_DP) {
194     flags |= HAS_NEON_DOTPROD;
195   }
196 #endif  // HAVE_NEON_DOTPROD
197 #if HAVE_NEON_I8MM
198   if (features & ZX_ARM64_FEATURE_ISA_I8MM) {
199     flags |= HAS_NEON_I8MM;
200   }
201 #endif  // HAVE_NEON_I8MM
202 #if HAVE_SVE
203   if (features & ZX_ARM64_FEATURE_ISA_SVE) {
204     flags |= HAS_SVE;
205   }
206 #endif  // HAVE_SVE
207   return flags;
208 }
209 
210 #else  // end __Fuchsia__
211 #error \
212     "Runtime CPU detection selected, but no CPU detection method available" \
213 "for your platform. Rerun configure with --disable-runtime-cpu-detect."
214 #endif
215 
arm_cpu_caps(void)216 int arm_cpu_caps(void) {
217   int flags = 0;
218   if (!arm_cpu_env_flags(&flags)) {
219     flags = arm_get_cpu_caps() & arm_cpu_env_mask();
220   }
221 
222   // Restrict flags: FEAT_I8MM assumes that FEAT_DotProd is available.
223   if (!(flags & HAS_NEON_DOTPROD)) {
224     flags &= ~HAS_NEON_I8MM;
225   }
226 
227   // Restrict flags: FEAT_SVE assumes that FEAT_{DotProd,I8MM} are available.
228   if (!(flags & HAS_NEON_DOTPROD)) {
229     flags &= ~HAS_SVE;
230   }
231   if (!(flags & HAS_NEON_I8MM)) {
232     flags &= ~HAS_SVE;
233   }
234 
235   // Restrict flags: FEAT_SVE2 assumes that FEAT_SVE is available.
236   if (!(flags & HAS_SVE)) {
237     flags &= ~HAS_SVE2;
238   }
239 
240   return flags;
241 }
242