// xref: /aosp_15_r20/external/gemmlowp/internal/detect_platform.h (revision 5f39d1b313f0528e11bae88b3029b54b9e1033e7)
// Copyright 2018 The Gemmlowp Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// detect_platform.h: Sets up macros that control architecture-specific
// features of gemmlowp's implementation.

#ifndef GEMMLOWP_INTERNAL_DETECT_PLATFORM_H_
#define GEMMLOWP_INTERNAL_DETECT_PLATFORM_H_

// This header only defines preprocessor tokens. Apart from
// GEMMLOWP_NOINLINE and GEMMLOWP_MARK_MEMORY_AS_INITIALIZED, every token is
// defined empty and is meant to be tested with #ifdef / defined().

// Our inline assembly paths assume GCC/Clang syntax.
// Native Client doesn't seem to support inline assembly(?).
#if defined(__GNUC__) && !defined(__native_client__)
#define GEMMLOWP_ALLOW_INLINE_ASM
#endif

// GEMMLOWP_NOINLINE: expands to the GCC-style noinline attribute when
// __GNUC__ is defined, and to nothing for other compilers.
#if defined(__GNUC__)
#define GEMMLOWP_NOINLINE __attribute__((noinline))
#else
#define GEMMLOWP_NOINLINE
#endif

// Detect ARM, 32-bit or 64-bit.
#if defined(__arm__)
#define GEMMLOWP_ARM_32
#endif

#if defined(__aarch64__)
#define GEMMLOWP_ARM_64
#endif

// Umbrella token: set for either ARM width.
#if defined(GEMMLOWP_ARM_32) || defined(GEMMLOWP_ARM_64)
#define GEMMLOWP_ARM
#endif

// Detect MIPS, 32-bit or 64-bit. The width is chosen from __LP64__.
#if defined(__mips) && !defined(__LP64__)
#define GEMMLOWP_MIPS_32
#endif

#if defined(__mips) && defined(__LP64__)
#define GEMMLOWP_MIPS_64
#endif

// Umbrella token: set for either MIPS width.
#if defined(GEMMLOWP_MIPS_32) || defined(GEMMLOWP_MIPS_64)
#define GEMMLOWP_MIPS
#endif

// Detect x86, 32-bit or 64-bit.
#if defined(__i386__) || defined(_M_IX86) || defined(_X86_) || defined(__i386)
#define GEMMLOWP_X86_32
#endif

#if defined(__x86_64__) || defined(_M_X64) || defined(__amd64)
#define GEMMLOWP_X86_64
#endif

// Umbrella token: set for either x86 width.
#if defined(GEMMLOWP_X86_32) || defined(GEMMLOWP_X86_64)
#define GEMMLOWP_X86
#endif

// Detect WebAssembly SIMD.
#if defined(__wasm_simd128__)
#define GEMMLOWP_WASMSIMD
#endif

// Some of our optimized paths use inline assembly and for
// now we don't bother enabling some other optimized paths using intrinsics
// where we can't use inline assembly paths.
// Consequently every SIMD token below (NEON, MSA, AVX2, SSE4, SSE3) and the
// sanitizer hook are only defined when GEMMLOWP_ALLOW_INLINE_ASM is.
#if defined(GEMMLOWP_ALLOW_INLINE_ASM)

// Detect NEON. It's important to check for both tokens.
#if defined(__ARM_NEON) || defined(__ARM_NEON__)
#define GEMMLOWP_NEON
#endif

// Convenience NEON tokens for 32-bit or 64-bit.
#if defined(GEMMLOWP_NEON) && defined(GEMMLOWP_ARM_32)
#define GEMMLOWP_NEON_32
#endif

#if defined(GEMMLOWP_NEON) && defined(GEMMLOWP_ARM_64)
#define GEMMLOWP_NEON_64
#endif

// Detect MIPS MSA.
// Limit MSA optimizations to little-endian CPUs for now.
// TODO: Perhaps, eventually support MSA optimizations on big-endian CPUs?
// __mips_isa_rev and __BYTE_ORDER__ are both checked with defined() before
// being compared, so that builds using -Wundef stay quiet on targets that
// do not provide them. An undefined identifier would evaluate to 0 anyway,
// so the guards do not change the outcome.
#if defined(GEMMLOWP_MIPS) && defined(__mips_isa_rev) && \
    (__mips_isa_rev >= 5) && defined(__mips_msa) &&      \
    defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
#define GEMMLOWP_MSA
#endif

// Convenience MIPS MSA tokens for 32-bit or 64-bit.
#if defined(GEMMLOWP_MSA) && defined(GEMMLOWP_MIPS_32)
#define GEMMLOWP_MSA_32
#endif

#if defined(GEMMLOWP_MSA) && defined(GEMMLOWP_MIPS_64)
#define GEMMLOWP_MSA_64
#endif

// Detect the x86 SIMD level. At most one of GEMMLOWP_AVX2, GEMMLOWP_SSE4 and
// GEMMLOWP_SSE3 is defined; the first matching branch below wins.
//
// AVX2 is opt-in: besides compiler support (__AVX2__), the build has to pass
// -D GEMMLOWP_ENABLE_AVX2.
#if defined(__AVX2__) && defined(GEMMLOWP_ENABLE_AVX2)
#define GEMMLOWP_AVX2
// Detect SSE4.
// MSVC does not have __SSE4_1__ macro, but will enable SSE4
// when AVX is turned on.
#elif defined(__SSE4_1__) || (defined(_MSC_VER) && defined(__AVX__))
#define GEMMLOWP_SSE4
// Detect SSE3.
#elif defined(__SSE3__)
#define GEMMLOWP_SSE3
#endif

// Convenience SSE4 / SSE3 / AVX2 tokens for 32-bit or 64-bit.
// NOTE: GEMMLOWP_DISABLE_SSE4 only suppresses GEMMLOWP_SSE4_32 and
// GEMMLOWP_SSE4_64. GEMMLOWP_SSE4 itself stays defined, so the SSE3 tokens
// are not defined as a fallback in that configuration.
#if defined(GEMMLOWP_SSE4) && defined(GEMMLOWP_X86_32) && \
    !defined(GEMMLOWP_DISABLE_SSE4)
#define GEMMLOWP_SSE4_32
#endif

#if defined(GEMMLOWP_SSE3) && defined(GEMMLOWP_X86_32)
#define GEMMLOWP_SSE3_32
#endif

#if defined(GEMMLOWP_SSE4) && defined(GEMMLOWP_X86_64) && \
    !defined(GEMMLOWP_DISABLE_SSE4)
#define GEMMLOWP_SSE4_64
#endif

#if defined(GEMMLOWP_SSE3) && defined(GEMMLOWP_X86_64)
#define GEMMLOWP_SSE3_64
#endif

// There is no 32-bit AVX2 convenience token.
#if defined(GEMMLOWP_AVX2) && defined(GEMMLOWP_X86_64)
#define GEMMLOWP_AVX2_64
#endif

// Sanitizer hook. GEMMLOWP_MARK_MEMORY_AS_INITIALIZED names the unpoison
// function of the active sanitizer (MemorySanitizer is checked first, then
// AddressSanitizer). It is left undefined when neither is active or when
// the compiler has no __has_feature, so users must guard its use with
// #ifdef.
// __has_feature is tested on its own line: compilers that lack it could not
// parse the function-like invocation in the nested #if.
#if defined(__has_feature)
#if __has_feature(memory_sanitizer)
#include <sanitizer/msan_interface.h>
#define GEMMLOWP_MARK_MEMORY_AS_INITIALIZED __msan_unpoison
#elif __has_feature(address_sanitizer)
#include <sanitizer/asan_interface.h>
#define GEMMLOWP_MARK_MEMORY_AS_INITIALIZED __asan_unpoison_memory_region
#endif
#endif

#endif  // GEMMLOWP_ALLOW_INLINE_ASM

// Detect Android. Don't conflate with ARM - we care about tuning
// for non-ARM Android devices too. This can be used in conjunction
// with x86 to tune differently for mobile x86 CPUs (Atom) vs. desktop x86 CPUs.
#if defined(__ANDROID__) || defined(ANDROID)
#define GEMMLOWP_ANDROID
#endif

#endif  // GEMMLOWP_INTERNAL_DETECT_PLATFORM_H_