xref: /aosp_15_r20/external/libaom/aom_ports/x86.h (revision 77c1e3ccc04c968bd2bc212e87364f250e820521)
1*77c1e3ccSAndroid Build Coastguard Worker /*
2*77c1e3ccSAndroid Build Coastguard Worker  * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
3*77c1e3ccSAndroid Build Coastguard Worker  *
4*77c1e3ccSAndroid Build Coastguard Worker  * This source code is subject to the terms of the BSD 2 Clause License and
5*77c1e3ccSAndroid Build Coastguard Worker  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6*77c1e3ccSAndroid Build Coastguard Worker  * was not distributed with this source code in the LICENSE file, you can
7*77c1e3ccSAndroid Build Coastguard Worker  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8*77c1e3ccSAndroid Build Coastguard Worker  * Media Patent License 1.0 was not distributed with this source code in the
9*77c1e3ccSAndroid Build Coastguard Worker  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10*77c1e3ccSAndroid Build Coastguard Worker  */
11*77c1e3ccSAndroid Build Coastguard Worker 
12*77c1e3ccSAndroid Build Coastguard Worker #ifndef AOM_AOM_PORTS_X86_H_
13*77c1e3ccSAndroid Build Coastguard Worker #define AOM_AOM_PORTS_X86_H_
14*77c1e3ccSAndroid Build Coastguard Worker #include <stdlib.h>
15*77c1e3ccSAndroid Build Coastguard Worker 
16*77c1e3ccSAndroid Build Coastguard Worker #if defined(_MSC_VER)
17*77c1e3ccSAndroid Build Coastguard Worker #include <intrin.h> /* For __cpuidex, __rdtsc */
18*77c1e3ccSAndroid Build Coastguard Worker #endif
19*77c1e3ccSAndroid Build Coastguard Worker 
20*77c1e3ccSAndroid Build Coastguard Worker #include "aom/aom_integer.h"
21*77c1e3ccSAndroid Build Coastguard Worker #include "config/aom_config.h"
22*77c1e3ccSAndroid Build Coastguard Worker 
23*77c1e3ccSAndroid Build Coastguard Worker #ifdef __cplusplus
24*77c1e3ccSAndroid Build Coastguard Worker extern "C" {
25*77c1e3ccSAndroid Build Coastguard Worker #endif
26*77c1e3ccSAndroid Build Coastguard Worker 
/* CPU identifiers (named after the constants below). AOM_CPU_UNKNOWN is the
 * out-of-band value -1, the named entries are consecutive starting at 0, and
 * AOM_CPU_LAST is one past the final named entry. */
typedef enum {
  AOM_CPU_UNKNOWN = -1,
  AOM_CPU_AMD = 0,
  AOM_CPU_AMD_OLD = 1,
  AOM_CPU_CENTAUR = 2,
  AOM_CPU_CYRIX = 3,
  AOM_CPU_INTEL = 4,
  AOM_CPU_NEXGEN = 5,
  AOM_CPU_NSC = 6,
  AOM_CPU_RISE = 7,
  AOM_CPU_SIS = 8,
  AOM_CPU_TRANSMETA = 9,
  AOM_CPU_TRANSMETA_OLD = 10,
  AOM_CPU_UMC = 11,
  AOM_CPU_VIA = 12,

  AOM_CPU_LAST = 13 /* Count of named entries; keep last. */
} aom_cpu_t;
45*77c1e3ccSAndroid Build Coastguard Worker 
/*
 * cpuid(func, func2, ax, bx, cx, dx)
 *
 * Executes the CPUID instruction with EAX = func and ECX = func2 and stores
 * the resulting EAX, EBX, ECX and EDX into the lvalues ax, bx, cx and dx.
 * One definition is selected per toolchain (GNU-style, Sun Studio, or the
 * remaining compilers, which are handled with MSVC intrinsics / inline asm)
 * and per AOM_ARCH_X86_64.
 */
#if defined(__GNUC__) || defined(__ANDROID__)
#if AOM_ARCH_X86_64
/* 64-bit GNU-style: EBX is returned directly through the "=b" constraint. */
#define cpuid(func, func2, ax, bx, cx, dx)                      \
  __asm__ __volatile__("cpuid           \n\t"                   \
                       : "=a"(ax), "=b"(bx), "=c"(cx), "=d"(dx) \
                       : "a"(func), "c"(func2))
#else
/* 32-bit GNU-style: EBX is saved in EDI before cpuid and swapped back after,
 * so the caller's EBX is unchanged and CPUID's EBX is returned through EDI
 * ("=D"). Presumably done because EBX may be reserved (e.g. for PIC) on
 * 32-bit targets -- TODO confirm. */
#define cpuid(func, func2, ax, bx, cx, dx)     \
  __asm__ __volatile__(                        \
      "mov %%ebx, %%edi   \n\t"                \
      "cpuid              \n\t"                \
      "xchg %%edi, %%ebx  \n\t"                \
      : "=a"(ax), "=D"(bx), "=c"(cx), "=d"(dx) \
      : "a"(func), "c"(func2))
#endif
#elif defined(__SUNPRO_C) || \
    defined(__SUNPRO_CC) /* end __GNUC__ or __ANDROID__*/
#if AOM_ARCH_X86_64
/* 64-bit Sun Studio: RBX is parked in RSI around cpuid and CPUID's EBX is
 * copied to EDI ("=D").
 * NOTE(review): after the second xchg, RSI holds the EBX value produced by
 * cpuid, yet RSI is neither an output nor a clobber here -- confirm whether
 * this can corrupt a value the compiler keeps in RSI. */
#define cpuid(func, func2, ax, bx, cx, dx)     \
  asm volatile(                                \
      "xchg %rsi, %rbx \n\t"                   \
      "cpuid           \n\t"                   \
      "movl %ebx, %edi \n\t"                   \
      "xchg %rsi, %rbx \n\t"                   \
      : "=a"(ax), "=D"(bx), "=c"(cx), "=d"(dx) \
      : "a"(func), "c"(func2))
#else
/* 32-bit Sun Studio: EBX is preserved on the stack and CPUID's EBX is
 * returned through EDI ("=D"). */
#define cpuid(func, func2, ax, bx, cx, dx)     \
  asm volatile(                                \
      "pushl %ebx       \n\t"                  \
      "cpuid            \n\t"                  \
      "movl %ebx, %edi  \n\t"                  \
      "popl %ebx        \n\t"                  \
      : "=a"(ax), "=D"(bx), "=c"(cx), "=d"(dx) \
      : "a"(func), "c"(func2))
#endif
#else /* end __SUNPRO__ */
#if AOM_ARCH_X86_64
#if defined(_MSC_VER) && _MSC_VER > 1500
/* 64-bit, _MSC_VER > 1500: use the __cpuidex intrinsic, which takes both the
 * leaf (func) and the sub-leaf (func2). */
#define cpuid(func, func2, a, b, c, d) \
  do {                                 \
    int regs[4];                       \
    __cpuidex(regs, func, func2);      \
    a = regs[0];                       \
    b = regs[1];                       \
    c = regs[2];                       \
    d = regs[3];                       \
  } while (0)
#else
/* 64-bit, other/older compilers: only __cpuid is used, so func2 is ignored
 * by this variant. */
#define cpuid(func, func2, a, b, c, d) \
  do {                                 \
    int regs[4];                       \
    __cpuid(regs, func);               \
    a = regs[0];                       \
    b = regs[1];                       \
    c = regs[2];                       \
    d = regs[3];                       \
  } while (0)
#endif
#else
/* 32-bit, non-GNU/non-Sun: MSVC-style inline assembly. The expansion is a
 * sequence of __asm statements (not wrapped in do/while), and a, b, c, d are
 * used directly as operands of mov. */
/* clang-format off */
#define cpuid(func, func2, a, b, c, d) \
  __asm mov eax, func                  \
  __asm mov ecx, func2                 \
  __asm cpuid                          \
  __asm mov a, eax                     \
  __asm mov b, ebx                     \
  __asm mov c, ecx                     \
  __asm mov d, edx
#endif
/* clang-format on */
#endif /* end others */
118*77c1e3ccSAndroid Build Coastguard Worker 
// xgetbv(): executes the XGETBV instruction with ECX = 0 and yields the
// EDX:EAX result as a 64-bit value. x86_simd_caps() tests bits 0x6 of this
// value to decide whether the OS supports YMM state. Toolchains without a
// usable implementation get a constant 0, which makes that test fail.
//
// NaCl has no support for xgetbv or the raw opcode.
#if !defined(__native_client__) && (defined(__i386__) || defined(__x86_64__))
// GNU-style inline assembly variant.
static inline uint64_t xgetbv(void) {
  const uint32_t ecx = 0;
  uint32_t eax, edx;
  // Use the raw opcode for xgetbv for compatibility with older toolchains.
  __asm__ volatile(".byte 0x0f, 0x01, 0xd0\n"
                   : "=a"(eax), "=d"(edx)
                   : "c"(ecx));
  // Combine the two 32-bit halves: EDX is the high word, EAX the low word.
  return ((uint64_t)edx << 32) | eax;
}
#elif (defined(_M_X64) || defined(_M_IX86)) && defined(_MSC_FULL_VER) && \
    _MSC_FULL_VER >= 160040219  // >= VS2010 SP1
// MSVC with the _xgetbv intrinsic available: forward to it with index 0.
#include <immintrin.h>
#define xgetbv() _xgetbv(0)
#elif defined(_MSC_VER) && defined(_M_IX86)
// Older 32-bit MSVC: emit the instruction bytes from an __asm block.
static inline uint64_t xgetbv(void) {
  uint32_t eax_, edx_;
  __asm {
    xor ecx, ecx  // ecx = 0
    // Use the raw opcode for xgetbv for compatibility with older toolchains.
    __asm _emit 0x0f __asm _emit 0x01 __asm _emit 0xd0
    mov eax_, eax
    mov edx_, edx
  }
  // Combine the two 32-bit halves: EDX is the high word, EAX the low word.
  return ((uint64_t)edx_ << 32) | eax_;
}
#else
#define xgetbv() 0U  // no AVX for older x64 or unrecognized toolchains.
#endif
149*77c1e3ccSAndroid Build Coastguard Worker 
// For _MSC_VER >= 1700, include <windows.h> with NOMINMAX and
// WIN32_LEAN_AND_MEAN forced on (each is #undef'd first so a prior definition
// does not trigger a redefinition).
#if defined(_MSC_VER) && _MSC_VER >= 1700
#undef NOMINMAX
#define NOMINMAX
#undef WIN32_LEAN_AND_MEAN
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
#if WINAPI_FAMILY_PARTITION(WINAPI_FAMILY_APP)
// In the WINAPI_FAMILY_APP partition getenv() is replaced by NULL, so the
// AOM_SIMD_CAPS / AOM_SIMD_CAPS_MASK environment overrides in
// x86_simd_caps() are never taken. This macro affects every later use of
// getenv in a translation unit that includes this header.
#define getenv(x) NULL
#endif
#endif
160*77c1e3ccSAndroid Build Coastguard Worker 
/* Capability bits OR-ed together in the value returned by x86_simd_caps().
 * Listed in ascending bit order; note that HAS_SSE4_2 was assigned after
 * HAS_AVX/HAS_AVX2 and therefore sits above them. */
#define HAS_MMX 0x001
#define HAS_SSE 0x002
#define HAS_SSE2 0x004
#define HAS_SSE3 0x008
#define HAS_SSSE3 0x010
#define HAS_SSE4_1 0x020
#define HAS_AVX 0x040
#define HAS_AVX2 0x080
#define HAS_SSE4_2 0x100

/* BIT(n): unsigned mask with only bit n set. Defined here only if the
 * including code has not already provided one. */
#ifndef BIT
#define BIT(n) (1u << (n))
#endif
173*77c1e3ccSAndroid Build Coastguard Worker 
x86_simd_caps(void)174*77c1e3ccSAndroid Build Coastguard Worker static inline int x86_simd_caps(void) {
175*77c1e3ccSAndroid Build Coastguard Worker   unsigned int flags = 0;
176*77c1e3ccSAndroid Build Coastguard Worker   unsigned int mask = ~0u;
177*77c1e3ccSAndroid Build Coastguard Worker   unsigned int max_cpuid_val, reg_eax, reg_ebx, reg_ecx, reg_edx;
178*77c1e3ccSAndroid Build Coastguard Worker   char *env;
179*77c1e3ccSAndroid Build Coastguard Worker 
180*77c1e3ccSAndroid Build Coastguard Worker   /* See if the CPU capabilities are being overridden by the environment */
181*77c1e3ccSAndroid Build Coastguard Worker   env = getenv("AOM_SIMD_CAPS");
182*77c1e3ccSAndroid Build Coastguard Worker 
183*77c1e3ccSAndroid Build Coastguard Worker   if (env && *env) return (int)strtol(env, NULL, 0);
184*77c1e3ccSAndroid Build Coastguard Worker 
185*77c1e3ccSAndroid Build Coastguard Worker   env = getenv("AOM_SIMD_CAPS_MASK");
186*77c1e3ccSAndroid Build Coastguard Worker 
187*77c1e3ccSAndroid Build Coastguard Worker   if (env && *env) mask = (unsigned int)strtoul(env, NULL, 0);
188*77c1e3ccSAndroid Build Coastguard Worker 
189*77c1e3ccSAndroid Build Coastguard Worker   /* Ensure that the CPUID instruction supports extended features */
190*77c1e3ccSAndroid Build Coastguard Worker   cpuid(0, 0, max_cpuid_val, reg_ebx, reg_ecx, reg_edx);
191*77c1e3ccSAndroid Build Coastguard Worker 
192*77c1e3ccSAndroid Build Coastguard Worker   if (max_cpuid_val < 1) return 0;
193*77c1e3ccSAndroid Build Coastguard Worker 
194*77c1e3ccSAndroid Build Coastguard Worker   /* Get the standard feature flags */
195*77c1e3ccSAndroid Build Coastguard Worker   cpuid(1, 0, reg_eax, reg_ebx, reg_ecx, reg_edx);
196*77c1e3ccSAndroid Build Coastguard Worker 
197*77c1e3ccSAndroid Build Coastguard Worker   if (reg_edx & BIT(23)) flags |= HAS_MMX;
198*77c1e3ccSAndroid Build Coastguard Worker 
199*77c1e3ccSAndroid Build Coastguard Worker   if (reg_edx & BIT(25)) flags |= HAS_SSE; /* aka xmm */
200*77c1e3ccSAndroid Build Coastguard Worker 
201*77c1e3ccSAndroid Build Coastguard Worker   if (reg_edx & BIT(26)) flags |= HAS_SSE2; /* aka wmt */
202*77c1e3ccSAndroid Build Coastguard Worker 
203*77c1e3ccSAndroid Build Coastguard Worker   if (reg_ecx & BIT(0)) flags |= HAS_SSE3;
204*77c1e3ccSAndroid Build Coastguard Worker 
205*77c1e3ccSAndroid Build Coastguard Worker   if (reg_ecx & BIT(9)) flags |= HAS_SSSE3;
206*77c1e3ccSAndroid Build Coastguard Worker 
207*77c1e3ccSAndroid Build Coastguard Worker   if (reg_ecx & BIT(19)) flags |= HAS_SSE4_1;
208*77c1e3ccSAndroid Build Coastguard Worker 
209*77c1e3ccSAndroid Build Coastguard Worker   if (reg_ecx & BIT(20)) flags |= HAS_SSE4_2;
210*77c1e3ccSAndroid Build Coastguard Worker 
211*77c1e3ccSAndroid Build Coastguard Worker   // bits 27 (OSXSAVE) & 28 (256-bit AVX)
212*77c1e3ccSAndroid Build Coastguard Worker   if ((reg_ecx & (BIT(27) | BIT(28))) == (BIT(27) | BIT(28))) {
213*77c1e3ccSAndroid Build Coastguard Worker     // Check for OS-support of YMM state. Necessary for AVX and AVX2.
214*77c1e3ccSAndroid Build Coastguard Worker     if ((xgetbv() & 0x6) == 0x6) {
215*77c1e3ccSAndroid Build Coastguard Worker       flags |= HAS_AVX;
216*77c1e3ccSAndroid Build Coastguard Worker 
217*77c1e3ccSAndroid Build Coastguard Worker       if (max_cpuid_val >= 7) {
218*77c1e3ccSAndroid Build Coastguard Worker         /* Get the leaf 7 feature flags. Needed to check for AVX2 support */
219*77c1e3ccSAndroid Build Coastguard Worker         cpuid(7, 0, reg_eax, reg_ebx, reg_ecx, reg_edx);
220*77c1e3ccSAndroid Build Coastguard Worker 
221*77c1e3ccSAndroid Build Coastguard Worker         if (reg_ebx & BIT(5)) flags |= HAS_AVX2;
222*77c1e3ccSAndroid Build Coastguard Worker       }
223*77c1e3ccSAndroid Build Coastguard Worker     }
224*77c1e3ccSAndroid Build Coastguard Worker   }
225*77c1e3ccSAndroid Build Coastguard Worker 
226*77c1e3ccSAndroid Build Coastguard Worker   (void)reg_eax;  // Avoid compiler warning on unused-but-set variable.
227*77c1e3ccSAndroid Build Coastguard Worker 
228*77c1e3ccSAndroid Build Coastguard Worker   return flags & mask;
229*77c1e3ccSAndroid Build Coastguard Worker }
230*77c1e3ccSAndroid Build Coastguard Worker 
231*77c1e3ccSAndroid Build Coastguard Worker // Fine-Grain Measurement Functions
232*77c1e3ccSAndroid Build Coastguard Worker //
233*77c1e3ccSAndroid Build Coastguard Worker // If you are timing a small region of code, access the timestamp counter
234*77c1e3ccSAndroid Build Coastguard Worker // (TSC) via:
235*77c1e3ccSAndroid Build Coastguard Worker //
236*77c1e3ccSAndroid Build Coastguard Worker // unsigned int start = x86_tsc_start();
237*77c1e3ccSAndroid Build Coastguard Worker //   ...
238*77c1e3ccSAndroid Build Coastguard Worker // unsigned int end = x86_tsc_end();
239*77c1e3ccSAndroid Build Coastguard Worker // unsigned int diff = end - start;
240*77c1e3ccSAndroid Build Coastguard Worker //
241*77c1e3ccSAndroid Build Coastguard Worker // The start/end functions introduce a few more instructions than using
242*77c1e3ccSAndroid Build Coastguard Worker // x86_readtsc directly, but prevent the CPU's out-of-order execution from
243*77c1e3ccSAndroid Build Coastguard Worker // affecting the measurement (by having earlier/later instructions be evaluated
244*77c1e3ccSAndroid Build Coastguard Worker // in the time interval). See the white paper, "How to Benchmark Code
245*77c1e3ccSAndroid Build Coastguard Worker // Execution Times on Intel(R) IA-32 and IA-64 Instruction Set Architectures" by
246*77c1e3ccSAndroid Build Coastguard Worker // Gabriele Paoloni for more information.
247*77c1e3ccSAndroid Build Coastguard Worker //
248*77c1e3ccSAndroid Build Coastguard Worker // If you are timing a large function (CPU time > a couple of seconds), use
249*77c1e3ccSAndroid Build Coastguard Worker // x86_readtsc64 to read the timestamp counter in a 64-bit integer. The
250*77c1e3ccSAndroid Build Coastguard Worker // out-of-order leakage that can occur is minimal compared to total runtime.
//
// x86_readtsc: reads the timestamp counter with rdtsc and returns its low
// 32 bits (the EAX half). No serialization is performed; see
// x86_tsc_start()/x86_tsc_end() for fenced measurements.
static inline unsigned int x86_readtsc(void) {
#if defined(__GNUC__)
  unsigned int tsc;
  // rdtsc writes EDX (high half) as well as EAX. Only EAX is wanted here, so
  // EDX must be declared as clobbered; otherwise the compiler may keep a live
  // value in EDX across the instruction and have it silently overwritten.
  __asm__ __volatile__("rdtsc\n\t" : "=a"(tsc) : : "edx");
  return tsc;
#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC)
  unsigned int tsc;
  // NOTE(review): rdtsc also writes EDX, which is not declared here. Confirm
  // the clobber syntax accepted by Sun Studio before adding it.
  asm volatile("rdtsc\n\t" : "=a"(tsc) :);
  return tsc;
#else
#if AOM_ARCH_X86_64
  return (unsigned int)__rdtsc();
#else
  // No return statement: presumably relies on the compiler treating EAX as
  // the function result after the inline instruction -- TODO confirm.
  __asm rdtsc;
#endif
#endif
}
// 64-bit CPU cycle counter: reads the timestamp counter with rdtsc and returns
// the full value, with EDX as the upper 32 bits and EAX as the lower 32 bits.
static inline uint64_t x86_readtsc64(void) {
#if defined(__GNUC__)
  uint32_t upper, lower;
  __asm__ __volatile__("rdtsc" : "=a"(lower), "=d"(upper));
  return (uint64_t)lower | ((uint64_t)upper << 32);
#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC)
  uint_t upper, lower;
  asm volatile("rdtsc\n\t" : "=a"(lower), "=d"(upper));
  return (uint64_t)lower | ((uint64_t)upper << 32);
#else
#if AOM_ARCH_X86_64
  return (uint64_t)__rdtsc();
#else
  // No return statement: presumably relies on the compiler treating EDX:EAX
  // as the 64-bit function result -- TODO confirm.
  __asm rdtsc;
#endif
#endif
}
286*77c1e3ccSAndroid Build Coastguard Worker 
// 32-bit CPU cycle counter with a partial fence against out-of-order execution.
// Reads the timestamp counter with rdtscp and returns its low 32 bits (the
// EAX half).
static inline unsigned int x86_readtscp(void) {
#if defined(__GNUC__)
  unsigned int tscp;
  // rdtscp writes EDX (high half of the counter) and ECX (IA32_TSC_AUX) in
  // addition to EAX. Both are declared as clobbered so the compiler does not
  // assume values it keeps in those registers survive the instruction.
  __asm__ __volatile__("rdtscp\n\t" : "=a"(tscp) : : "ecx", "edx");
  return tscp;
#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC)
  unsigned int tscp;
  // NOTE(review): rdtscp also writes EDX and ECX, which are not declared
  // here. Confirm the clobber syntax accepted by Sun Studio before adding it.
  asm volatile("rdtscp\n\t" : "=a"(tscp) :);
  return tscp;
#elif defined(_MSC_VER)
  unsigned int ui;  // Receives the auxiliary value; intentionally unused.
  return (unsigned int)__rdtscp(&ui);
#else
#if AOM_ARCH_X86_64
  return (unsigned int)__rdtscp();
#else
  __asm rdtscp;
#endif
#endif
}
308*77c1e3ccSAndroid Build Coastguard Worker 
// Starts a fine-grain timing interval: issues cpuid (leaf 0) and then returns
// the low 32 bits of the timestamp counter from x86_readtsc().
static inline unsigned int x86_tsc_start(void) {
  unsigned int out_a, out_b, out_c, out_d;

  // The cpuid call is the fence that keeps earlier instructions out of the
  // measured interval; it must stay even though its results are discarded.
  cpuid(0, 0, out_a, out_b, out_c, out_d);

  // Silence unused-but-set warnings for the discarded cpuid results.
  (void)out_a;
  (void)out_b;
  (void)out_c;
  (void)out_d;

  return x86_readtsc();
}
320*77c1e3ccSAndroid Build Coastguard Worker 
// Ends a fine-grain timing interval: samples the counter with x86_readtscp()
// first, then issues cpuid (leaf 0), and returns the sampled value.
static inline unsigned int x86_tsc_end(void) {
  const uint32_t stamp = x86_readtscp();
  unsigned int out_a, out_b, out_c, out_d;

  // The cpuid call is the fence that keeps later instructions out of the
  // measured interval; it must stay even though its results are discarded.
  cpuid(0, 0, out_a, out_b, out_c, out_d);

  // Silence unused-but-set warnings for the discarded cpuid results.
  (void)out_a;
  (void)out_b;
  (void)out_c;
  (void)out_d;

  return stamp;
}
333*77c1e3ccSAndroid Build Coastguard Worker 
// x86_pause_hint(): emits the x86 "pause" instruction. Every variant expands
// to a single statement WITHOUT a trailing semicolon, so the caller supplies
// it and the macro is safe in unbraced if/else bodies.
#if defined(__GNUC__)
#define x86_pause_hint() __asm__ __volatile__("pause \n\t")
#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC)
#define x86_pause_hint() asm volatile("pause \n\t")
#else
#if AOM_ARCH_X86_64
#define x86_pause_hint() _mm_pause()
#else
#define x86_pause_hint() __asm pause
#endif
#endif
345*77c1e3ccSAndroid Build Coastguard Worker 
// x87 FPU control word accessors.
//   x87_set_control_word(mode): loads `mode` into the control word (fldcw).
//   x87_get_control_word():     stores the current control word (fstcw) and
//                               returns it.
// One implementation is selected per toolchain.
#if defined(__GNUC__)
// GNU-style inline assembly; the value is passed/received through memory.
static void x87_set_control_word(unsigned short mode) {
  __asm__ __volatile__("fldcw %0" : : "m"(*&mode));
}
static unsigned short x87_get_control_word(void) {
  unsigned short mode;
  __asm__ __volatile__("fstcw %0\n\t" : "=m"(*&mode) :);
  return mode;
}
#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC)
// Sun Studio: same instructions using the `asm volatile` spelling.
static void x87_set_control_word(unsigned short mode) {
  asm volatile("fldcw %0" : : "m"(*&mode));
}
static unsigned short x87_get_control_word(void) {
  unsigned short mode;
  asm volatile("fstcw %0\n\t" : "=m"(*&mode) :);
  return mode;
}
#elif AOM_ARCH_X86_64
/* No fldcw intrinsics on Windows x64, punt to external asm */
// The two names are macro aliases for externally defined functions, so they
// must be provided at link time.
extern void aom_winx64_fldcw(unsigned short mode);
extern unsigned short aom_winx64_fstcw(void);
#define x87_set_control_word aom_winx64_fldcw
#define x87_get_control_word aom_winx64_fstcw
#else
// Remaining 32-bit toolchains: __asm block syntax operating on the local.
static void x87_set_control_word(unsigned short mode) {
  __asm { fldcw mode }
}
static unsigned short x87_get_control_word(void) {
  unsigned short mode;
  __asm { fstcw mode }
  return mode;
}
#endif
380*77c1e3ccSAndroid Build Coastguard Worker 
// Switches the x87 FPU to double precision and returns the control word that
// was in effect beforehand, so the caller can restore it later with
// x87_set_control_word().
//
// Intel 64 and IA-32 Architectures Developer's Manual: Vol. 1
// https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-vol-1-manual.pdf
// 8.1.5.2 Precision Control Field
// Bits 8 and 9 (0x300) of the x87 FPU Control Word ("Precision Control")
// select the number of bits used in floating point calculations:
//   PC Field   Precision
//   00B        Single Precision (24-Bits)
//   01B        Reserved
//   10B        Double Precision (53-Bits)
//   11B        Extended Precision (64-Bits)
// Double Precision (0x200) is chosen to match later SSE instructions.
static inline unsigned int x87_set_double_precision(void) {
  const unsigned int precision_field = 0x300u;
  const unsigned int double_precision = 0x200u;
  const unsigned int previous = x87_get_control_word();

  // Clear the two precision-control bits, then select 10B.
  x87_set_control_word((previous & ~precision_field) | double_precision);
  return previous;
}
397*77c1e3ccSAndroid Build Coastguard Worker 
398*77c1e3ccSAndroid Build Coastguard Worker #ifdef __cplusplus
399*77c1e3ccSAndroid Build Coastguard Worker }  // extern "C"
400*77c1e3ccSAndroid Build Coastguard Worker #endif
401*77c1e3ccSAndroid Build Coastguard Worker 
402*77c1e3ccSAndroid Build Coastguard Worker #endif  // AOM_AOM_PORTS_X86_H_
403