xref: /aosp_15_r20/external/libvpx/vpx_ports/x86.h (revision fb1b10ab9aebc7c7068eedab379b749d7e3900be)
1*fb1b10abSAndroid Build Coastguard Worker /*
2*fb1b10abSAndroid Build Coastguard Worker  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3*fb1b10abSAndroid Build Coastguard Worker  *
4*fb1b10abSAndroid Build Coastguard Worker  *  Use of this source code is governed by a BSD-style license
5*fb1b10abSAndroid Build Coastguard Worker  *  that can be found in the LICENSE file in the root of the source
6*fb1b10abSAndroid Build Coastguard Worker  *  tree. An additional intellectual property rights grant can be found
7*fb1b10abSAndroid Build Coastguard Worker  *  in the file PATENTS.  All contributing project authors may
8*fb1b10abSAndroid Build Coastguard Worker  *  be found in the AUTHORS file in the root of the source tree.
9*fb1b10abSAndroid Build Coastguard Worker  */
10*fb1b10abSAndroid Build Coastguard Worker 
11*fb1b10abSAndroid Build Coastguard Worker #ifndef VPX_VPX_PORTS_X86_H_
12*fb1b10abSAndroid Build Coastguard Worker #define VPX_VPX_PORTS_X86_H_
13*fb1b10abSAndroid Build Coastguard Worker #include <stdlib.h>
14*fb1b10abSAndroid Build Coastguard Worker 
15*fb1b10abSAndroid Build Coastguard Worker #if defined(_MSC_VER)
16*fb1b10abSAndroid Build Coastguard Worker #include <intrin.h> /* For __cpuidex, __rdtsc */
17*fb1b10abSAndroid Build Coastguard Worker #endif
18*fb1b10abSAndroid Build Coastguard Worker 
19*fb1b10abSAndroid Build Coastguard Worker #include "vpx_config.h"
20*fb1b10abSAndroid Build Coastguard Worker #include "vpx/vpx_integer.h"
21*fb1b10abSAndroid Build Coastguard Worker 
22*fb1b10abSAndroid Build Coastguard Worker #ifdef __cplusplus
23*fb1b10abSAndroid Build Coastguard Worker extern "C" {
24*fb1b10abSAndroid Build Coastguard Worker #endif
25*fb1b10abSAndroid Build Coastguard Worker 
/*
 * CPU vendor identifiers.
 *
 * VPX_CPU_UNKNOWN is -1; the vendor enumerators are numbered consecutively
 * from 0, and VPX_CPU_LAST is one past the final vendor entry.
 */
typedef enum {
  VPX_CPU_UNKNOWN = -1,
  VPX_CPU_AMD = 0,
  VPX_CPU_AMD_OLD = 1,
  VPX_CPU_CENTAUR = 2,
  VPX_CPU_CYRIX = 3,
  VPX_CPU_INTEL = 4,
  VPX_CPU_NEXGEN = 5,
  VPX_CPU_NSC = 6,
  VPX_CPU_RISE = 7,
  VPX_CPU_SIS = 8,
  VPX_CPU_TRANSMETA = 9,
  VPX_CPU_TRANSMETA_OLD = 10,
  VPX_CPU_UMC = 11,
  VPX_CPU_VIA = 12,

  VPX_CPU_LAST = 13 /* number of vendor entries above */
} vpx_cpu_t;
44*fb1b10abSAndroid Build Coastguard Worker 
/*
 * cpuid(func, func2, ax, bx, cx, dx)
 *
 * Executes the CPUID instruction with EAX = func and ECX = func2 and stores
 * the resulting EAX/EBX/ECX/EDX values into the lvalues passed as the last
 * four arguments. Exactly one definition is selected below, depending on the
 * toolchain and on VPX_ARCH_X86_64. The variant built on __cpuid() does not
 * forward func2 (see the note on that variant).
 */
#if defined(__GNUC__) || defined(__ANDROID__)
#if VPX_ARCH_X86_64
/* GNU-style asm, 64-bit: all four registers are bound directly as outputs. */
#define cpuid(func, func2, ax, bx, cx, dx)                      \
  __asm__ __volatile__("cpuid           \n\t"                   \
                       : "=a"(ax), "=b"(bx), "=c"(cx), "=d"(dx) \
                       : "a"(func), "c"(func2))
#else
/*
 * GNU-style asm, 32-bit: EBX is copied to EDI before CPUID and exchanged back
 * afterwards, so EBX keeps its previous value and CPUID's EBX result is
 * delivered through EDI (the "=D" output). Presumably this is because EBX may
 * be reserved by the compiler on 32-bit targets -- TODO confirm.
 */
#define cpuid(func, func2, ax, bx, cx, dx)     \
  __asm__ __volatile__(                        \
      "mov %%ebx, %%edi   \n\t"                \
      "cpuid              \n\t"                \
      "xchg %%edi, %%ebx  \n\t"                \
      : "=a"(ax), "=D"(bx), "=c"(cx), "=d"(dx) \
      : "a"(func), "c"(func2))
#endif
#elif defined(__SUNPRO_C) || \
    defined(__SUNPRO_CC) /* end __GNUC__ or __ANDROID__*/
#if VPX_ARCH_X86_64
/*
 * Sun compilers, 64-bit: RBX is exchanged with RSI around CPUID (restoring
 * both afterwards) and CPUID's EBX result is copied to EDI for the "=D"
 * output.
 */
#define cpuid(func, func2, ax, bx, cx, dx)     \
  asm volatile(                                \
      "xchg %rsi, %rbx \n\t"                   \
      "cpuid           \n\t"                   \
      "movl %ebx, %edi \n\t"                   \
      "xchg %rsi, %rbx \n\t"                   \
      : "=a"(ax), "=D"(bx), "=c"(cx), "=d"(dx) \
      : "a"(func), "c"(func2))
#else
/*
 * Sun compilers, 32-bit: EBX is pushed/popped around CPUID and CPUID's EBX
 * result is copied to EDI for the "=D" output.
 */
#define cpuid(func, func2, ax, bx, cx, dx)     \
  asm volatile(                                \
      "pushl %ebx       \n\t"                  \
      "cpuid            \n\t"                  \
      "movl %ebx, %edi  \n\t"                  \
      "popl %ebx        \n\t"                  \
      : "=a"(ax), "=D"(bx), "=c"(cx), "=d"(dx) \
      : "a"(func), "c"(func2))
#endif
#else /* end __SUNPRO__ */
#if VPX_ARCH_X86_64
#if defined(_MSC_VER) && _MSC_VER > 1500
/*
 * MSVC newer than _MSC_VER 1500, 64-bit: uses the __cpuidex intrinsic, which
 * takes both the leaf (func) and the sub-leaf (func2).
 */
#define cpuid(func, func2, a, b, c, d) \
  do {                                 \
    int regs[4];                       \
    __cpuidex(regs, func, func2);      \
    a = regs[0];                       \
    b = regs[1];                       \
    c = regs[2];                       \
    d = regs[3];                       \
  } while (0)
#else
/*
 * Other 64-bit toolchains: uses the __cpuid intrinsic. Note that func2 is
 * accepted by the macro but is NOT passed to __cpuid in this variant.
 */
#define cpuid(func, func2, a, b, c, d) \
  do {                                 \
    int regs[4];                       \
    __cpuid(regs, func);               \
    a = regs[0];                       \
    b = regs[1];                       \
    c = regs[2];                       \
    d = regs[3];                       \
  } while (0)
#endif
#else
/*
 * Other 32-bit toolchains: a sequence of `__asm` statements that loads
 * EAX/ECX, runs CPUID and moves the four result registers into a/b/c/d.
 * The expansion is several statements and is not wrapped in a block.
 */
#define cpuid(func, func2, a, b, c, d)                              \
  __asm mov eax, func __asm mov ecx, func2 __asm cpuid __asm mov a, \
      eax __asm mov b, ebx __asm mov c, ecx __asm mov d, edx
#endif
#endif /* end others */
110*fb1b10abSAndroid Build Coastguard Worker 
111*fb1b10abSAndroid Build Coastguard Worker // NaCl has no support for xgetbv or the raw opcode.
112*fb1b10abSAndroid Build Coastguard Worker #if !defined(__native_client__) && (defined(__i386__) || defined(__x86_64__))
xgetbv(void)113*fb1b10abSAndroid Build Coastguard Worker static INLINE uint64_t xgetbv(void) {
114*fb1b10abSAndroid Build Coastguard Worker   const uint32_t ecx = 0;
115*fb1b10abSAndroid Build Coastguard Worker   uint32_t eax, edx;
116*fb1b10abSAndroid Build Coastguard Worker   // Use the raw opcode for xgetbv for compatibility with older toolchains.
117*fb1b10abSAndroid Build Coastguard Worker   __asm__ volatile(".byte 0x0f, 0x01, 0xd0\n"
118*fb1b10abSAndroid Build Coastguard Worker                    : "=a"(eax), "=d"(edx)
119*fb1b10abSAndroid Build Coastguard Worker                    : "c"(ecx));
120*fb1b10abSAndroid Build Coastguard Worker   return ((uint64_t)edx << 32) | eax;
121*fb1b10abSAndroid Build Coastguard Worker }
122*fb1b10abSAndroid Build Coastguard Worker #elif (defined(_M_X64) || defined(_M_IX86)) && defined(_MSC_FULL_VER) && \
123*fb1b10abSAndroid Build Coastguard Worker     _MSC_FULL_VER >= 160040219  // >= VS2010 SP1
124*fb1b10abSAndroid Build Coastguard Worker #include <immintrin.h>
125*fb1b10abSAndroid Build Coastguard Worker #define xgetbv() _xgetbv(0)
126*fb1b10abSAndroid Build Coastguard Worker #elif defined(_MSC_VER) && defined(_M_IX86)
xgetbv(void)127*fb1b10abSAndroid Build Coastguard Worker static INLINE uint64_t xgetbv(void) {
128*fb1b10abSAndroid Build Coastguard Worker   uint32_t eax_, edx_;
129*fb1b10abSAndroid Build Coastguard Worker   __asm {
130*fb1b10abSAndroid Build Coastguard Worker     xor ecx, ecx  // ecx = 0
131*fb1b10abSAndroid Build Coastguard Worker     // Use the raw opcode for xgetbv for compatibility with older toolchains.
132*fb1b10abSAndroid Build Coastguard Worker     __asm _emit 0x0f __asm _emit 0x01 __asm _emit 0xd0
133*fb1b10abSAndroid Build Coastguard Worker     mov eax_, eax
134*fb1b10abSAndroid Build Coastguard Worker     mov edx_, edx
135*fb1b10abSAndroid Build Coastguard Worker   }
136*fb1b10abSAndroid Build Coastguard Worker   return ((uint64_t)edx_ << 32) | eax_;
137*fb1b10abSAndroid Build Coastguard Worker }
138*fb1b10abSAndroid Build Coastguard Worker #else
139*fb1b10abSAndroid Build Coastguard Worker #define xgetbv() 0U  // no AVX for older x64 or unrecognized toolchains.
140*fb1b10abSAndroid Build Coastguard Worker #endif
141*fb1b10abSAndroid Build Coastguard Worker 
/*
 * MSVC with _MSC_VER >= 1700: pull in <windows.h> with NOMINMAX and
 * WIN32_LEAN_AND_MEAN defined first.
 */
#if defined(_MSC_VER) && _MSC_VER >= 1700
#undef NOMINMAX
#define NOMINMAX
#ifndef WIN32_LEAN_AND_MEAN
#define WIN32_LEAN_AND_MEAN
#endif
#include <windows.h>
#if WINAPI_FAMILY_PARTITION(WINAPI_FAMILY_APP)
/*
 * In the WINAPI_FAMILY_APP partition every getenv() call in code that follows
 * expands to NULL, so the environment overrides read by x86_simd_caps() are
 * never seen there.
 */
#define getenv(x) NULL
#endif
#endif
153*fb1b10abSAndroid Build Coastguard Worker 
/*
 * Capability flags OR-ed together in the value returned by x86_simd_caps().
 * Each flag occupies its own bit.
 */
#define HAS_MMX 0x001
#define HAS_SSE 0x002
#define HAS_SSE2 0x004
#define HAS_SSE3 0x008
#define HAS_SSSE3 0x010
#define HAS_SSE4_1 0x020
#define HAS_AVX 0x040
#define HAS_AVX2 0x080
#define HAS_AVX512 0x100
/* BIT(n): unsigned value with only bit n set; defined only if not already. */
#ifndef BIT
#define BIT(n) (1u << (n))
#endif
166*fb1b10abSAndroid Build Coastguard Worker 
x86_simd_caps(void)167*fb1b10abSAndroid Build Coastguard Worker static INLINE int x86_simd_caps(void) {
168*fb1b10abSAndroid Build Coastguard Worker   unsigned int flags = 0;
169*fb1b10abSAndroid Build Coastguard Worker   unsigned int mask = ~0u;
170*fb1b10abSAndroid Build Coastguard Worker   unsigned int max_cpuid_val, reg_eax, reg_ebx, reg_ecx, reg_edx;
171*fb1b10abSAndroid Build Coastguard Worker   char *env;
172*fb1b10abSAndroid Build Coastguard Worker   (void)reg_ebx;
173*fb1b10abSAndroid Build Coastguard Worker 
174*fb1b10abSAndroid Build Coastguard Worker   /* See if the CPU capabilities are being overridden by the environment */
175*fb1b10abSAndroid Build Coastguard Worker   env = getenv("VPX_SIMD_CAPS");
176*fb1b10abSAndroid Build Coastguard Worker 
177*fb1b10abSAndroid Build Coastguard Worker   if (env && *env) return (int)strtol(env, NULL, 0);
178*fb1b10abSAndroid Build Coastguard Worker 
179*fb1b10abSAndroid Build Coastguard Worker   env = getenv("VPX_SIMD_CAPS_MASK");
180*fb1b10abSAndroid Build Coastguard Worker 
181*fb1b10abSAndroid Build Coastguard Worker   if (env && *env) mask = (unsigned int)strtoul(env, NULL, 0);
182*fb1b10abSAndroid Build Coastguard Worker 
183*fb1b10abSAndroid Build Coastguard Worker   /* Ensure that the CPUID instruction supports extended features */
184*fb1b10abSAndroid Build Coastguard Worker   cpuid(0, 0, max_cpuid_val, reg_ebx, reg_ecx, reg_edx);
185*fb1b10abSAndroid Build Coastguard Worker 
186*fb1b10abSAndroid Build Coastguard Worker   if (max_cpuid_val < 1) return 0;
187*fb1b10abSAndroid Build Coastguard Worker 
188*fb1b10abSAndroid Build Coastguard Worker   /* Get the standard feature flags */
189*fb1b10abSAndroid Build Coastguard Worker   cpuid(1, 0, reg_eax, reg_ebx, reg_ecx, reg_edx);
190*fb1b10abSAndroid Build Coastguard Worker 
191*fb1b10abSAndroid Build Coastguard Worker   if (reg_edx & BIT(23)) flags |= HAS_MMX;
192*fb1b10abSAndroid Build Coastguard Worker 
193*fb1b10abSAndroid Build Coastguard Worker   if (reg_edx & BIT(25)) flags |= HAS_SSE; /* aka xmm */
194*fb1b10abSAndroid Build Coastguard Worker 
195*fb1b10abSAndroid Build Coastguard Worker   if (reg_edx & BIT(26)) flags |= HAS_SSE2; /* aka wmt */
196*fb1b10abSAndroid Build Coastguard Worker 
197*fb1b10abSAndroid Build Coastguard Worker   if (reg_ecx & BIT(0)) flags |= HAS_SSE3;
198*fb1b10abSAndroid Build Coastguard Worker 
199*fb1b10abSAndroid Build Coastguard Worker   if (reg_ecx & BIT(9)) flags |= HAS_SSSE3;
200*fb1b10abSAndroid Build Coastguard Worker 
201*fb1b10abSAndroid Build Coastguard Worker   if (reg_ecx & BIT(19)) flags |= HAS_SSE4_1;
202*fb1b10abSAndroid Build Coastguard Worker 
203*fb1b10abSAndroid Build Coastguard Worker   // bits 27 (OSXSAVE) & 28 (256-bit AVX)
204*fb1b10abSAndroid Build Coastguard Worker   if ((reg_ecx & (BIT(27) | BIT(28))) == (BIT(27) | BIT(28))) {
205*fb1b10abSAndroid Build Coastguard Worker     // Check for OS-support of YMM state. Necessary for AVX and AVX2.
206*fb1b10abSAndroid Build Coastguard Worker     if ((xgetbv() & 0x6) == 0x6) {
207*fb1b10abSAndroid Build Coastguard Worker       flags |= HAS_AVX;
208*fb1b10abSAndroid Build Coastguard Worker 
209*fb1b10abSAndroid Build Coastguard Worker       if (max_cpuid_val >= 7) {
210*fb1b10abSAndroid Build Coastguard Worker         /* Get the leaf 7 feature flags. Needed to check for AVX2 support */
211*fb1b10abSAndroid Build Coastguard Worker         cpuid(7, 0, reg_eax, reg_ebx, reg_ecx, reg_edx);
212*fb1b10abSAndroid Build Coastguard Worker 
213*fb1b10abSAndroid Build Coastguard Worker         if (reg_ebx & BIT(5)) flags |= HAS_AVX2;
214*fb1b10abSAndroid Build Coastguard Worker 
215*fb1b10abSAndroid Build Coastguard Worker         // bits 16 (AVX-512F) & 17 (AVX-512DQ) & 28 (AVX-512CD) &
216*fb1b10abSAndroid Build Coastguard Worker         // 30 (AVX-512BW) & 32 (AVX-512VL)
217*fb1b10abSAndroid Build Coastguard Worker         if ((reg_ebx & (BIT(16) | BIT(17) | BIT(28) | BIT(30) | BIT(31))) ==
218*fb1b10abSAndroid Build Coastguard Worker             (BIT(16) | BIT(17) | BIT(28) | BIT(30) | BIT(31))) {
219*fb1b10abSAndroid Build Coastguard Worker           // Check for OS-support of ZMM and YMM state. Necessary for AVX-512.
220*fb1b10abSAndroid Build Coastguard Worker           if ((xgetbv() & 0xe6) == 0xe6) flags |= HAS_AVX512;
221*fb1b10abSAndroid Build Coastguard Worker         }
222*fb1b10abSAndroid Build Coastguard Worker       }
223*fb1b10abSAndroid Build Coastguard Worker     }
224*fb1b10abSAndroid Build Coastguard Worker   }
225*fb1b10abSAndroid Build Coastguard Worker 
226*fb1b10abSAndroid Build Coastguard Worker   (void)reg_eax;  // Avoid compiler warning on unused-but-set variable.
227*fb1b10abSAndroid Build Coastguard Worker 
228*fb1b10abSAndroid Build Coastguard Worker   return flags & mask;
229*fb1b10abSAndroid Build Coastguard Worker }
230*fb1b10abSAndroid Build Coastguard Worker 
231*fb1b10abSAndroid Build Coastguard Worker // Fine-Grain Measurement Functions
232*fb1b10abSAndroid Build Coastguard Worker //
233*fb1b10abSAndroid Build Coastguard Worker // If you are timing a small region of code, access the timestamp counter
234*fb1b10abSAndroid Build Coastguard Worker // (TSC) via:
235*fb1b10abSAndroid Build Coastguard Worker //
236*fb1b10abSAndroid Build Coastguard Worker // unsigned int start = x86_tsc_start();
237*fb1b10abSAndroid Build Coastguard Worker //   ...
238*fb1b10abSAndroid Build Coastguard Worker // unsigned int end = x86_tsc_end();
239*fb1b10abSAndroid Build Coastguard Worker // unsigned int diff = end - start;
240*fb1b10abSAndroid Build Coastguard Worker //
241*fb1b10abSAndroid Build Coastguard Worker // The start/end functions introduce a few more instructions than using
242*fb1b10abSAndroid Build Coastguard Worker // x86_readtsc directly, but prevent the CPU's out-of-order execution from
243*fb1b10abSAndroid Build Coastguard Worker // affecting the measurement (by having earlier/later instructions be evaluated
244*fb1b10abSAndroid Build Coastguard Worker // in the time interval). See the white paper, "How to Benchmark Code
245*fb1b10abSAndroid Build Coastguard Worker // Execution Times on Intel(R) IA-32 and IA-64 Instruction Set Architectures" by
246*fb1b10abSAndroid Build Coastguard Worker // Gabriele Paoloni for more information.
247*fb1b10abSAndroid Build Coastguard Worker //
248*fb1b10abSAndroid Build Coastguard Worker // If you are timing a large function (CPU time > a couple of seconds), use
249*fb1b10abSAndroid Build Coastguard Worker // x86_readtsc64 to read the timestamp counter in a 64-bit integer. The
250*fb1b10abSAndroid Build Coastguard Worker // out-of-order leakage that can occur is minimal compared to total runtime.
x86_readtsc(void)251*fb1b10abSAndroid Build Coastguard Worker static INLINE unsigned int x86_readtsc(void) {
252*fb1b10abSAndroid Build Coastguard Worker #if defined(__GNUC__)
253*fb1b10abSAndroid Build Coastguard Worker   unsigned int tsc;
254*fb1b10abSAndroid Build Coastguard Worker   __asm__ __volatile__("rdtsc\n\t" : "=a"(tsc) :);
255*fb1b10abSAndroid Build Coastguard Worker   return tsc;
256*fb1b10abSAndroid Build Coastguard Worker #elif defined(__SUNPRO_C) || defined(__SUNPRO_CC)
257*fb1b10abSAndroid Build Coastguard Worker   unsigned int tsc;
258*fb1b10abSAndroid Build Coastguard Worker   asm volatile("rdtsc\n\t" : "=a"(tsc) :);
259*fb1b10abSAndroid Build Coastguard Worker   return tsc;
260*fb1b10abSAndroid Build Coastguard Worker #else
261*fb1b10abSAndroid Build Coastguard Worker #if VPX_ARCH_X86_64
262*fb1b10abSAndroid Build Coastguard Worker   return (unsigned int)__rdtsc();
263*fb1b10abSAndroid Build Coastguard Worker #else
264*fb1b10abSAndroid Build Coastguard Worker   __asm rdtsc;
265*fb1b10abSAndroid Build Coastguard Worker #endif
266*fb1b10abSAndroid Build Coastguard Worker #endif
267*fb1b10abSAndroid Build Coastguard Worker }
268*fb1b10abSAndroid Build Coastguard Worker // 64-bit CPU cycle counter
x86_readtsc64(void)269*fb1b10abSAndroid Build Coastguard Worker static INLINE uint64_t x86_readtsc64(void) {
270*fb1b10abSAndroid Build Coastguard Worker #if defined(__GNUC__)
271*fb1b10abSAndroid Build Coastguard Worker   uint32_t hi, lo;
272*fb1b10abSAndroid Build Coastguard Worker   __asm__ __volatile__("rdtsc" : "=a"(lo), "=d"(hi));
273*fb1b10abSAndroid Build Coastguard Worker   return ((uint64_t)hi << 32) | lo;
274*fb1b10abSAndroid Build Coastguard Worker #elif defined(__SUNPRO_C) || defined(__SUNPRO_CC)
275*fb1b10abSAndroid Build Coastguard Worker   uint_t hi, lo;
276*fb1b10abSAndroid Build Coastguard Worker   asm volatile("rdtsc\n\t" : "=a"(lo), "=d"(hi));
277*fb1b10abSAndroid Build Coastguard Worker   return ((uint64_t)hi << 32) | lo;
278*fb1b10abSAndroid Build Coastguard Worker #else
279*fb1b10abSAndroid Build Coastguard Worker #if VPX_ARCH_X86_64
280*fb1b10abSAndroid Build Coastguard Worker   return (uint64_t)__rdtsc();
281*fb1b10abSAndroid Build Coastguard Worker #else
282*fb1b10abSAndroid Build Coastguard Worker   __asm rdtsc;
283*fb1b10abSAndroid Build Coastguard Worker #endif
284*fb1b10abSAndroid Build Coastguard Worker #endif
285*fb1b10abSAndroid Build Coastguard Worker }
286*fb1b10abSAndroid Build Coastguard Worker 
287*fb1b10abSAndroid Build Coastguard Worker // 32-bit CPU cycle counter with a partial fence against out-of-order execution.
x86_readtscp(void)288*fb1b10abSAndroid Build Coastguard Worker static INLINE unsigned int x86_readtscp(void) {
289*fb1b10abSAndroid Build Coastguard Worker #if defined(__GNUC__)
290*fb1b10abSAndroid Build Coastguard Worker   unsigned int tscp;
291*fb1b10abSAndroid Build Coastguard Worker   __asm__ __volatile__("rdtscp\n\t" : "=a"(tscp) :);
292*fb1b10abSAndroid Build Coastguard Worker   return tscp;
293*fb1b10abSAndroid Build Coastguard Worker #elif defined(__SUNPRO_C) || defined(__SUNPRO_CC)
294*fb1b10abSAndroid Build Coastguard Worker   unsigned int tscp;
295*fb1b10abSAndroid Build Coastguard Worker   asm volatile("rdtscp\n\t" : "=a"(tscp) :);
296*fb1b10abSAndroid Build Coastguard Worker   return tscp;
297*fb1b10abSAndroid Build Coastguard Worker #elif defined(_MSC_VER)
298*fb1b10abSAndroid Build Coastguard Worker   unsigned int ui;
299*fb1b10abSAndroid Build Coastguard Worker   return (unsigned int)__rdtscp(&ui);
300*fb1b10abSAndroid Build Coastguard Worker #else
301*fb1b10abSAndroid Build Coastguard Worker #if VPX_ARCH_X86_64
302*fb1b10abSAndroid Build Coastguard Worker   return (unsigned int)__rdtscp();
303*fb1b10abSAndroid Build Coastguard Worker #else
304*fb1b10abSAndroid Build Coastguard Worker   __asm rdtscp;
305*fb1b10abSAndroid Build Coastguard Worker #endif
306*fb1b10abSAndroid Build Coastguard Worker #endif
307*fb1b10abSAndroid Build Coastguard Worker }
308*fb1b10abSAndroid Build Coastguard Worker 
x86_tsc_start(void)309*fb1b10abSAndroid Build Coastguard Worker static INLINE unsigned int x86_tsc_start(void) {
310*fb1b10abSAndroid Build Coastguard Worker   unsigned int reg_eax, reg_ebx, reg_ecx, reg_edx;
311*fb1b10abSAndroid Build Coastguard Worker   // This call should not be removed. See function notes above.
312*fb1b10abSAndroid Build Coastguard Worker   cpuid(0, 0, reg_eax, reg_ebx, reg_ecx, reg_edx);
313*fb1b10abSAndroid Build Coastguard Worker   // Avoid compiler warnings on unused-but-set variables.
314*fb1b10abSAndroid Build Coastguard Worker   (void)reg_eax;
315*fb1b10abSAndroid Build Coastguard Worker   (void)reg_ebx;
316*fb1b10abSAndroid Build Coastguard Worker   (void)reg_ecx;
317*fb1b10abSAndroid Build Coastguard Worker   (void)reg_edx;
318*fb1b10abSAndroid Build Coastguard Worker   return x86_readtsc();
319*fb1b10abSAndroid Build Coastguard Worker }
320*fb1b10abSAndroid Build Coastguard Worker 
x86_tsc_end(void)321*fb1b10abSAndroid Build Coastguard Worker static INLINE unsigned int x86_tsc_end(void) {
322*fb1b10abSAndroid Build Coastguard Worker   uint32_t v = x86_readtscp();
323*fb1b10abSAndroid Build Coastguard Worker   unsigned int reg_eax, reg_ebx, reg_ecx, reg_edx;
324*fb1b10abSAndroid Build Coastguard Worker   // This call should not be removed. See function notes above.
325*fb1b10abSAndroid Build Coastguard Worker   cpuid(0, 0, reg_eax, reg_ebx, reg_ecx, reg_edx);
326*fb1b10abSAndroid Build Coastguard Worker   // Avoid compiler warnings on unused-but-set variables.
327*fb1b10abSAndroid Build Coastguard Worker   (void)reg_eax;
328*fb1b10abSAndroid Build Coastguard Worker   (void)reg_ebx;
329*fb1b10abSAndroid Build Coastguard Worker   (void)reg_ecx;
330*fb1b10abSAndroid Build Coastguard Worker   (void)reg_edx;
331*fb1b10abSAndroid Build Coastguard Worker   return v;
332*fb1b10abSAndroid Build Coastguard Worker }
333*fb1b10abSAndroid Build Coastguard Worker 
/*
 * x86_pause_hint(): issues the `pause` instruction.
 *
 * Every variant expands WITHOUT a trailing semicolon; the caller supplies it
 * (`x86_pause_hint();`). This keeps the macro usable as the single statement
 * of an if/else arm on all toolchains.
 */
#if defined(__GNUC__)
#define x86_pause_hint() __asm__ __volatile__("pause \n\t")
#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC)
#define x86_pause_hint() asm volatile("pause \n\t")
#else
#if VPX_ARCH_X86_64
#define x86_pause_hint() _mm_pause()
#else
#define x86_pause_hint() __asm pause
#endif
#endif
345*fb1b10abSAndroid Build Coastguard Worker 
/*
 * x87_set_control_word(mode): loads `mode` into the x87 FPU control word
 *                             (fldcw).
 * x87_get_control_word():     stores the x87 FPU control word (fstcw) and
 *                             returns it.
 * One implementation pair is selected per toolchain.
 */
#if defined(__GNUC__)
static void x87_set_control_word(unsigned short mode) {
  __asm__ __volatile__("fldcw %0" : : "m"(mode));
}
static unsigned short x87_get_control_word(void) {
  unsigned short cw;
  __asm__ __volatile__("fstcw %0\n\t" : "=m"(cw) :);
  return cw;
}
#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC)
static void x87_set_control_word(unsigned short mode) {
  asm volatile("fldcw %0" : : "m"(mode));
}
static unsigned short x87_get_control_word(void) {
  unsigned short cw;
  asm volatile("fstcw %0\n\t" : "=m"(cw) :);
  return cw;
}
#elif VPX_ARCH_X86_64
/* No fldcw intrinsics on Windows x64, punt to external asm */
extern void vpx_winx64_fldcw(unsigned short mode);
extern unsigned short vpx_winx64_fstcw(void);
#define x87_set_control_word vpx_winx64_fldcw
#define x87_get_control_word vpx_winx64_fstcw
#else
static void x87_set_control_word(unsigned short mode) {
  __asm { fldcw mode }
}
static unsigned short x87_get_control_word(void) {
  unsigned short cw;
  __asm { fstcw cw }
  return cw;
}
#endif
380*fb1b10abSAndroid Build Coastguard Worker 
x87_set_double_precision(void)381*fb1b10abSAndroid Build Coastguard Worker static INLINE unsigned int x87_set_double_precision(void) {
382*fb1b10abSAndroid Build Coastguard Worker   unsigned int mode = x87_get_control_word();
383*fb1b10abSAndroid Build Coastguard Worker   // Intel 64 and IA-32 Architectures Developer's Manual: Vol. 1
384*fb1b10abSAndroid Build Coastguard Worker   // https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-vol-1-manual.pdf
385*fb1b10abSAndroid Build Coastguard Worker   // 8.1.5.2 Precision Control Field
386*fb1b10abSAndroid Build Coastguard Worker   // Bits 8 and 9 (0x300) of the x87 FPU Control Word ("Precision Control")
387*fb1b10abSAndroid Build Coastguard Worker   // determine the number of bits used in floating point calculations. To match
388*fb1b10abSAndroid Build Coastguard Worker   // later SSE instructions restrict x87 operations to Double Precision (0x200).
389*fb1b10abSAndroid Build Coastguard Worker   // Precision                     PC Field
390*fb1b10abSAndroid Build Coastguard Worker   // Single Precision (24-Bits)    00B
391*fb1b10abSAndroid Build Coastguard Worker   // Reserved                      01B
392*fb1b10abSAndroid Build Coastguard Worker   // Double Precision (53-Bits)    10B
393*fb1b10abSAndroid Build Coastguard Worker   // Extended Precision (64-Bits)  11B
394*fb1b10abSAndroid Build Coastguard Worker   x87_set_control_word((mode & ~0x300u) | 0x200u);
395*fb1b10abSAndroid Build Coastguard Worker   return mode;
396*fb1b10abSAndroid Build Coastguard Worker }
397*fb1b10abSAndroid Build Coastguard Worker 
398*fb1b10abSAndroid Build Coastguard Worker #ifdef __cplusplus
399*fb1b10abSAndroid Build Coastguard Worker }  // extern "C"
400*fb1b10abSAndroid Build Coastguard Worker #endif
401*fb1b10abSAndroid Build Coastguard Worker 
402*fb1b10abSAndroid Build Coastguard Worker #endif  // VPX_VPX_PORTS_X86_H_
403