xref: /aosp_15_r20/external/libopus/celt/x86/x86cpu.c (revision a58d3d2adb790c104798cd88c8a3aff4fa8b82cc)
1*a58d3d2aSXin Li /* Copyright (c) 2014, Cisco Systems, INC
2*a58d3d2aSXin Li    Written by XiangMingZhu WeiZhou MinPeng YanWang
3*a58d3d2aSXin Li 
4*a58d3d2aSXin Li    Redistribution and use in source and binary forms, with or without
5*a58d3d2aSXin Li    modification, are permitted provided that the following conditions
6*a58d3d2aSXin Li    are met:
7*a58d3d2aSXin Li 
8*a58d3d2aSXin Li    - Redistributions of source code must retain the above copyright
9*a58d3d2aSXin Li    notice, this list of conditions and the following disclaimer.
10*a58d3d2aSXin Li 
11*a58d3d2aSXin Li    - Redistributions in binary form must reproduce the above copyright
12*a58d3d2aSXin Li    notice, this list of conditions and the following disclaimer in the
13*a58d3d2aSXin Li    documentation and/or other materials provided with the distribution.
14*a58d3d2aSXin Li 
15*a58d3d2aSXin Li    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16*a58d3d2aSXin Li    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17*a58d3d2aSXin Li    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
18*a58d3d2aSXin Li    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
19*a58d3d2aSXin Li    OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20*a58d3d2aSXin Li    EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21*a58d3d2aSXin Li    PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22*a58d3d2aSXin Li    PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
23*a58d3d2aSXin Li    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
24*a58d3d2aSXin Li    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25*a58d3d2aSXin Li    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26*a58d3d2aSXin Li */
27*a58d3d2aSXin Li 
28*a58d3d2aSXin Li #ifdef HAVE_CONFIG_H
29*a58d3d2aSXin Li #include "config.h"
30*a58d3d2aSXin Li #endif
31*a58d3d2aSXin Li 
32*a58d3d2aSXin Li #include "cpu_support.h"
33*a58d3d2aSXin Li #include "macros.h"
34*a58d3d2aSXin Li #include "main.h"
35*a58d3d2aSXin Li #include "pitch.h"
36*a58d3d2aSXin Li #include "x86cpu.h"
37*a58d3d2aSXin Li 
38*a58d3d2aSXin Li #if defined(OPUS_HAVE_RTCD) && \
39*a58d3d2aSXin Li   ((defined(OPUS_X86_MAY_HAVE_SSE) && !defined(OPUS_X86_PRESUME_SSE)) || \
40*a58d3d2aSXin Li   (defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(OPUS_X86_PRESUME_SSE2)) || \
41*a58d3d2aSXin Li   (defined(OPUS_X86_MAY_HAVE_SSE4_1) && !defined(OPUS_X86_PRESUME_SSE4_1)) || \
42*a58d3d2aSXin Li   (defined(OPUS_X86_MAY_HAVE_AVX2) && !defined(OPUS_X86_PRESUME_AVX2)))
43*a58d3d2aSXin Li 
44*a58d3d2aSXin Li #if defined(_MSC_VER)
45*a58d3d2aSXin Li 
46*a58d3d2aSXin Li #include <intrin.h>
cpuid(unsigned int CPUInfo[4],unsigned int InfoType)47*a58d3d2aSXin Li static _inline void cpuid(unsigned int CPUInfo[4], unsigned int InfoType)
48*a58d3d2aSXin Li {
49*a58d3d2aSXin Li     __cpuid((int*)CPUInfo, InfoType);
50*a58d3d2aSXin Li }
51*a58d3d2aSXin Li 
52*a58d3d2aSXin Li #else
53*a58d3d2aSXin Li 
54*a58d3d2aSXin Li #if defined(CPU_INFO_BY_C)
55*a58d3d2aSXin Li #include <cpuid.h>
56*a58d3d2aSXin Li #endif
57*a58d3d2aSXin Li 
cpuid(unsigned int CPUInfo[4],unsigned int InfoType)58*a58d3d2aSXin Li static void cpuid(unsigned int CPUInfo[4], unsigned int InfoType)
59*a58d3d2aSXin Li {
60*a58d3d2aSXin Li #if defined(CPU_INFO_BY_ASM)
61*a58d3d2aSXin Li #if defined(__i386__) && defined(__PIC__)
62*a58d3d2aSXin Li /* %ebx is PIC register in 32-bit, so mustn't clobber it. */
63*a58d3d2aSXin Li     __asm__ __volatile__ (
64*a58d3d2aSXin Li         "xchg %%ebx, %1\n"
65*a58d3d2aSXin Li         "cpuid\n"
66*a58d3d2aSXin Li         "xchg %%ebx, %1\n":
67*a58d3d2aSXin Li         "=a" (CPUInfo[0]),
68*a58d3d2aSXin Li         "=r" (CPUInfo[1]),
69*a58d3d2aSXin Li         "=c" (CPUInfo[2]),
70*a58d3d2aSXin Li         "=d" (CPUInfo[3]) :
71*a58d3d2aSXin Li         /* We clear ECX to avoid a valgrind false-positive prior to v3.17.0. */
72*a58d3d2aSXin Li         "0" (InfoType), "2" (0)
73*a58d3d2aSXin Li     );
74*a58d3d2aSXin Li #else
75*a58d3d2aSXin Li     __asm__ __volatile__ (
76*a58d3d2aSXin Li         "cpuid":
77*a58d3d2aSXin Li         "=a" (CPUInfo[0]),
78*a58d3d2aSXin Li         "=b" (CPUInfo[1]),
79*a58d3d2aSXin Li         "=c" (CPUInfo[2]),
80*a58d3d2aSXin Li         "=d" (CPUInfo[3]) :
81*a58d3d2aSXin Li         /* We clear ECX to avoid a valgrind false-positive prior to v3.17.0. */
82*a58d3d2aSXin Li         "0" (InfoType), "2" (0)
83*a58d3d2aSXin Li     );
84*a58d3d2aSXin Li #endif
85*a58d3d2aSXin Li #elif defined(CPU_INFO_BY_C)
86*a58d3d2aSXin Li     /* We use __get_cpuid_count to clear ECX to avoid a valgrind false-positive
87*a58d3d2aSXin Li         prior to v3.17.0.*/
88*a58d3d2aSXin Li     if (!__get_cpuid_count(InfoType, 0, &(CPUInfo[0]), &(CPUInfo[1]), &(CPUInfo[2]), &(CPUInfo[3]))) {
89*a58d3d2aSXin Li         /* Our function cannot fail, but __get_cpuid{_count} can.
90*a58d3d2aSXin Li            Returning all zeroes will effectively disable all SIMD, which is
91*a58d3d2aSXin Li             what we want on CPUs that don't support CPUID. */
92*a58d3d2aSXin Li         CPUInfo[3] = CPUInfo[2] = CPUInfo[1] = CPUInfo[0] = 0;
93*a58d3d2aSXin Li     }
94*a58d3d2aSXin Li #else
95*a58d3d2aSXin Li # error "Configured to use x86 RTCD, but no CPU detection method available. " \
96*a58d3d2aSXin Li  "Reconfigure with --disable-rtcd (or send patches)."
97*a58d3d2aSXin Li #endif
98*a58d3d2aSXin Li }
99*a58d3d2aSXin Li 
100*a58d3d2aSXin Li #endif
101*a58d3d2aSXin Li 
/* Result of the run-time CPU probe: one flag per SIMD extension. Each flag is
   non-zero when the extension was detected and zero otherwise. */
typedef struct CPU_Feature{
    int HW_SSE;   /* SSE    (128-bit SIMD) */
    int HW_SSE2;  /* SSE2   (128-bit SIMD) */
    int HW_SSE41; /* SSE4.1 (128-bit SIMD) */
    int HW_AVX2;  /* AVX2   (256-bit SIMD) */
} CPU_Feature;
110*a58d3d2aSXin Li 
/* Reads the low 32 bits of XCR0 (extended control register 0) using XGETBV.
   Must only be called once CPUID.1:ECX.OSXSAVE has been confirmed set;
   otherwise the instruction raises an invalid-opcode fault. */
static unsigned int opus_read_xcr0(void)
{
#if defined(_MSC_VER) && !defined(__clang__)
    return (unsigned int)_xgetbv(0);
#else
    unsigned int lo;
    unsigned int hi;

    /* The raw encoding of XGETBV is emitted so that assemblers which predate
       the mnemonic still accept it. ECX = 0 selects XCR0. */
    __asm__ __volatile__ (
        ".byte 0x0f, 0x01, 0xd0"
        : "=a" (lo), "=d" (hi)
        : "c" (0)
    );
    (void)hi;
    return lo;
#endif
}

/* Probes the CPU with CPUID and fills *cpu_feature with one flag per SIMD
   extension (1 = usable, 0 = not usable).

   HW_AVX2 is only reported when all of the following hold:
     - CPUID.1:ECX reports AVX (bit 28) and FMA (bit 12),
     - CPUID.1:ECX reports OSXSAVE (bit 27) and XCR0 shows that the OS
       saves/restores both XMM and YMM state (bits 1 and 2),
     - CPUID leaf 7 exists and its EBX reports AVX2 (bit 5).
   Without the OS check, AVX2 code could be selected on a system whose kernel
   or hypervisor has not enabled YMM state, where it would fault. */
static void opus_cpu_feature_check(CPU_Feature *cpu_feature)
{
    unsigned int info[4];
    unsigned int nIds;
    int has_avx;
    int has_fma;
    int has_osxsave;

    /* Start from "nothing available"; every early return below leaves the
       remaining flags cleared. */
    cpu_feature->HW_SSE = 0;
    cpu_feature->HW_SSE2 = 0;
    cpu_feature->HW_SSE41 = 0;
    cpu_feature->HW_AVX2 = 0;

    /* Leaf 0: EAX holds the highest supported standard leaf. */
    cpuid(info, 0);
    nIds = info[0];
    if (nIds < 1) {
        return;
    }

    /* Leaf 1: feature bits in EDX (info[3]) and ECX (info[2]). */
    cpuid(info, 1);
    cpu_feature->HW_SSE = (info[3] & (1U << 25)) != 0;
    cpu_feature->HW_SSE2 = (info[3] & (1U << 26)) != 0;
    cpu_feature->HW_SSE41 = (info[2] & (1U << 19)) != 0;

    has_avx = (info[2] & (1U << 28)) != 0;
    has_fma = (info[2] & (1U << 12)) != 0;
    has_osxsave = (info[2] & (1U << 27)) != 0;
    if (!has_avx || !has_fma || !has_osxsave || nIds < 7) {
        return;
    }

    /* XCR0 bit 1 = SSE (XMM) state, bit 2 = AVX (YMM) state. Both must be
       enabled by the OS before any AVX instruction may be executed. */
    if ((opus_read_xcr0() & 0x6U) != 0x6U) {
        return;
    }

    /* Leaf 7, sub-leaf 0: EBX (info[1]) bit 5 is AVX2. */
    cpuid(info, 7);
    cpu_feature->HW_AVX2 = (info[1] & (1U << 5)) != 0;
}
139*a58d3d2aSXin Li 
opus_select_arch_impl(void)140*a58d3d2aSXin Li static int opus_select_arch_impl(void)
141*a58d3d2aSXin Li {
142*a58d3d2aSXin Li     CPU_Feature cpu_feature;
143*a58d3d2aSXin Li     int arch;
144*a58d3d2aSXin Li 
145*a58d3d2aSXin Li     opus_cpu_feature_check(&cpu_feature);
146*a58d3d2aSXin Li 
147*a58d3d2aSXin Li     arch = 0;
148*a58d3d2aSXin Li     if (!cpu_feature.HW_SSE)
149*a58d3d2aSXin Li     {
150*a58d3d2aSXin Li        return arch;
151*a58d3d2aSXin Li     }
152*a58d3d2aSXin Li     arch++;
153*a58d3d2aSXin Li 
154*a58d3d2aSXin Li     if (!cpu_feature.HW_SSE2)
155*a58d3d2aSXin Li     {
156*a58d3d2aSXin Li        return arch;
157*a58d3d2aSXin Li     }
158*a58d3d2aSXin Li     arch++;
159*a58d3d2aSXin Li 
160*a58d3d2aSXin Li     if (!cpu_feature.HW_SSE41)
161*a58d3d2aSXin Li     {
162*a58d3d2aSXin Li         return arch;
163*a58d3d2aSXin Li     }
164*a58d3d2aSXin Li     arch++;
165*a58d3d2aSXin Li 
166*a58d3d2aSXin Li     if (!cpu_feature.HW_AVX2)
167*a58d3d2aSXin Li     {
168*a58d3d2aSXin Li         return arch;
169*a58d3d2aSXin Li     }
170*a58d3d2aSXin Li     arch++;
171*a58d3d2aSXin Li 
172*a58d3d2aSXin Li     return arch;
173*a58d3d2aSXin Li }
174*a58d3d2aSXin Li 
/* Returns the architecture level to use for run-time dispatch, as computed by
   opus_select_arch_impl(). In FUZZING builds the level is replaced by a
   random value in [0, detected level] so that lower-level code paths are
   exercised too. */
int opus_select_arch(void)
{
    const int detected = opus_select_arch_impl();

#ifdef FUZZING
    /* Randomly downgrade the architecture. */
    return rand()%(detected+1);
#else
    return detected;
#endif
}
183*a58d3d2aSXin Li 
184*a58d3d2aSXin Li #endif
185