/* Copyright (c) 2014, Cisco Systems, INC
   Written by XiangMingZhu WeiZhou MinPeng YanWang

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions
   are met:

   - Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.

   - Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the distribution.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

#if !defined(X86CPU_H)
# define X86CPU_H

# if defined(OPUS_X86_MAY_HAVE_SSE)
#  define MAY_HAVE_SSE(name) name ## _sse
# else
#  define MAY_HAVE_SSE(name) name ## _c
# endif

# if defined(OPUS_X86_MAY_HAVE_SSE2)
#  define MAY_HAVE_SSE2(name) name ## _sse2
# else
#  define MAY_HAVE_SSE2(name) name ## _c
# endif

# if defined(OPUS_X86_MAY_HAVE_SSE4_1)
#  define MAY_HAVE_SSE4_1(name) name ## _sse4_1
# else
#  define MAY_HAVE_SSE4_1(name) name ## _c
# endif

# if defined(OPUS_X86_MAY_HAVE_AVX2)
#  define MAY_HAVE_AVX2(name) name ## _avx2
# else
#  define MAY_HAVE_AVX2(name) name ## _c
# endif
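
/* Usage sketch (illustration only, not part of the original header): the
   MAY_HAVE_*() macros are typically used to build per-architecture dispatch
   tables.  For a hypothetical function my_func() with my_func_c(),
   my_func_sse(), my_func_sse2(), my_func_sse4_1() and my_func_avx2()
   variants, a table indexed by the detected arch value could look like:

     void (*const MY_FUNC_IMPL[OPUS_ARCHMASK + 1])(int *x, int N) = {
       my_func_c,                // arch 0: plain C
       MAY_HAVE_SSE(my_func),    // arch 1: SSE, or _c fallback
       MAY_HAVE_SSE2(my_func),   // arch 2: SSE2, or _c fallback
       MAY_HAVE_SSE4_1(my_func), // arch 3: SSE4.1, or _c fallback
       MAY_HAVE_AVX2(my_func)    // arch 4: AVX2, or _c fallback
     };

   Each MAY_HAVE_*() entry expands to the optimized symbol when the matching
   OPUS_X86_MAY_HAVE_* macro is defined at build time, and to the _c symbol
   otherwise, so the table always refers to symbols that exist in the build. */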

# if defined(OPUS_HAVE_RTCD) && \
  ((defined(OPUS_X86_MAY_HAVE_SSE) && !defined(OPUS_X86_PRESUME_SSE)) || \
  (defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(OPUS_X86_PRESUME_SSE2)) || \
  (defined(OPUS_X86_MAY_HAVE_SSE4_1) && !defined(OPUS_X86_PRESUME_SSE4_1)) || \
  (defined(OPUS_X86_MAY_HAVE_AVX2) && !defined(OPUS_X86_PRESUME_AVX2)))
int opus_select_arch(void);
# endif
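
/* Usage sketch (illustration only, not part of the original header): with
   run-time CPU detection (RTCD) enabled, opus_select_arch() probes the CPU
   once, typically at initialization, and its result indexes dispatch tables
   such as the hypothetical MY_FUNC_IMPL above:

     int arch = opus_select_arch();
     MY_FUNC_IMPL[arch & OPUS_ARCHMASK](buf, n);

   Masking with OPUS_ARCHMASK keeps the index inside the table even if a
   caller passes an out-of-range arch value. */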

# if defined(OPUS_X86_MAY_HAVE_SSE2)
#  include "opus_defines.h"

/*MOVD should not impose any alignment restrictions, but the C standard does,
   and UBSan will report errors if we actually make unaligned accesses.
  Use this to work around those restrictions (which should hopefully all get
   optimized to a single MOVD instruction).
  GCC implemented _mm_loadu_si32() since GCC 11; HOWEVER, there is a bug!
  https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99754 */
#  if !defined(_MSC_VER) && !OPUS_GNUC_PREREQ(11,3) && !(defined(__clang__) && (__clang_major__ >= 8))
#   include <string.h>
#   include <emmintrin.h>

#   ifdef _mm_loadu_si32
#    undef _mm_loadu_si32
#   endif
#   define _mm_loadu_si32 WORKAROUND_mm_loadu_si32
static inline __m128i WORKAROUND_mm_loadu_si32(void const* mem_addr) {
  int val;
  memcpy(&val, mem_addr, sizeof(val));
  return _mm_cvtsi32_si128(val);
}
#  elif defined(_MSC_VER)
    /* MSVC needs this for _mm_loadu_si32 */
#   include <immintrin.h>
#  endif

#  define OP_CVTEPI8_EPI32_M32(x) \
 (_mm_cvtepi8_epi32(_mm_loadu_si32(x)))

#  define OP_CVTEPI16_EPI32_M64(x) \
 (_mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i *)(void*)(x))))
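
/* Usage sketch (illustration only, not part of the original header): these
   helpers load a small, possibly unaligned block of narrow integers and
   sign-extend it into 32-bit lanes.  For a hypothetical signed 8-bit buffer:

     const signed char q[4] = { -1, 2, -3, 4 };
     __m128i v = OP_CVTEPI8_EPI32_M32(q);   // lanes: -1, 2, -3, 4

   OP_CVTEPI16_EPI32_M64() does the same for four 16-bit values loaded from
   an 8-byte block. */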

# endif

#endif