xref: /aosp_15_r20/external/libgav1/src/dsp/x86/common_sse4.h (revision 095378508e87ed692bf8dfeb34008b65b3735891)
1*09537850SAkhilesh Sanikop /*
2*09537850SAkhilesh Sanikop  * Copyright 2019 The libgav1 Authors
3*09537850SAkhilesh Sanikop  *
4*09537850SAkhilesh Sanikop  * Licensed under the Apache License, Version 2.0 (the "License");
5*09537850SAkhilesh Sanikop  * you may not use this file except in compliance with the License.
6*09537850SAkhilesh Sanikop  * You may obtain a copy of the License at
7*09537850SAkhilesh Sanikop  *
8*09537850SAkhilesh Sanikop  *      http://www.apache.org/licenses/LICENSE-2.0
9*09537850SAkhilesh Sanikop  *
10*09537850SAkhilesh Sanikop  * Unless required by applicable law or agreed to in writing, software
11*09537850SAkhilesh Sanikop  * distributed under the License is distributed on an "AS IS" BASIS,
12*09537850SAkhilesh Sanikop  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13*09537850SAkhilesh Sanikop  * See the License for the specific language governing permissions and
14*09537850SAkhilesh Sanikop  * limitations under the License.
15*09537850SAkhilesh Sanikop  */
16*09537850SAkhilesh Sanikop 
17*09537850SAkhilesh Sanikop #ifndef LIBGAV1_SRC_DSP_X86_COMMON_SSE4_H_
18*09537850SAkhilesh Sanikop #define LIBGAV1_SRC_DSP_X86_COMMON_SSE4_H_
19*09537850SAkhilesh Sanikop 
20*09537850SAkhilesh Sanikop #include "src/utils/compiler_attributes.h"
21*09537850SAkhilesh Sanikop #include "src/utils/cpu.h"
22*09537850SAkhilesh Sanikop 
23*09537850SAkhilesh Sanikop #if LIBGAV1_TARGETING_SSE4_1
24*09537850SAkhilesh Sanikop 
25*09537850SAkhilesh Sanikop #include <emmintrin.h>
26*09537850SAkhilesh Sanikop #include <smmintrin.h>
27*09537850SAkhilesh Sanikop 
28*09537850SAkhilesh Sanikop #include <cassert>
29*09537850SAkhilesh Sanikop #include <cstddef>
30*09537850SAkhilesh Sanikop #include <cstdint>
31*09537850SAkhilesh Sanikop #include <cstring>
32*09537850SAkhilesh Sanikop 
33*09537850SAkhilesh Sanikop #if 0
34*09537850SAkhilesh Sanikop #include <cinttypes>
35*09537850SAkhilesh Sanikop #include <cstdio>
36*09537850SAkhilesh Sanikop 
37*09537850SAkhilesh Sanikop // Quite useful macro for debugging. Left here for convenience.
// Debug helper: dumps the 128-bit register |r| to stderr as hexadecimal lanes,
// preceded by "<name>\t: " and followed by a newline.
//
// |size| selects the lane width in bits: 8 prints 16 lanes, 16 prints 8 lanes,
// 32 prints 4 lanes, and any other value prints 2 64-bit lanes. Lanes are
// printed in memory order (lowest address first).
//
// The register bytes are copied out with memcpy() instead of being read
// through a union: reading a union member other than the one last written is
// undefined behavior in C++.
inline void PrintReg(const __m128i r, const char* const name, int size) {
  fprintf(stderr, "%s\t: ", name);
  if (size == 8) {
    uint8_t lanes[16];
    static_assert(sizeof(lanes) == sizeof(r), "lane array must cover r");
    memcpy(lanes, &r, sizeof(lanes));
    for (const uint8_t lane : lanes) fprintf(stderr, "%.2x ", lane);
  } else if (size == 16) {
    uint16_t lanes[8];
    static_assert(sizeof(lanes) == sizeof(r), "lane array must cover r");
    memcpy(lanes, &r, sizeof(lanes));
    for (const uint16_t lane : lanes) fprintf(stderr, "%.4x ", lane);
  } else if (size == 32) {
    uint32_t lanes[4];
    static_assert(sizeof(lanes) == sizeof(r), "lane array must cover r");
    memcpy(lanes, &r, sizeof(lanes));
    for (const uint32_t lane : lanes) fprintf(stderr, "%.8x ", lane);
  } else {
    // Fallback for every other |size| value: two 64-bit lanes.
    uint64_t lanes[2];
    static_assert(sizeof(lanes) == sizeof(r), "lane array must cover r");
    memcpy(lanes, &r, sizeof(lanes));
    for (const uint64_t lane : lanes) {
      fprintf(stderr, "%.16" PRIx64 " ", lane);
    }
  }
  fprintf(stderr, "\n");
}
61*09537850SAkhilesh Sanikop 
// Debug helper: writes "<name>: <r>" plus a newline to stderr, with |r| shown
// in decimal.
inline void PrintReg(const int r, const char* const name) {
  fprintf(stderr, "%s: %d\n", name, r);
}
65*09537850SAkhilesh Sanikop 
// Debug helper: writes "<name>: <r>" plus a newline to stderr, with |r| shown
// in hexadecimal, zero-padded to at least 8 digits.
inline void PrintRegX(const int r, const char* const name) {
  // %x consumes an unsigned int. Convert explicitly so that negative values
  // reach fprintf() through a well-defined (modulo 2^N) conversion instead of
  // a signed/unsigned argument mismatch.
  fprintf(stderr, "%s: %.8x\n", name, static_cast<unsigned int>(r));
}
69*09537850SAkhilesh Sanikop 
// Shorthands that pass the stringized expression (#var) as the label:
//   PR(var, N): dump the __m128i |var| as N-bit lanes via PrintReg().
//   PD(var):    print the int |var| in decimal via PrintReg().
//   PX(var):    print the int |var| in hexadecimal via PrintRegX().
// None of the macros ends in a semicolon, so each one expands to a single
// expression and can be used in an unbraced if/else.
#define PR(var, N) PrintReg(var, #var, N)
#define PD(var) PrintReg(var, #var)
#define PX(var) PrintRegX(var, #var)
73*09537850SAkhilesh Sanikop 
74*09537850SAkhilesh Sanikop #if LIBGAV1_MSAN
75*09537850SAkhilesh Sanikop #include <sanitizer/msan_interface.h>
76*09537850SAkhilesh Sanikop 
// Debug helper for MemorySanitizer builds: writes "Shadow for <name>:" to
// stderr, then hands |r| and |size| to __msan_print_shadow().
// NOTE(review): |size| is presumably a byte count for the memory starting at
// |r|; confirm against the sanitizer interface documentation.
inline void PrintShadow(const void* r, const char* const name,
                        const size_t size) {
  fprintf(stderr, "Shadow for %s:\n", name);
  __msan_print_shadow(r, size);
}
// PS(var, N): calls PrintShadow() on |var|, labeled with the stringized
// expression.
#define PS(var, N) PrintShadow(var, #var, N)
83*09537850SAkhilesh Sanikop 
84*09537850SAkhilesh Sanikop #endif  // LIBGAV1_MSAN
85*09537850SAkhilesh Sanikop 
86*09537850SAkhilesh Sanikop #endif  // 0
87*09537850SAkhilesh Sanikop 
namespace libgav1 {
namespace dsp {
// Wraps the shared helper definitions so that they live in their own nested
// namespace (libgav1::dsp::sse4).
namespace sse4 {

// Included inside the namespace on purpose: textual inclusion places every
// definition from the .inc file in libgav1::dsp::sse4.
#include "src/dsp/x86/common_sse4.inc"

}  // namespace sse4

// NOLINTBEGIN(misc-unused-using-decls)
// These function aliases shall not be visible to external code. They are
// restricted to x86/*_sse4.cc files only. This scheme exists to distinguish two
// possible implementations of common functions, which may differ based on
// whether the compiler is permitted to use avx2 instructions.
//
// Each using-declaration below makes the sse4:: name usable unqualified inside
// libgav1::dsp. The names themselves are presumably defined by
// common_sse4.inc, which is not visible from this header.
using sse4::Load2;
using sse4::Load2x2;
using sse4::Load4;
using sse4::Load4x2;
using sse4::LoadAligned16;
using sse4::LoadAligned16Msan;
using sse4::LoadHi8;
using sse4::LoadHi8Msan;
using sse4::LoadLo8;
using sse4::LoadLo8Msan;
using sse4::LoadUnaligned16;
using sse4::LoadUnaligned16Msan;
using sse4::MaskHighNBytes;
using sse4::RightShiftWithRounding_S16;
using sse4::RightShiftWithRounding_S32;
using sse4::RightShiftWithRounding_U16;
using sse4::RightShiftWithRounding_U32;
using sse4::Store2;
using sse4::Store4;
using sse4::StoreAligned16;
using sse4::StoreHi8;
using sse4::StoreLo8;
using sse4::StoreUnaligned16;
// NOLINTEND

}  // namespace dsp
}  // namespace libgav1
128*09537850SAkhilesh Sanikop 
129*09537850SAkhilesh Sanikop #endif  // LIBGAV1_TARGETING_SSE4_1
130*09537850SAkhilesh Sanikop #endif  // LIBGAV1_SRC_DSP_X86_COMMON_SSE4_H_
131