1*09537850SAkhilesh Sanikop /* 2*09537850SAkhilesh Sanikop * Copyright 2019 The libgav1 Authors 3*09537850SAkhilesh Sanikop * 4*09537850SAkhilesh Sanikop * Licensed under the Apache License, Version 2.0 (the "License"); 5*09537850SAkhilesh Sanikop * you may not use this file except in compliance with the License. 6*09537850SAkhilesh Sanikop * You may obtain a copy of the License at 7*09537850SAkhilesh Sanikop * 8*09537850SAkhilesh Sanikop * http://www.apache.org/licenses/LICENSE-2.0 9*09537850SAkhilesh Sanikop * 10*09537850SAkhilesh Sanikop * Unless required by applicable law or agreed to in writing, software 11*09537850SAkhilesh Sanikop * distributed under the License is distributed on an "AS IS" BASIS, 12*09537850SAkhilesh Sanikop * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13*09537850SAkhilesh Sanikop * See the License for the specific language governing permissions and 14*09537850SAkhilesh Sanikop * limitations under the License. 15*09537850SAkhilesh Sanikop */ 16*09537850SAkhilesh Sanikop 17*09537850SAkhilesh Sanikop #ifndef LIBGAV1_SRC_DSP_X86_COMMON_SSE4_H_ 18*09537850SAkhilesh Sanikop #define LIBGAV1_SRC_DSP_X86_COMMON_SSE4_H_ 19*09537850SAkhilesh Sanikop 20*09537850SAkhilesh Sanikop #include "src/utils/compiler_attributes.h" 21*09537850SAkhilesh Sanikop #include "src/utils/cpu.h" 22*09537850SAkhilesh Sanikop 23*09537850SAkhilesh Sanikop #if LIBGAV1_TARGETING_SSE4_1 24*09537850SAkhilesh Sanikop 25*09537850SAkhilesh Sanikop #include <emmintrin.h> 26*09537850SAkhilesh Sanikop #include <smmintrin.h> 27*09537850SAkhilesh Sanikop 28*09537850SAkhilesh Sanikop #include <cassert> 29*09537850SAkhilesh Sanikop #include <cstddef> 30*09537850SAkhilesh Sanikop #include <cstdint> 31*09537850SAkhilesh Sanikop #include <cstring> 32*09537850SAkhilesh Sanikop 33*09537850SAkhilesh Sanikop #if 0 34*09537850SAkhilesh Sanikop #include <cinttypes> 35*09537850SAkhilesh Sanikop #include <cstdio> 36*09537850SAkhilesh Sanikop 37*09537850SAkhilesh Sanikop // Quite useful macro for debugging. Left here for convenience. 38*09537850SAkhilesh Sanikop inline void PrintReg(const __m128i r, const char* const name, int size) { 39*09537850SAkhilesh Sanikop int n; 40*09537850SAkhilesh Sanikop union { 41*09537850SAkhilesh Sanikop __m128i r; 42*09537850SAkhilesh Sanikop uint8_t i8[16]; 43*09537850SAkhilesh Sanikop uint16_t i16[8]; 44*09537850SAkhilesh Sanikop uint32_t i32[4]; 45*09537850SAkhilesh Sanikop uint64_t i64[2]; 46*09537850SAkhilesh Sanikop } tmp; 47*09537850SAkhilesh Sanikop tmp.r = r; 48*09537850SAkhilesh Sanikop fprintf(stderr, "%s\t: ", name); 49*09537850SAkhilesh Sanikop if (size == 8) { 50*09537850SAkhilesh Sanikop for (n = 0; n < 16; ++n) fprintf(stderr, "%.2x ", tmp.i8[n]); 51*09537850SAkhilesh Sanikop } else if (size == 16) { 52*09537850SAkhilesh Sanikop for (n = 0; n < 8; ++n) fprintf(stderr, "%.4x ", tmp.i16[n]); 53*09537850SAkhilesh Sanikop } else if (size == 32) { 54*09537850SAkhilesh Sanikop for (n = 0; n < 4; ++n) fprintf(stderr, "%.8x ", tmp.i32[n]); 55*09537850SAkhilesh Sanikop } else { 56*09537850SAkhilesh Sanikop for (n = 0; n < 2; ++n) 57*09537850SAkhilesh Sanikop fprintf(stderr, "%.16" PRIx64 " ", static_cast<uint64_t>(tmp.i64[n])); 58*09537850SAkhilesh Sanikop } 59*09537850SAkhilesh Sanikop fprintf(stderr, "\n"); 60*09537850SAkhilesh Sanikop } 61*09537850SAkhilesh Sanikop 62*09537850SAkhilesh Sanikop inline void PrintReg(const int r, const char* const name) { 63*09537850SAkhilesh Sanikop fprintf(stderr, "%s: %d\n", name, r); 64*09537850SAkhilesh Sanikop } 65*09537850SAkhilesh Sanikop 66*09537850SAkhilesh Sanikop inline void PrintRegX(const int r, const char* const name) { 67*09537850SAkhilesh Sanikop fprintf(stderr, "%s: %.8x\n", name, r); 68*09537850SAkhilesh Sanikop } 69*09537850SAkhilesh Sanikop 70*09537850SAkhilesh Sanikop #define PR(var, N) PrintReg(var, #var, N) 71*09537850SAkhilesh Sanikop #define PD(var) PrintReg(var, #var); 72*09537850SAkhilesh Sanikop #define PX(var) PrintRegX(var, #var); 73*09537850SAkhilesh Sanikop 74*09537850SAkhilesh Sanikop #if LIBGAV1_MSAN 75*09537850SAkhilesh Sanikop #include <sanitizer/msan_interface.h> 76*09537850SAkhilesh Sanikop 77*09537850SAkhilesh Sanikop inline void PrintShadow(const void* r, const char* const name, 78*09537850SAkhilesh Sanikop const size_t size) { 79*09537850SAkhilesh Sanikop fprintf(stderr, "Shadow for %s:\n", name); 80*09537850SAkhilesh Sanikop __msan_print_shadow(r, size); 81*09537850SAkhilesh Sanikop } 82*09537850SAkhilesh Sanikop #define PS(var, N) PrintShadow(var, #var, N) 83*09537850SAkhilesh Sanikop 84*09537850SAkhilesh Sanikop #endif // LIBGAV1_MSAN 85*09537850SAkhilesh Sanikop 86*09537850SAkhilesh Sanikop #endif // 0 87*09537850SAkhilesh Sanikop 88*09537850SAkhilesh Sanikop namespace libgav1 { 89*09537850SAkhilesh Sanikop namespace dsp { 90*09537850SAkhilesh Sanikop namespace sse4 { 91*09537850SAkhilesh Sanikop 92*09537850SAkhilesh Sanikop #include "src/dsp/x86/common_sse4.inc" 93*09537850SAkhilesh Sanikop 94*09537850SAkhilesh Sanikop } // namespace sse4 95*09537850SAkhilesh Sanikop 96*09537850SAkhilesh Sanikop // NOLINTBEGIN(misc-unused-using-decls) 97*09537850SAkhilesh Sanikop // These function aliases shall not be visible to external code. They are 98*09537850SAkhilesh Sanikop // restricted to x86/*_sse4.cc files only. This scheme exists to distinguish two 99*09537850SAkhilesh Sanikop // possible implementations of common functions, which may differ based on 100*09537850SAkhilesh Sanikop // whether the compiler is permitted to use avx2 instructions. 101*09537850SAkhilesh Sanikop using sse4::Load2; 102*09537850SAkhilesh Sanikop using sse4::Load2x2; 103*09537850SAkhilesh Sanikop using sse4::Load4; 104*09537850SAkhilesh Sanikop using sse4::Load4x2; 105*09537850SAkhilesh Sanikop using sse4::LoadAligned16; 106*09537850SAkhilesh Sanikop using sse4::LoadAligned16Msan; 107*09537850SAkhilesh Sanikop using sse4::LoadHi8; 108*09537850SAkhilesh Sanikop using sse4::LoadHi8Msan; 109*09537850SAkhilesh Sanikop using sse4::LoadLo8; 110*09537850SAkhilesh Sanikop using sse4::LoadLo8Msan; 111*09537850SAkhilesh Sanikop using sse4::LoadUnaligned16; 112*09537850SAkhilesh Sanikop using sse4::LoadUnaligned16Msan; 113*09537850SAkhilesh Sanikop using sse4::MaskHighNBytes; 114*09537850SAkhilesh Sanikop using sse4::RightShiftWithRounding_S16; 115*09537850SAkhilesh Sanikop using sse4::RightShiftWithRounding_S32; 116*09537850SAkhilesh Sanikop using sse4::RightShiftWithRounding_U16; 117*09537850SAkhilesh Sanikop using sse4::RightShiftWithRounding_U32; 118*09537850SAkhilesh Sanikop using sse4::Store2; 119*09537850SAkhilesh Sanikop using sse4::Store4; 120*09537850SAkhilesh Sanikop using sse4::StoreAligned16; 121*09537850SAkhilesh Sanikop using sse4::StoreHi8; 122*09537850SAkhilesh Sanikop using sse4::StoreLo8; 123*09537850SAkhilesh Sanikop using sse4::StoreUnaligned16; 124*09537850SAkhilesh Sanikop // NOLINTEND 125*09537850SAkhilesh Sanikop 126*09537850SAkhilesh Sanikop } // namespace dsp 127*09537850SAkhilesh Sanikop } // namespace libgav1 128*09537850SAkhilesh Sanikop 129*09537850SAkhilesh Sanikop #endif // LIBGAV1_TARGETING_SSE4_1 130*09537850SAkhilesh Sanikop #endif // LIBGAV1_SRC_DSP_X86_COMMON_SSE4_H_ 131