1*b2055c35SXin Li // Copyright 2015 Google Inc. All Rights Reserved. 2*b2055c35SXin Li // 3*b2055c35SXin Li // Use of this source code is governed by a BSD-style license 4*b2055c35SXin Li // that can be found in the COPYING file in the root of the source 5*b2055c35SXin Li // tree. An additional intellectual property rights grant can be found 6*b2055c35SXin Li // in the file PATENTS. All contributing project authors may 7*b2055c35SXin Li // be found in the AUTHORS file in the root of the source tree. 8*b2055c35SXin Li // ----------------------------------------------------------------------------- 9*b2055c35SXin Li // 10*b2055c35SXin Li // SSE4 version of some decoding functions. 11*b2055c35SXin Li // 12*b2055c35SXin Li // Author: Skal ([email protected]) 13*b2055c35SXin Li 14*b2055c35SXin Li #include "src/dsp/dsp.h" 15*b2055c35SXin Li 16*b2055c35SXin Li #if defined(WEBP_USE_SSE41) 17*b2055c35SXin Li 18*b2055c35SXin Li #include <smmintrin.h> 19*b2055c35SXin Li #include "src/dec/vp8i_dec.h" 20*b2055c35SXin Li #include "src/utils/utils.h" 21*b2055c35SXin Li HE16_SSE41(uint8_t * dst)22*b2055c35SXin Listatic void HE16_SSE41(uint8_t* dst) { // horizontal 23*b2055c35SXin Li int j; 24*b2055c35SXin Li const __m128i kShuffle3 = _mm_set1_epi8(3); 25*b2055c35SXin Li for (j = 16; j > 0; --j) { 26*b2055c35SXin Li const __m128i in = _mm_cvtsi32_si128(WebPMemToInt32(dst - 4)); 27*b2055c35SXin Li const __m128i values = _mm_shuffle_epi8(in, kShuffle3); 28*b2055c35SXin Li _mm_storeu_si128((__m128i*)dst, values); 29*b2055c35SXin Li dst += BPS; 30*b2055c35SXin Li } 31*b2055c35SXin Li } 32*b2055c35SXin Li 33*b2055c35SXin Li //------------------------------------------------------------------------------ 34*b2055c35SXin Li // Entry point 35*b2055c35SXin Li 36*b2055c35SXin Li extern void VP8DspInitSSE41(void); 37*b2055c35SXin Li VP8DspInitSSE41(void)38*b2055c35SXin LiWEBP_TSAN_IGNORE_FUNCTION void VP8DspInitSSE41(void) { 39*b2055c35SXin Li VP8PredLuma16[3] = HE16_SSE41; 40*b2055c35SXin Li } 41*b2055c35SXin Li 42*b2055c35SXin Li #else // !WEBP_USE_SSE41 43*b2055c35SXin Li 44*b2055c35SXin Li WEBP_DSP_INIT_STUB(VP8DspInitSSE41) 45*b2055c35SXin Li 46*b2055c35SXin Li #endif // WEBP_USE_SSE41 47