xref: /aosp_15_r20/external/webp/src/dsp/dec_sse41.c (revision b2055c353e87c8814eb2b6b1b11112a1562253bd)
// Copyright 2015 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// SSE4 version of some decoding functions.
//
// Author: Skal ([email protected])
13*b2055c35SXin Li 
14*b2055c35SXin Li #include "src/dsp/dsp.h"
15*b2055c35SXin Li 
16*b2055c35SXin Li #if defined(WEBP_USE_SSE41)
17*b2055c35SXin Li 
18*b2055c35SXin Li #include <smmintrin.h>
19*b2055c35SXin Li #include "src/dec/vp8i_dec.h"
20*b2055c35SXin Li #include "src/utils/utils.h"
21*b2055c35SXin Li 
HE16_SSE41(uint8_t * dst)22*b2055c35SXin Li static void HE16_SSE41(uint8_t* dst) {     // horizontal
23*b2055c35SXin Li   int j;
24*b2055c35SXin Li   const __m128i kShuffle3 = _mm_set1_epi8(3);
25*b2055c35SXin Li   for (j = 16; j > 0; --j) {
26*b2055c35SXin Li     const __m128i in = _mm_cvtsi32_si128(WebPMemToInt32(dst - 4));
27*b2055c35SXin Li     const __m128i values = _mm_shuffle_epi8(in, kShuffle3);
28*b2055c35SXin Li     _mm_storeu_si128((__m128i*)dst, values);
29*b2055c35SXin Li     dst += BPS;
30*b2055c35SXin Li   }
31*b2055c35SXin Li }
32*b2055c35SXin Li 
//------------------------------------------------------------------------------
// Entry point
35*b2055c35SXin Li 
36*b2055c35SXin Li extern void VP8DspInitSSE41(void);
37*b2055c35SXin Li 
VP8DspInitSSE41(void)38*b2055c35SXin Li WEBP_TSAN_IGNORE_FUNCTION void VP8DspInitSSE41(void) {
39*b2055c35SXin Li   VP8PredLuma16[3] = HE16_SSE41;
40*b2055c35SXin Li }
41*b2055c35SXin Li 
42*b2055c35SXin Li #else  // !WEBP_USE_SSE41
43*b2055c35SXin Li 
// Fallback used when WEBP_USE_SSE41 is not defined.
// NOTE(review): WEBP_DSP_INIT_STUB is defined outside this file; presumably it
// expands to an empty VP8DspInitSSE41() so the symbol still exists for
// callers -- confirm in src/dsp/dsp.h.
44*b2055c35SXin Li WEBP_DSP_INIT_STUB(VP8DspInitSSE41)
45*b2055c35SXin Li 
46*b2055c35SXin Li #endif  // WEBP_USE_SSE41
47