xref: /aosp_15_r20/external/libaom/av1/common/x86/reconinter_ssse3.c (revision 77c1e3ccc04c968bd2bc212e87364f250e820521)
/*
 * Copyright (c) 2018, Alliance for Open Media. All rights reserved.
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */
11*77c1e3ccSAndroid Build Coastguard Worker 
12*77c1e3ccSAndroid Build Coastguard Worker #include "config/av1_rtcd.h"
13*77c1e3ccSAndroid Build Coastguard Worker 
14*77c1e3ccSAndroid Build Coastguard Worker #if CONFIG_AV1_HIGHBITDEPTH
15*77c1e3ccSAndroid Build Coastguard Worker 
16*77c1e3ccSAndroid Build Coastguard Worker #include <tmmintrin.h>
17*77c1e3ccSAndroid Build Coastguard Worker 
18*77c1e3ccSAndroid Build Coastguard Worker #include "aom/aom_integer.h"
19*77c1e3ccSAndroid Build Coastguard Worker #include "aom_dsp/blend.h"
20*77c1e3ccSAndroid Build Coastguard Worker #include "aom_dsp/x86/synonyms.h"
21*77c1e3ccSAndroid Build Coastguard Worker #include "av1/common/blockd.h"
22*77c1e3ccSAndroid Build Coastguard Worker 
av1_build_compound_diffwtd_mask_highbd_ssse3(uint8_t * mask,DIFFWTD_MASK_TYPE mask_type,const uint8_t * src0,int src0_stride,const uint8_t * src1,int src1_stride,int h,int w,int bd)23*77c1e3ccSAndroid Build Coastguard Worker void av1_build_compound_diffwtd_mask_highbd_ssse3(
24*77c1e3ccSAndroid Build Coastguard Worker     uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0,
25*77c1e3ccSAndroid Build Coastguard Worker     int src0_stride, const uint8_t *src1, int src1_stride, int h, int w,
26*77c1e3ccSAndroid Build Coastguard Worker     int bd) {
27*77c1e3ccSAndroid Build Coastguard Worker   if (w < 8) {
28*77c1e3ccSAndroid Build Coastguard Worker     av1_build_compound_diffwtd_mask_highbd_c(mask, mask_type, src0, src0_stride,
29*77c1e3ccSAndroid Build Coastguard Worker                                              src1, src1_stride, h, w, bd);
30*77c1e3ccSAndroid Build Coastguard Worker   } else {
31*77c1e3ccSAndroid Build Coastguard Worker     assert(bd >= 8);
32*77c1e3ccSAndroid Build Coastguard Worker     assert((w % 8) == 0);
33*77c1e3ccSAndroid Build Coastguard Worker     assert(mask_type == DIFFWTD_38 || mask_type == DIFFWTD_38_INV);
34*77c1e3ccSAndroid Build Coastguard Worker     const __m128i x0 = _mm_setzero_si128();
35*77c1e3ccSAndroid Build Coastguard Worker     const __m128i xAOM_BLEND_A64_MAX_ALPHA =
36*77c1e3ccSAndroid Build Coastguard Worker         _mm_set1_epi16(AOM_BLEND_A64_MAX_ALPHA);
37*77c1e3ccSAndroid Build Coastguard Worker     const int mask_base = 38;
38*77c1e3ccSAndroid Build Coastguard Worker     const __m128i xmask_base = _mm_set1_epi16(mask_base);
39*77c1e3ccSAndroid Build Coastguard Worker     const uint16_t *ssrc0 = CONVERT_TO_SHORTPTR(src0);
40*77c1e3ccSAndroid Build Coastguard Worker     const uint16_t *ssrc1 = CONVERT_TO_SHORTPTR(src1);
41*77c1e3ccSAndroid Build Coastguard Worker     if (bd == 8) {
42*77c1e3ccSAndroid Build Coastguard Worker       if (mask_type == DIFFWTD_38_INV) {
43*77c1e3ccSAndroid Build Coastguard Worker         for (int i = 0; i < h; ++i) {
44*77c1e3ccSAndroid Build Coastguard Worker           for (int j = 0; j < w; j += 8) {
45*77c1e3ccSAndroid Build Coastguard Worker             __m128i s0 = _mm_loadu_si128((const __m128i *)&ssrc0[j]);
46*77c1e3ccSAndroid Build Coastguard Worker             __m128i s1 = _mm_loadu_si128((const __m128i *)&ssrc1[j]);
47*77c1e3ccSAndroid Build Coastguard Worker             __m128i diff = _mm_srai_epi16(_mm_abs_epi16(_mm_sub_epi16(s0, s1)),
48*77c1e3ccSAndroid Build Coastguard Worker                                           DIFF_FACTOR_LOG2);
49*77c1e3ccSAndroid Build Coastguard Worker             __m128i m = _mm_min_epi16(
50*77c1e3ccSAndroid Build Coastguard Worker                 _mm_max_epi16(x0, _mm_add_epi16(diff, xmask_base)),
51*77c1e3ccSAndroid Build Coastguard Worker                 xAOM_BLEND_A64_MAX_ALPHA);
52*77c1e3ccSAndroid Build Coastguard Worker             m = _mm_sub_epi16(xAOM_BLEND_A64_MAX_ALPHA, m);
53*77c1e3ccSAndroid Build Coastguard Worker             m = _mm_packus_epi16(m, m);
54*77c1e3ccSAndroid Build Coastguard Worker             _mm_storel_epi64((__m128i *)&mask[j], m);
55*77c1e3ccSAndroid Build Coastguard Worker           }
56*77c1e3ccSAndroid Build Coastguard Worker           ssrc0 += src0_stride;
57*77c1e3ccSAndroid Build Coastguard Worker           ssrc1 += src1_stride;
58*77c1e3ccSAndroid Build Coastguard Worker           mask += w;
59*77c1e3ccSAndroid Build Coastguard Worker         }
60*77c1e3ccSAndroid Build Coastguard Worker       } else {
61*77c1e3ccSAndroid Build Coastguard Worker         for (int i = 0; i < h; ++i) {
62*77c1e3ccSAndroid Build Coastguard Worker           for (int j = 0; j < w; j += 8) {
63*77c1e3ccSAndroid Build Coastguard Worker             __m128i s0 = _mm_loadu_si128((const __m128i *)&ssrc0[j]);
64*77c1e3ccSAndroid Build Coastguard Worker             __m128i s1 = _mm_loadu_si128((const __m128i *)&ssrc1[j]);
65*77c1e3ccSAndroid Build Coastguard Worker             __m128i diff = _mm_srai_epi16(_mm_abs_epi16(_mm_sub_epi16(s0, s1)),
66*77c1e3ccSAndroid Build Coastguard Worker                                           DIFF_FACTOR_LOG2);
67*77c1e3ccSAndroid Build Coastguard Worker             __m128i m = _mm_min_epi16(
68*77c1e3ccSAndroid Build Coastguard Worker                 _mm_max_epi16(x0, _mm_add_epi16(diff, xmask_base)),
69*77c1e3ccSAndroid Build Coastguard Worker                 xAOM_BLEND_A64_MAX_ALPHA);
70*77c1e3ccSAndroid Build Coastguard Worker             m = _mm_packus_epi16(m, m);
71*77c1e3ccSAndroid Build Coastguard Worker             _mm_storel_epi64((__m128i *)&mask[j], m);
72*77c1e3ccSAndroid Build Coastguard Worker           }
73*77c1e3ccSAndroid Build Coastguard Worker           ssrc0 += src0_stride;
74*77c1e3ccSAndroid Build Coastguard Worker           ssrc1 += src1_stride;
75*77c1e3ccSAndroid Build Coastguard Worker           mask += w;
76*77c1e3ccSAndroid Build Coastguard Worker         }
77*77c1e3ccSAndroid Build Coastguard Worker       }
78*77c1e3ccSAndroid Build Coastguard Worker     } else {
79*77c1e3ccSAndroid Build Coastguard Worker       const __m128i xshift = _mm_set1_epi64x(bd - 8 + DIFF_FACTOR_LOG2);
80*77c1e3ccSAndroid Build Coastguard Worker       if (mask_type == DIFFWTD_38_INV) {
81*77c1e3ccSAndroid Build Coastguard Worker         for (int i = 0; i < h; ++i) {
82*77c1e3ccSAndroid Build Coastguard Worker           for (int j = 0; j < w; j += 8) {
83*77c1e3ccSAndroid Build Coastguard Worker             __m128i s0 = _mm_loadu_si128((const __m128i *)&ssrc0[j]);
84*77c1e3ccSAndroid Build Coastguard Worker             __m128i s1 = _mm_loadu_si128((const __m128i *)&ssrc1[j]);
85*77c1e3ccSAndroid Build Coastguard Worker             __m128i diff =
86*77c1e3ccSAndroid Build Coastguard Worker                 _mm_sra_epi16(_mm_abs_epi16(_mm_sub_epi16(s0, s1)), xshift);
87*77c1e3ccSAndroid Build Coastguard Worker             __m128i m = _mm_min_epi16(
88*77c1e3ccSAndroid Build Coastguard Worker                 _mm_max_epi16(x0, _mm_add_epi16(diff, xmask_base)),
89*77c1e3ccSAndroid Build Coastguard Worker                 xAOM_BLEND_A64_MAX_ALPHA);
90*77c1e3ccSAndroid Build Coastguard Worker             m = _mm_sub_epi16(xAOM_BLEND_A64_MAX_ALPHA, m);
91*77c1e3ccSAndroid Build Coastguard Worker             m = _mm_packus_epi16(m, m);
92*77c1e3ccSAndroid Build Coastguard Worker             _mm_storel_epi64((__m128i *)&mask[j], m);
93*77c1e3ccSAndroid Build Coastguard Worker           }
94*77c1e3ccSAndroid Build Coastguard Worker           ssrc0 += src0_stride;
95*77c1e3ccSAndroid Build Coastguard Worker           ssrc1 += src1_stride;
96*77c1e3ccSAndroid Build Coastguard Worker           mask += w;
97*77c1e3ccSAndroid Build Coastguard Worker         }
98*77c1e3ccSAndroid Build Coastguard Worker       } else {
99*77c1e3ccSAndroid Build Coastguard Worker         for (int i = 0; i < h; ++i) {
100*77c1e3ccSAndroid Build Coastguard Worker           for (int j = 0; j < w; j += 8) {
101*77c1e3ccSAndroid Build Coastguard Worker             __m128i s0 = _mm_loadu_si128((const __m128i *)&ssrc0[j]);
102*77c1e3ccSAndroid Build Coastguard Worker             __m128i s1 = _mm_loadu_si128((const __m128i *)&ssrc1[j]);
103*77c1e3ccSAndroid Build Coastguard Worker             __m128i diff =
104*77c1e3ccSAndroid Build Coastguard Worker                 _mm_sra_epi16(_mm_abs_epi16(_mm_sub_epi16(s0, s1)), xshift);
105*77c1e3ccSAndroid Build Coastguard Worker             __m128i m = _mm_min_epi16(
106*77c1e3ccSAndroid Build Coastguard Worker                 _mm_max_epi16(x0, _mm_add_epi16(diff, xmask_base)),
107*77c1e3ccSAndroid Build Coastguard Worker                 xAOM_BLEND_A64_MAX_ALPHA);
108*77c1e3ccSAndroid Build Coastguard Worker             m = _mm_packus_epi16(m, m);
109*77c1e3ccSAndroid Build Coastguard Worker             _mm_storel_epi64((__m128i *)&mask[j], m);
110*77c1e3ccSAndroid Build Coastguard Worker           }
111*77c1e3ccSAndroid Build Coastguard Worker           ssrc0 += src0_stride;
112*77c1e3ccSAndroid Build Coastguard Worker           ssrc1 += src1_stride;
113*77c1e3ccSAndroid Build Coastguard Worker           mask += w;
114*77c1e3ccSAndroid Build Coastguard Worker         }
115*77c1e3ccSAndroid Build Coastguard Worker       }
116*77c1e3ccSAndroid Build Coastguard Worker     }
117*77c1e3ccSAndroid Build Coastguard Worker   }
118*77c1e3ccSAndroid Build Coastguard Worker }
119*77c1e3ccSAndroid Build Coastguard Worker 
120*77c1e3ccSAndroid Build Coastguard Worker #endif  // CONFIG_AV1_HIGHBITDEPTH
121