1 /*
2 * Copyright (c) 2022 Samsung Electronics Co., Ltd.
3 * All Rights Reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * - Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 *
11 * - Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14 *
15 * - Neither the name of the copyright owner, nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED.IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
23 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 #include "oapv_def.h"
33 #include <math.h>
34
35 /* SAD for 16bit **************************************************************/
oapv_sad_16b(int w,int h,void * src1,void * src2,int s_src1,int s_src2,int bit_depth)36 int oapv_sad_16b(int w, int h, void *src1, void *src2, int s_src1, int s_src2, int bit_depth)
37 {
38 u16 *s1;
39 s16 *s2;
40 int i, j, sad;
41
42 s1 = (u16 *)src1;
43 s2 = (s16 *)src2;
44
45 sad = 0;
46
47 for(i = 0; i < h; i++) {
48 for(j = 0; j < w; j++) {
49 sad += oapv_abs16((s16)s1[j] - (s16)s2[j]);
50 }
51 s1 += s_src1;
52 s2 += s_src2;
53 }
54
55 return (sad >> (bit_depth - 8));
56 }
57
58 const oapv_fn_sad_t oapv_tbl_fn_sad_16b[2] = {
59 oapv_sad_16b,
60 NULL
61 };
62
63 /* DIFF **********************************************************************/
/*
 * Sample-wise difference (src1 - src2) of two w x h blocks of signed 16-bit
 * samples, written to diff.
 *
 * w, h      : block width and height in samples
 * src1,src2 : first sample of each input block
 * s_src1/2  : distance between consecutive input rows, in samples
 * s_diff    : distance between consecutive output rows, in samples
 * diff      : output block; each entry is the difference narrowed to s16
 * bit_depth : not used by this implementation
 */
void oapv_diff_16b(int w, int h, void *src1, void *src2, int s_src1, int s_src2, int s_diff, s16 *diff, int bit_depth)
{
    s16 *in1 = (s16 *)src1;
    s16 *in2 = (s16 *)src2;
    s16 *out = diff;
    int  x, y;

    for(y = 0; y < h; y++) {
        for(x = 0; x < w; x++) {
            out[x] = (s16)(in1[x] - in2[x]);
        }
        /* advance inputs and output to the next row */
        in1 += s_src1;
        in2 += s_src2;
        out += s_diff;
    }
}
82
83 const oapv_fn_diff_t oapv_tbl_fn_diff_16b[2] = {
84 oapv_diff_16b,
85 NULL
86 };
87
88 /* SSD ***********************************************************************/
oapv_ssd_16b(int w,int h,void * src1,void * src2,int s_src1,int s_src2,int bit_depth)89 s64 oapv_ssd_16b(int w, int h, void *src1, void *src2, int s_src1, int s_src2, int bit_depth)
90 {
91 s16 *s1;
92 s16 *s2;
93 int i, j, diff;
94 s64 ssd;
95
96 s1 = (s16 *)src1;
97 s2 = (s16 *)src2;
98
99 ssd = 0;
100
101 for(i = 0; i < h; i++) {
102 for(j = 0; j < w; j++) {
103 diff = s1[j] - s2[j];
104 ssd += (diff * diff);
105 }
106 s1 += s_src1;
107 s2 += s_src2;
108 }
109 return ssd;
110 }
111
112 const oapv_fn_ssd_t oapv_tbl_fn_ssd_16b[2] = {
113 oapv_ssd_16b,
114 NULL
115 };
116
/*
 * In-place 8-point add/subtract butterfly (three stages) used for both the
 * row and the column pass of the 8x8 transform below. After the call, v[0]
 * holds the plain sum of the eight inputs.
 */
static void dc_removed_had8_line(int v[8])
{
    int t[8];
    int k;

    /* stage 1: pair elements that are four apart */
    for(k = 0; k < 4; k++) {
        t[k]     = v[k] + v[k + 4];
        t[k + 4] = v[k] - v[k + 4];
    }

    /* stage 2: pair elements that are two apart inside each half */
    for(k = 0; k < 8; k += 4) {
        v[k]     = t[k]     + t[k + 2];
        v[k + 1] = t[k + 1] + t[k + 3];
        v[k + 2] = t[k]     - t[k + 2];
        v[k + 3] = t[k + 1] - t[k + 3];
    }

    /* stage 3: pair neighbouring elements */
    for(k = 0; k < 8; k += 2) {
        int sum = v[k] + v[k + 1];
        int dif = v[k] - v[k + 1];

        v[k]     = sum;
        v[k + 1] = dif;
    }
}

/*
 * Sum of absolute 8x8 butterfly-transform coefficients of a block, leaving
 * out the [0][0] coefficient (the sum of all 64 samples, i.e. the DC term).
 *
 * org   : first sample of the 8x8 block
 * s_org : distance between consecutive rows of the block, in samples
 *
 * Returns (sum + 2) >> 2.
 */
int oapv_dc_removed_had8x8(pel *org, int s_org)
{
    int  coef[8][8];
    int  col[8];
    int  r, c;
    int  total = 0;
    pel *row = org;

    /* horizontal pass: load each row as int and transform it in place */
    for(r = 0; r < 8; r++) {
        for(c = 0; c < 8; c++) {
            coef[r][c] = row[c];
        }
        dc_removed_had8_line(coef[r]);
        row += s_org;
    }

    /* vertical pass: transform each column and accumulate magnitudes */
    for(c = 0; c < 8; c++) {
        for(r = 0; r < 8; r++) {
            col[r] = coef[r][c];
        }
        dc_removed_had8_line(col);

        /* in column 0 start from row 1 so the DC coefficient is left out */
        for(r = (c == 0) ? 1 : 0; r < 8; r++) {
            total += oapv_abs(col[r]);
        }
    }

    return (total + 2) >> 2;
}