1 /*
2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include <math.h>
12
13 #include "./vp9_rtcd.h"
14 #include "./vpx_dsp_rtcd.h"
15 #include "vp9/common/vp9_blockd.h"
16 #include "vp9/common/vp9_idct.h"
17 #include "vpx_dsp/inv_txfm.h"
18 #include "vpx_ports/mem.h"
19
vp9_iht4x4_16_add_c(const tran_low_t * input,uint8_t * dest,int stride,int tx_type)20 void vp9_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride,
21 int tx_type) {
22 const transform_2d IHT_4[] = {
23 { idct4_c, idct4_c }, // DCT_DCT = 0
24 { iadst4_c, idct4_c }, // ADST_DCT = 1
25 { idct4_c, iadst4_c }, // DCT_ADST = 2
26 { iadst4_c, iadst4_c } // ADST_ADST = 3
27 };
28
29 int i, j;
30 tran_low_t out[4 * 4];
31 tran_low_t *outptr = out;
32 tran_low_t temp_in[4], temp_out[4];
33
34 // inverse transform row vectors
35 for (i = 0; i < 4; ++i) {
36 IHT_4[tx_type].rows(input, outptr);
37 input += 4;
38 outptr += 4;
39 }
40
41 // inverse transform column vectors
42 for (i = 0; i < 4; ++i) {
43 for (j = 0; j < 4; ++j) temp_in[j] = out[j * 4 + i];
44 IHT_4[tx_type].cols(temp_in, temp_out);
45 for (j = 0; j < 4; ++j) {
46 dest[j * stride + i] = clip_pixel_add(dest[j * stride + i],
47 ROUND_POWER_OF_TWO(temp_out[j], 4));
48 }
49 }
50 }
51
52 static const transform_2d IHT_8[] = {
53 { idct8_c, idct8_c }, // DCT_DCT = 0
54 { iadst8_c, idct8_c }, // ADST_DCT = 1
55 { idct8_c, iadst8_c }, // DCT_ADST = 2
56 { iadst8_c, iadst8_c } // ADST_ADST = 3
57 };
58
vp9_iht8x8_64_add_c(const tran_low_t * input,uint8_t * dest,int stride,int tx_type)59 void vp9_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int stride,
60 int tx_type) {
61 int i, j;
62 tran_low_t out[8 * 8];
63 tran_low_t *outptr = out;
64 tran_low_t temp_in[8], temp_out[8];
65 const transform_2d ht = IHT_8[tx_type];
66
67 // inverse transform row vectors
68 for (i = 0; i < 8; ++i) {
69 ht.rows(input, outptr);
70 input += 8;
71 outptr += 8;
72 }
73
74 // inverse transform column vectors
75 for (i = 0; i < 8; ++i) {
76 for (j = 0; j < 8; ++j) temp_in[j] = out[j * 8 + i];
77 ht.cols(temp_in, temp_out);
78 for (j = 0; j < 8; ++j) {
79 dest[j * stride + i] = clip_pixel_add(dest[j * stride + i],
80 ROUND_POWER_OF_TWO(temp_out[j], 5));
81 }
82 }
83 }
84
85 static const transform_2d IHT_16[] = {
86 { idct16_c, idct16_c }, // DCT_DCT = 0
87 { iadst16_c, idct16_c }, // ADST_DCT = 1
88 { idct16_c, iadst16_c }, // DCT_ADST = 2
89 { iadst16_c, iadst16_c } // ADST_ADST = 3
90 };
91
vp9_iht16x16_256_add_c(const tran_low_t * input,uint8_t * dest,int stride,int tx_type)92 void vp9_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest, int stride,
93 int tx_type) {
94 int i, j;
95 tran_low_t out[16 * 16];
96 tran_low_t *outptr = out;
97 tran_low_t temp_in[16], temp_out[16];
98 const transform_2d ht = IHT_16[tx_type];
99
100 // Rows
101 for (i = 0; i < 16; ++i) {
102 ht.rows(input, outptr);
103 input += 16;
104 outptr += 16;
105 }
106
107 // Columns
108 for (i = 0; i < 16; ++i) {
109 for (j = 0; j < 16; ++j) temp_in[j] = out[j * 16 + i];
110 ht.cols(temp_in, temp_out);
111 for (j = 0; j < 16; ++j) {
112 dest[j * stride + i] = clip_pixel_add(dest[j * stride + i],
113 ROUND_POWER_OF_TWO(temp_out[j], 6));
114 }
115 }
116 }
117
118 // idct
// Picks the 4x4 inverse-DCT "add" routine from `eob`: the full 16-coefficient
// version when eob > 1, otherwise the single-coefficient (_1_) version.
void vp9_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
                     int eob) {
  if (eob <= 1) {
    vpx_idct4x4_1_add(input, dest, stride);
    return;
  }
  vpx_idct4x4_16_add(input, dest, stride);
}
126
// Picks the 4x4 inverse-WHT "add" routine from `eob`: the full 16-coefficient
// version when eob > 1, otherwise the single-coefficient (_1_) version.
void vp9_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
                     int eob) {
  if (eob <= 1) {
    vpx_iwht4x4_1_add(input, dest, stride);
    return;
  }
  vpx_iwht4x4_16_add(input, dest, stride);
}
134
// Dispatches the 8x8 inverse DCT on `eob`, since the work can be reduced when
// only a few DCT coefficients are non-zero.
//
// Note carried over from the original: if dc is 1, input[0] is already the
// reconstructed value and needs no dequantization; dc is counted in eob in
// that case, i.e. eob >= 1.
void vp9_idct8x8_add(const tran_low_t *input, uint8_t *dest, int stride,
                     int eob) {
  if (eob > 12) {
    // General case: full 64-coefficient transform.
    vpx_idct8x8_64_add(input, dest, stride);
  } else if (eob == 1) {
    // DC-only DCT coefficient.
    vpx_idct8x8_1_add(input, dest, stride);
  } else {
    // Every remaining eob <= 12 value.
    vpx_idct8x8_12_add(input, dest, stride);
  }
}
150
// Dispatches the 16x16 inverse DCT on `eob`, since the work can be reduced
// when only a few DCT coefficients are non-zero. `input` is asserted to be
// 32-byte aligned.
void vp9_idct16x16_add(const tran_low_t *input, uint8_t *dest, int stride,
                       int eob) {
  assert(((intptr_t)input) % 32 == 0);

  if (eob > 38) {
    vpx_idct16x16_256_add(input, dest, stride);
  } else if (eob > 10) {
    vpx_idct16x16_38_add(input, dest, stride);
  } else if (eob == 1) {
    // DC-only DCT coefficient.
    vpx_idct16x16_1_add(input, dest, stride);
  } else {
    vpx_idct16x16_10_add(input, dest, stride);
  }
}
165
// Dispatches the 32x32 inverse DCT on `eob`, choosing a reduced variant when
// the non-zero coefficients are confined to a corner of the block. `input`
// is asserted to be 32-byte aligned.
void vp9_idct32x32_add(const tran_low_t *input, uint8_t *dest, int stride,
                       int eob) {
  assert(((intptr_t)input) % 32 == 0);

  if (eob > 135) {
    vpx_idct32x32_1024_add(input, dest, stride);
  } else if (eob > 34) {
    // Non-zero coefficients only in the upper-left 16x16.
    vpx_idct32x32_135_add(input, dest, stride);
  } else if (eob == 1) {
    vpx_idct32x32_1_add(input, dest, stride);
  } else {
    // Non-zero coefficients only in the upper-left 8x8.
    vpx_idct32x32_34_add(input, dest, stride);
  }
}
180
181 // iht
// 4x4 inverse-transform entry point: DCT_DCT goes through the eob-aware DCT
// dispatcher, every other tx_type through the hybrid transform.
void vp9_iht4x4_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest,
                    int stride, int eob) {
  if (tx_type != DCT_DCT) {
    vp9_iht4x4_16_add(input, dest, stride, tx_type);
    return;
  }
  vp9_idct4x4_add(input, dest, stride, eob);
}
189
// 8x8 inverse-transform entry point: DCT_DCT goes through the eob-aware DCT
// dispatcher, every other tx_type through the hybrid transform.
void vp9_iht8x8_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest,
                    int stride, int eob) {
  if (tx_type != DCT_DCT) {
    vp9_iht8x8_64_add(input, dest, stride, tx_type);
    return;
  }
  vp9_idct8x8_add(input, dest, stride, eob);
}
198
// 16x16 inverse-transform entry point: DCT_DCT goes through the eob-aware DCT
// dispatcher, every other tx_type through the hybrid transform.
void vp9_iht16x16_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest,
                      int stride, int eob) {
  if (tx_type != DCT_DCT) {
    vp9_iht16x16_256_add(input, dest, stride, tx_type);
    return;
  }
  vp9_idct16x16_add(input, dest, stride, eob);
}
207
208 #if CONFIG_VP9_HIGHBITDEPTH
209
vp9_highbd_iht4x4_16_add_c(const tran_low_t * input,uint16_t * dest,int stride,int tx_type,int bd)210 void vp9_highbd_iht4x4_16_add_c(const tran_low_t *input, uint16_t *dest,
211 int stride, int tx_type, int bd) {
212 const highbd_transform_2d IHT_4[] = {
213 { vpx_highbd_idct4_c, vpx_highbd_idct4_c }, // DCT_DCT = 0
214 { vpx_highbd_iadst4_c, vpx_highbd_idct4_c }, // ADST_DCT = 1
215 { vpx_highbd_idct4_c, vpx_highbd_iadst4_c }, // DCT_ADST = 2
216 { vpx_highbd_iadst4_c, vpx_highbd_iadst4_c } // ADST_ADST = 3
217 };
218
219 int i, j;
220 tran_low_t out[4 * 4];
221 tran_low_t *outptr = out;
222 tran_low_t temp_in[4], temp_out[4];
223
224 // Inverse transform row vectors.
225 for (i = 0; i < 4; ++i) {
226 IHT_4[tx_type].rows(input, outptr, bd);
227 input += 4;
228 outptr += 4;
229 }
230
231 // Inverse transform column vectors.
232 for (i = 0; i < 4; ++i) {
233 for (j = 0; j < 4; ++j) temp_in[j] = out[j * 4 + i];
234 IHT_4[tx_type].cols(temp_in, temp_out, bd);
235 for (j = 0; j < 4; ++j) {
236 dest[j * stride + i] = highbd_clip_pixel_add(
237 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 4), bd);
238 }
239 }
240 }
241
242 static const highbd_transform_2d HIGH_IHT_8[] = {
243 { vpx_highbd_idct8_c, vpx_highbd_idct8_c }, // DCT_DCT = 0
244 { vpx_highbd_iadst8_c, vpx_highbd_idct8_c }, // ADST_DCT = 1
245 { vpx_highbd_idct8_c, vpx_highbd_iadst8_c }, // DCT_ADST = 2
246 { vpx_highbd_iadst8_c, vpx_highbd_iadst8_c } // ADST_ADST = 3
247 };
248
vp9_highbd_iht8x8_64_add_c(const tran_low_t * input,uint16_t * dest,int stride,int tx_type,int bd)249 void vp9_highbd_iht8x8_64_add_c(const tran_low_t *input, uint16_t *dest,
250 int stride, int tx_type, int bd) {
251 int i, j;
252 tran_low_t out[8 * 8];
253 tran_low_t *outptr = out;
254 tran_low_t temp_in[8], temp_out[8];
255 const highbd_transform_2d ht = HIGH_IHT_8[tx_type];
256
257 // Inverse transform row vectors.
258 for (i = 0; i < 8; ++i) {
259 ht.rows(input, outptr, bd);
260 input += 8;
261 outptr += 8;
262 }
263
264 // Inverse transform column vectors.
265 for (i = 0; i < 8; ++i) {
266 for (j = 0; j < 8; ++j) temp_in[j] = out[j * 8 + i];
267 ht.cols(temp_in, temp_out, bd);
268 for (j = 0; j < 8; ++j) {
269 dest[j * stride + i] = highbd_clip_pixel_add(
270 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 5), bd);
271 }
272 }
273 }
274
275 static const highbd_transform_2d HIGH_IHT_16[] = {
276 { vpx_highbd_idct16_c, vpx_highbd_idct16_c }, // DCT_DCT = 0
277 { vpx_highbd_iadst16_c, vpx_highbd_idct16_c }, // ADST_DCT = 1
278 { vpx_highbd_idct16_c, vpx_highbd_iadst16_c }, // DCT_ADST = 2
279 { vpx_highbd_iadst16_c, vpx_highbd_iadst16_c } // ADST_ADST = 3
280 };
281
vp9_highbd_iht16x16_256_add_c(const tran_low_t * input,uint16_t * dest,int stride,int tx_type,int bd)282 void vp9_highbd_iht16x16_256_add_c(const tran_low_t *input, uint16_t *dest,
283 int stride, int tx_type, int bd) {
284 int i, j;
285 tran_low_t out[16 * 16];
286 tran_low_t *outptr = out;
287 tran_low_t temp_in[16], temp_out[16];
288 const highbd_transform_2d ht = HIGH_IHT_16[tx_type];
289
290 // Rows
291 for (i = 0; i < 16; ++i) {
292 ht.rows(input, outptr, bd);
293 input += 16;
294 outptr += 16;
295 }
296
297 // Columns
298 for (i = 0; i < 16; ++i) {
299 for (j = 0; j < 16; ++j) temp_in[j] = out[j * 16 + i];
300 ht.cols(temp_in, temp_out, bd);
301 for (j = 0; j < 16; ++j) {
302 dest[j * stride + i] = highbd_clip_pixel_add(
303 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd);
304 }
305 }
306 }
307
308 // idct
// Picks the high-bitdepth 4x4 inverse-DCT "add" routine from `eob`: the full
// 16-coefficient version when eob > 1, otherwise the single-coefficient (_1_)
// version. `bd` is passed through unchanged.
void vp9_highbd_idct4x4_add(const tran_low_t *input, uint16_t *dest, int stride,
                            int eob, int bd) {
  if (eob <= 1) {
    vpx_highbd_idct4x4_1_add(input, dest, stride, bd);
    return;
  }
  vpx_highbd_idct4x4_16_add(input, dest, stride, bd);
}
316
// Picks the high-bitdepth 4x4 inverse-WHT "add" routine from `eob`: the full
// 16-coefficient version when eob > 1, otherwise the single-coefficient (_1_)
// version. `bd` is passed through unchanged.
void vp9_highbd_iwht4x4_add(const tran_low_t *input, uint16_t *dest, int stride,
                            int eob, int bd) {
  if (eob <= 1) {
    vpx_highbd_iwht4x4_1_add(input, dest, stride, bd);
    return;
  }
  vpx_highbd_iwht4x4_16_add(input, dest, stride, bd);
}
324
// Dispatches the high-bitdepth 8x8 inverse DCT on `eob`, since the work can
// be reduced when only a few DCT coefficients are non-zero.
//
// Note carried over from the original: if dc is 1, input[0] is already the
// reconstructed value and needs no dequantization; dc is counted in eob in
// that case, i.e. eob >= 1.
void vp9_highbd_idct8x8_add(const tran_low_t *input, uint16_t *dest, int stride,
                            int eob, int bd) {
  if (eob > 12) {
    // General case: full 64-coefficient transform.
    vpx_highbd_idct8x8_64_add(input, dest, stride, bd);
  } else if (eob == 1) {
    // DC-only DCT coefficient.
    vpx_highbd_idct8x8_1_add(input, dest, stride, bd);
  } else {
    // Every remaining eob <= 12 value.
    vpx_highbd_idct8x8_12_add(input, dest, stride, bd);
  }
}
341
// Dispatches the high-bitdepth 16x16 inverse DCT on `eob`, since the work can
// be reduced when only a few DCT coefficients are non-zero.
void vp9_highbd_idct16x16_add(const tran_low_t *input, uint16_t *dest,
                              int stride, int eob, int bd) {
  if (eob > 38) {
    vpx_highbd_idct16x16_256_add(input, dest, stride, bd);
  } else if (eob > 10) {
    vpx_highbd_idct16x16_38_add(input, dest, stride, bd);
  } else if (eob == 1) {
    // DC-only DCT coefficient.
    vpx_highbd_idct16x16_1_add(input, dest, stride, bd);
  } else {
    vpx_highbd_idct16x16_10_add(input, dest, stride, bd);
  }
}
357
// Dispatches the high-bitdepth 32x32 inverse DCT on `eob`, using the same
// thresholds (1, 34, 135) as the 8-bit vp9_idct32x32_add().
void vp9_highbd_idct32x32_add(const tran_low_t *input, uint16_t *dest,
                              int stride, int eob, int bd) {
  if (eob > 135) {
    vpx_highbd_idct32x32_1024_add(input, dest, stride, bd);
  } else if (eob > 34) {
    vpx_highbd_idct32x32_135_add(input, dest, stride, bd);
  } else if (eob == 1) {
    vpx_highbd_idct32x32_1_add(input, dest, stride, bd);
  } else {
    // Non-zero coefficients only in the upper-left 8x8.
    vpx_highbd_idct32x32_34_add(input, dest, stride, bd);
  }
}
371
372 // iht
// High-bitdepth 4x4 inverse-transform entry point: DCT_DCT goes through the
// eob-aware DCT dispatcher, every other tx_type through the hybrid transform.
void vp9_highbd_iht4x4_add(TX_TYPE tx_type, const tran_low_t *input,
                           uint16_t *dest, int stride, int eob, int bd) {
  if (tx_type != DCT_DCT) {
    vp9_highbd_iht4x4_16_add(input, dest, stride, tx_type, bd);
    return;
  }
  vp9_highbd_idct4x4_add(input, dest, stride, eob, bd);
}
380
// High-bitdepth 8x8 inverse-transform entry point: DCT_DCT goes through the
// eob-aware DCT dispatcher, every other tx_type through the hybrid transform.
void vp9_highbd_iht8x8_add(TX_TYPE tx_type, const tran_low_t *input,
                           uint16_t *dest, int stride, int eob, int bd) {
  if (tx_type != DCT_DCT) {
    vp9_highbd_iht8x8_64_add(input, dest, stride, tx_type, bd);
    return;
  }
  vp9_highbd_idct8x8_add(input, dest, stride, eob, bd);
}
389
// High-bitdepth 16x16 inverse-transform entry point: DCT_DCT goes through the
// eob-aware DCT dispatcher, every other tx_type through the hybrid transform.
void vp9_highbd_iht16x16_add(TX_TYPE tx_type, const tran_low_t *input,
                             uint16_t *dest, int stride, int eob, int bd) {
  if (tx_type != DCT_DCT) {
    vp9_highbd_iht16x16_256_add(input, dest, stride, tx_type, bd);
    return;
  }
  vp9_highbd_idct16x16_add(input, dest, stride, eob, bd);
}
398 #endif // CONFIG_VP9_HIGHBITDEPTH
399