xref: /aosp_15_r20/external/libvpx/vp9/common/vp9_idct.c (revision fb1b10ab9aebc7c7068eedab379b749d7e3900be)
1 /*
2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include <math.h>
12 
13 #include "./vp9_rtcd.h"
14 #include "./vpx_dsp_rtcd.h"
15 #include "vp9/common/vp9_blockd.h"
16 #include "vp9/common/vp9_idct.h"
17 #include "vpx_dsp/inv_txfm.h"
18 #include "vpx_ports/mem.h"
19 
vp9_iht4x4_16_add_c(const tran_low_t * input,uint8_t * dest,int stride,int tx_type)20 void vp9_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride,
21                          int tx_type) {
22   const transform_2d IHT_4[] = {
23     { idct4_c, idct4_c },   // DCT_DCT  = 0
24     { iadst4_c, idct4_c },  // ADST_DCT = 1
25     { idct4_c, iadst4_c },  // DCT_ADST = 2
26     { iadst4_c, iadst4_c }  // ADST_ADST = 3
27   };
28 
29   int i, j;
30   tran_low_t out[4 * 4];
31   tran_low_t *outptr = out;
32   tran_low_t temp_in[4], temp_out[4];
33 
34   // inverse transform row vectors
35   for (i = 0; i < 4; ++i) {
36     IHT_4[tx_type].rows(input, outptr);
37     input += 4;
38     outptr += 4;
39   }
40 
41   // inverse transform column vectors
42   for (i = 0; i < 4; ++i) {
43     for (j = 0; j < 4; ++j) temp_in[j] = out[j * 4 + i];
44     IHT_4[tx_type].cols(temp_in, temp_out);
45     for (j = 0; j < 4; ++j) {
46       dest[j * stride + i] = clip_pixel_add(dest[j * stride + i],
47                                             ROUND_POWER_OF_TWO(temp_out[j], 4));
48     }
49   }
50 }
51 
52 static const transform_2d IHT_8[] = {
53   { idct8_c, idct8_c },   // DCT_DCT  = 0
54   { iadst8_c, idct8_c },  // ADST_DCT = 1
55   { idct8_c, iadst8_c },  // DCT_ADST = 2
56   { iadst8_c, iadst8_c }  // ADST_ADST = 3
57 };
58 
vp9_iht8x8_64_add_c(const tran_low_t * input,uint8_t * dest,int stride,int tx_type)59 void vp9_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int stride,
60                          int tx_type) {
61   int i, j;
62   tran_low_t out[8 * 8];
63   tran_low_t *outptr = out;
64   tran_low_t temp_in[8], temp_out[8];
65   const transform_2d ht = IHT_8[tx_type];
66 
67   // inverse transform row vectors
68   for (i = 0; i < 8; ++i) {
69     ht.rows(input, outptr);
70     input += 8;
71     outptr += 8;
72   }
73 
74   // inverse transform column vectors
75   for (i = 0; i < 8; ++i) {
76     for (j = 0; j < 8; ++j) temp_in[j] = out[j * 8 + i];
77     ht.cols(temp_in, temp_out);
78     for (j = 0; j < 8; ++j) {
79       dest[j * stride + i] = clip_pixel_add(dest[j * stride + i],
80                                             ROUND_POWER_OF_TWO(temp_out[j], 5));
81     }
82   }
83 }
84 
85 static const transform_2d IHT_16[] = {
86   { idct16_c, idct16_c },   // DCT_DCT  = 0
87   { iadst16_c, idct16_c },  // ADST_DCT = 1
88   { idct16_c, iadst16_c },  // DCT_ADST = 2
89   { iadst16_c, iadst16_c }  // ADST_ADST = 3
90 };
91 
vp9_iht16x16_256_add_c(const tran_low_t * input,uint8_t * dest,int stride,int tx_type)92 void vp9_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest, int stride,
93                             int tx_type) {
94   int i, j;
95   tran_low_t out[16 * 16];
96   tran_low_t *outptr = out;
97   tran_low_t temp_in[16], temp_out[16];
98   const transform_2d ht = IHT_16[tx_type];
99 
100   // Rows
101   for (i = 0; i < 16; ++i) {
102     ht.rows(input, outptr);
103     input += 16;
104     outptr += 16;
105   }
106 
107   // Columns
108   for (i = 0; i < 16; ++i) {
109     for (j = 0; j < 16; ++j) temp_in[j] = out[j * 16 + i];
110     ht.cols(temp_in, temp_out);
111     for (j = 0; j < 16; ++j) {
112       dest[j * stride + i] = clip_pixel_add(dest[j * stride + i],
113                                             ROUND_POWER_OF_TWO(temp_out[j], 6));
114     }
115   }
116 }
117 
118 // idct
// Dispatches a 4x4 inverse DCT + add on the end-of-block count: anything
// above one coefficient takes the 16-coefficient routine, otherwise the
// single-coefficient routine is used.
void vp9_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
                     int eob) {
  if (eob <= 1) {
    vpx_idct4x4_1_add(input, dest, stride);
    return;
  }
  vpx_idct4x4_16_add(input, dest, stride);
}
126 
// Dispatches a 4x4 inverse WHT + add on the end-of-block count: anything
// above one coefficient takes the 16-coefficient routine, otherwise the
// single-coefficient routine is used.
void vp9_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
                     int eob) {
  if (eob <= 1) {
    vpx_iwht4x4_1_add(input, dest, stride);
    return;
  }
  vpx_iwht4x4_16_add(input, dest, stride);
}
134 
// Dispatches an 8x8 inverse DCT + add on the end-of-block count.
//
// Notes carried over from the original implementation: if dc is 1, input[0]
// is the reconstructed value and needs no dequantization; in that case dc is
// counted in eob, so eob >= 1.
//
// Fewer non-zero coefficients allow a cheaper routine, so eob selects one.
void vp9_idct8x8_add(const tran_low_t *input, uint8_t *dest, int stride,
                     int eob) {
  if (eob == 1) {
    // DC-only block.
    vpx_idct8x8_1_add(input, dest, stride);
    return;
  }
  if (eob <= 12) {
    vpx_idct8x8_12_add(input, dest, stride);
    return;
  }
  vpx_idct8x8_64_add(input, dest, stride);
}
150 
// Dispatches a 16x16 inverse DCT + add on the end-of-block count. Fewer
// non-zero coefficients allow a cheaper routine, so eob selects one.
// Asserts that input is 32-byte aligned.
void vp9_idct16x16_add(const tran_low_t *input, uint8_t *dest, int stride,
                       int eob) {
  assert(((intptr_t)input) % 32 == 0);

  if (eob == 1) {
    // DC-only block.
    vpx_idct16x16_1_add(input, dest, stride);
    return;
  }
  if (eob <= 10) {
    vpx_idct16x16_10_add(input, dest, stride);
    return;
  }
  if (eob <= 38) {
    vpx_idct16x16_38_add(input, dest, stride);
    return;
  }
  vpx_idct16x16_256_add(input, dest, stride);
}
165 
// Dispatches a 32x32 inverse DCT + add on the end-of-block count.
// Asserts that input is 32-byte aligned.
void vp9_idct32x32_add(const tran_low_t *input, uint8_t *dest, int stride,
                       int eob) {
  assert(((intptr_t)input) % 32 == 0);

  if (eob == 1) {
    vpx_idct32x32_1_add(input, dest, stride);
    return;
  }
  if (eob <= 34) {
    // Non-zero coefficients only in the upper-left 8x8.
    vpx_idct32x32_34_add(input, dest, stride);
    return;
  }
  if (eob <= 135) {
    // Non-zero coefficients only in the upper-left 16x16.
    vpx_idct32x32_135_add(input, dest, stride);
    return;
  }
  vpx_idct32x32_1024_add(input, dest, stride);
}
180 
181 // iht
// 4x4 inverse transform entry point: DCT_DCT goes through the eob-aware DCT
// dispatcher, every other tx_type through the hybrid transform.
void vp9_iht4x4_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest,
                    int stride, int eob) {
  if (tx_type != DCT_DCT) {
    vp9_iht4x4_16_add(input, dest, stride, tx_type);
    return;
  }
  vp9_idct4x4_add(input, dest, stride, eob);
}
189 
// 8x8 inverse transform entry point: DCT_DCT goes through the eob-aware DCT
// dispatcher, every other tx_type through the hybrid transform.
void vp9_iht8x8_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest,
                    int stride, int eob) {
  if (tx_type != DCT_DCT) {
    vp9_iht8x8_64_add(input, dest, stride, tx_type);
    return;
  }
  vp9_idct8x8_add(input, dest, stride, eob);
}
198 
// 16x16 inverse transform entry point: DCT_DCT goes through the eob-aware DCT
// dispatcher, every other tx_type through the hybrid transform.
void vp9_iht16x16_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest,
                      int stride, int eob) {
  if (tx_type != DCT_DCT) {
    vp9_iht16x16_256_add(input, dest, stride, tx_type);
    return;
  }
  vp9_idct16x16_add(input, dest, stride, eob);
}
207 
208 #if CONFIG_VP9_HIGHBITDEPTH
209 
vp9_highbd_iht4x4_16_add_c(const tran_low_t * input,uint16_t * dest,int stride,int tx_type,int bd)210 void vp9_highbd_iht4x4_16_add_c(const tran_low_t *input, uint16_t *dest,
211                                 int stride, int tx_type, int bd) {
212   const highbd_transform_2d IHT_4[] = {
213     { vpx_highbd_idct4_c, vpx_highbd_idct4_c },   // DCT_DCT  = 0
214     { vpx_highbd_iadst4_c, vpx_highbd_idct4_c },  // ADST_DCT = 1
215     { vpx_highbd_idct4_c, vpx_highbd_iadst4_c },  // DCT_ADST = 2
216     { vpx_highbd_iadst4_c, vpx_highbd_iadst4_c }  // ADST_ADST = 3
217   };
218 
219   int i, j;
220   tran_low_t out[4 * 4];
221   tran_low_t *outptr = out;
222   tran_low_t temp_in[4], temp_out[4];
223 
224   // Inverse transform row vectors.
225   for (i = 0; i < 4; ++i) {
226     IHT_4[tx_type].rows(input, outptr, bd);
227     input += 4;
228     outptr += 4;
229   }
230 
231   // Inverse transform column vectors.
232   for (i = 0; i < 4; ++i) {
233     for (j = 0; j < 4; ++j) temp_in[j] = out[j * 4 + i];
234     IHT_4[tx_type].cols(temp_in, temp_out, bd);
235     for (j = 0; j < 4; ++j) {
236       dest[j * stride + i] = highbd_clip_pixel_add(
237           dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 4), bd);
238     }
239   }
240 }
241 
242 static const highbd_transform_2d HIGH_IHT_8[] = {
243   { vpx_highbd_idct8_c, vpx_highbd_idct8_c },   // DCT_DCT  = 0
244   { vpx_highbd_iadst8_c, vpx_highbd_idct8_c },  // ADST_DCT = 1
245   { vpx_highbd_idct8_c, vpx_highbd_iadst8_c },  // DCT_ADST = 2
246   { vpx_highbd_iadst8_c, vpx_highbd_iadst8_c }  // ADST_ADST = 3
247 };
248 
vp9_highbd_iht8x8_64_add_c(const tran_low_t * input,uint16_t * dest,int stride,int tx_type,int bd)249 void vp9_highbd_iht8x8_64_add_c(const tran_low_t *input, uint16_t *dest,
250                                 int stride, int tx_type, int bd) {
251   int i, j;
252   tran_low_t out[8 * 8];
253   tran_low_t *outptr = out;
254   tran_low_t temp_in[8], temp_out[8];
255   const highbd_transform_2d ht = HIGH_IHT_8[tx_type];
256 
257   // Inverse transform row vectors.
258   for (i = 0; i < 8; ++i) {
259     ht.rows(input, outptr, bd);
260     input += 8;
261     outptr += 8;
262   }
263 
264   // Inverse transform column vectors.
265   for (i = 0; i < 8; ++i) {
266     for (j = 0; j < 8; ++j) temp_in[j] = out[j * 8 + i];
267     ht.cols(temp_in, temp_out, bd);
268     for (j = 0; j < 8; ++j) {
269       dest[j * stride + i] = highbd_clip_pixel_add(
270           dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 5), bd);
271     }
272   }
273 }
274 
275 static const highbd_transform_2d HIGH_IHT_16[] = {
276   { vpx_highbd_idct16_c, vpx_highbd_idct16_c },   // DCT_DCT  = 0
277   { vpx_highbd_iadst16_c, vpx_highbd_idct16_c },  // ADST_DCT = 1
278   { vpx_highbd_idct16_c, vpx_highbd_iadst16_c },  // DCT_ADST = 2
279   { vpx_highbd_iadst16_c, vpx_highbd_iadst16_c }  // ADST_ADST = 3
280 };
281 
vp9_highbd_iht16x16_256_add_c(const tran_low_t * input,uint16_t * dest,int stride,int tx_type,int bd)282 void vp9_highbd_iht16x16_256_add_c(const tran_low_t *input, uint16_t *dest,
283                                    int stride, int tx_type, int bd) {
284   int i, j;
285   tran_low_t out[16 * 16];
286   tran_low_t *outptr = out;
287   tran_low_t temp_in[16], temp_out[16];
288   const highbd_transform_2d ht = HIGH_IHT_16[tx_type];
289 
290   // Rows
291   for (i = 0; i < 16; ++i) {
292     ht.rows(input, outptr, bd);
293     input += 16;
294     outptr += 16;
295   }
296 
297   // Columns
298   for (i = 0; i < 16; ++i) {
299     for (j = 0; j < 16; ++j) temp_in[j] = out[j * 16 + i];
300     ht.cols(temp_in, temp_out, bd);
301     for (j = 0; j < 16; ++j) {
302       dest[j * stride + i] = highbd_clip_pixel_add(
303           dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd);
304     }
305   }
306 }
307 
308 // idct
// High-bitdepth 4x4 inverse DCT + add dispatcher: anything above one
// coefficient takes the 16-coefficient routine, otherwise the
// single-coefficient routine is used. bd is forwarded unchanged.
void vp9_highbd_idct4x4_add(const tran_low_t *input, uint16_t *dest, int stride,
                            int eob, int bd) {
  if (eob <= 1) {
    vpx_highbd_idct4x4_1_add(input, dest, stride, bd);
    return;
  }
  vpx_highbd_idct4x4_16_add(input, dest, stride, bd);
}
316 
// High-bitdepth 4x4 inverse WHT + add dispatcher: anything above one
// coefficient takes the 16-coefficient routine, otherwise the
// single-coefficient routine is used. bd is forwarded unchanged.
void vp9_highbd_iwht4x4_add(const tran_low_t *input, uint16_t *dest, int stride,
                            int eob, int bd) {
  if (eob <= 1) {
    vpx_highbd_iwht4x4_1_add(input, dest, stride, bd);
    return;
  }
  vpx_highbd_iwht4x4_16_add(input, dest, stride, bd);
}
324 
// High-bitdepth 8x8 inverse DCT + add dispatcher.
//
// Notes carried over from the original implementation: if dc is 1, input[0]
// is the reconstructed value and needs no dequantization; in that case dc is
// counted in eob, so eob >= 1.
//
// Fewer non-zero coefficients allow a cheaper routine, so eob selects one.
void vp9_highbd_idct8x8_add(const tran_low_t *input, uint16_t *dest, int stride,
                            int eob, int bd) {
  if (eob == 1) {
    // DC-only block.
    vpx_highbd_idct8x8_1_add(input, dest, stride, bd);
    return;
  }
  if (eob <= 12) {
    vpx_highbd_idct8x8_12_add(input, dest, stride, bd);
    return;
  }
  vpx_highbd_idct8x8_64_add(input, dest, stride, bd);
}
341 
// High-bitdepth 16x16 inverse DCT + add dispatcher. Fewer non-zero
// coefficients allow a cheaper routine, so eob selects one.
void vp9_highbd_idct16x16_add(const tran_low_t *input, uint16_t *dest,
                              int stride, int eob, int bd) {
  if (eob == 1) {
    // DC-only block.
    vpx_highbd_idct16x16_1_add(input, dest, stride, bd);
    return;
  }
  if (eob <= 10) {
    vpx_highbd_idct16x16_10_add(input, dest, stride, bd);
    return;
  }
  if (eob <= 38) {
    vpx_highbd_idct16x16_38_add(input, dest, stride, bd);
    return;
  }
  vpx_highbd_idct16x16_256_add(input, dest, stride, bd);
}
357 
// High-bitdepth 32x32 inverse DCT + add dispatcher, selected by the
// end-of-block count. bd is forwarded unchanged.
void vp9_highbd_idct32x32_add(const tran_low_t *input, uint16_t *dest,
                              int stride, int eob, int bd) {
  if (eob == 1) {
    vpx_highbd_idct32x32_1_add(input, dest, stride, bd);
    return;
  }
  if (eob <= 34) {
    // Non-zero coefficients only in the upper-left 8x8.
    vpx_highbd_idct32x32_34_add(input, dest, stride, bd);
    return;
  }
  if (eob <= 135) {
    vpx_highbd_idct32x32_135_add(input, dest, stride, bd);
    return;
  }
  vpx_highbd_idct32x32_1024_add(input, dest, stride, bd);
}
371 
372 // iht
// High-bitdepth 4x4 inverse transform entry point: DCT_DCT goes through the
// eob-aware DCT dispatcher, every other tx_type through the hybrid transform.
void vp9_highbd_iht4x4_add(TX_TYPE tx_type, const tran_low_t *input,
                           uint16_t *dest, int stride, int eob, int bd) {
  if (tx_type != DCT_DCT) {
    vp9_highbd_iht4x4_16_add(input, dest, stride, tx_type, bd);
    return;
  }
  vp9_highbd_idct4x4_add(input, dest, stride, eob, bd);
}
380 
// High-bitdepth 8x8 inverse transform entry point: DCT_DCT goes through the
// eob-aware DCT dispatcher, every other tx_type through the hybrid transform.
void vp9_highbd_iht8x8_add(TX_TYPE tx_type, const tran_low_t *input,
                           uint16_t *dest, int stride, int eob, int bd) {
  if (tx_type != DCT_DCT) {
    vp9_highbd_iht8x8_64_add(input, dest, stride, tx_type, bd);
    return;
  }
  vp9_highbd_idct8x8_add(input, dest, stride, eob, bd);
}
389 
// High-bitdepth 16x16 inverse transform entry point: DCT_DCT goes through the
// eob-aware DCT dispatcher, every other tx_type through the hybrid transform.
void vp9_highbd_iht16x16_add(TX_TYPE tx_type, const tran_low_t *input,
                             uint16_t *dest, int stride, int eob, int bd) {
  if (tx_type != DCT_DCT) {
    vp9_highbd_iht16x16_256_add(input, dest, stride, tx_type, bd);
    return;
  }
  vp9_highbd_idct16x16_add(input, dest, stride, eob, bd);
}
398 #endif  // CONFIG_VP9_HIGHBITDEPTH
399