xref: /aosp_15_r20/external/libvpx/test/fdct8x8_test.cc (revision fb1b10ab9aebc7c7068eedab379b749d7e3900be)
1 /*
2  *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include <math.h>
12 #include <stdlib.h>
13 #include <string.h>
14 #include <tuple>
15 
16 #include "gtest/gtest.h"
17 
18 #include "./vp9_rtcd.h"
19 #include "./vpx_dsp_rtcd.h"
20 #include "test/acm_random.h"
21 #include "test/clear_system_state.h"
22 #include "test/register_state_check.h"
23 #include "test/util.h"
24 #include "vp9/common/vp9_entropy.h"
25 #include "vp9/common/vp9_scan.h"
26 #include "vpx_config.h"
27 #include "vpx/vpx_codec.h"
28 #include "vpx/vpx_integer.h"
29 #include "vpx_ports/mem.h"
30 
31 using libvpx_test::ACMRandom;
32 
33 namespace {
34 
35 const int kNumCoeffs = 64;
36 const double kPi = 3.141592653589793238462643383279502884;
37 
38 const int kSignBiasMaxDiff255 = 1500;
39 const int kSignBiasMaxDiff15 = 10000;
40 
41 typedef void (*FdctFunc)(const int16_t *in, tran_low_t *out, int stride);
42 typedef void (*IdctFunc)(const tran_low_t *in, uint8_t *out, int stride);
43 typedef void (*FhtFunc)(const int16_t *in, tran_low_t *out, int stride,
44                         int tx_type);
45 typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
46                         int tx_type);
47 
48 typedef std::tuple<FdctFunc, IdctFunc, int, vpx_bit_depth_t> Dct8x8Param;
49 typedef std::tuple<FhtFunc, IhtFunc, int, vpx_bit_depth_t> Ht8x8Param;
50 typedef std::tuple<IdctFunc, IdctFunc, int, vpx_bit_depth_t> Idct8x8Param;
51 
reference_8x8_dct_1d(const double in[8],double out[8])52 void reference_8x8_dct_1d(const double in[8], double out[8]) {
53   const double kInvSqrt2 = 0.707106781186547524400844362104;
54   for (int k = 0; k < 8; k++) {
55     out[k] = 0.0;
56     for (int n = 0; n < 8; n++) {
57       out[k] += in[n] * cos(kPi * (2 * n + 1) * k / 16.0);
58     }
59     if (k == 0) out[k] = out[k] * kInvSqrt2;
60   }
61 }
62 
// Double-precision reference for the separable 8x8 forward DCT.
// `input` and `output` are 8x8 blocks stored row-major (stride 8). The columns
// are transformed first, then the rows; the row pass doubles every result.
void reference_8x8_dct_2d(const int16_t input[kNumCoeffs],
                          double output[kNumCoeffs]) {
  double line_in[8];
  double line_out[8];

  // Pass 1: run the 1-D transform down every column of the input.
  for (int col = 0; col < 8; ++col) {
    for (int row = 0; row < 8; ++row) line_in[row] = input[row * 8 + col];
    reference_8x8_dct_1d(line_in, line_out);
    for (int row = 0; row < 8; ++row) output[row * 8 + col] = line_out[row];
  }

  // Pass 2: run the 1-D transform along every row of the intermediate block
  // and apply the final factor of 2.
  for (int row = 0; row < 8; ++row) {
    double *const row_data = output + row * 8;
    for (int col = 0; col < 8; ++col) line_in[col] = row_data[col];
    reference_8x8_dct_1d(line_in, line_out);
    for (int col = 0; col < 8; ++col) row_data[col] = line_out[col] * 2;
  }
}
81 
// Adapter giving vpx_fdct8x8_c() the FhtFunc signature so it can be stored in
// FwdTrans8x8TestBase::fwd_txfm_ref. The transform-type argument is accepted
// but ignored.
void fdct8x8_ref(const int16_t *in, tran_low_t *out, int stride,
                 int /*tx_type*/) {
  vpx_fdct8x8_c(in, out, stride);
}
86 
// Reference forward hybrid transform: forwards all arguments, including
// tx_type, to vp9_fht8x8_c().
void fht8x8_ref(const int16_t *in, tran_low_t *out, int stride, int tx_type) {
  vp9_fht8x8_c(in, out, stride, tx_type);
}
90 
91 #if CONFIG_VP9_HIGHBITDEPTH
// IdctFunc-compatible wrapper around vpx_highbd_idct8x8_64_add_c() with the
// bit depth fixed to 10. `out` is converted with CAST_TO_SHORTPTR; the tests
// in this file pass it a uint16_t buffer wrapped by CAST_TO_BYTEPTR.
void idct8x8_10(const tran_low_t *in, uint8_t *out, int stride) {
  vpx_highbd_idct8x8_64_add_c(in, CAST_TO_SHORTPTR(out), stride, 10);
}
95 
// IdctFunc-compatible wrapper around vpx_highbd_idct8x8_64_add_c() with the
// bit depth fixed to 12. `out` is converted with CAST_TO_SHORTPTR.
void idct8x8_12(const tran_low_t *in, uint8_t *out, int stride) {
  vpx_highbd_idct8x8_64_add_c(in, CAST_TO_SHORTPTR(out), stride, 12);
}
99 
// IhtFunc-compatible wrapper around vp9_highbd_iht8x8_64_add_c() with the bit
// depth fixed to 10; tx_type is passed through unchanged.
void iht8x8_10(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
  vp9_highbd_iht8x8_64_add_c(in, CAST_TO_SHORTPTR(out), stride, tx_type, 10);
}
103 
// IhtFunc-compatible wrapper around vp9_highbd_iht8x8_64_add_c() with the bit
// depth fixed to 12; tx_type is passed through unchanged.
void iht8x8_12(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
  vp9_highbd_iht8x8_64_add_c(in, CAST_TO_SHORTPTR(out), stride, tx_type, 12);
}
107 
108 #if HAVE_SSE2
109 
// IdctFunc-compatible wrapper: vpx_highbd_idct8x8_12_add_c() at bit depth 10.
// Used below as the C reference for the matching SSE2 wrapper.
void idct8x8_12_add_10_c(const tran_low_t *in, uint8_t *out, int stride) {
  vpx_highbd_idct8x8_12_add_c(in, CAST_TO_SHORTPTR(out), stride, 10);
}
113 
// IdctFunc-compatible wrapper: vpx_highbd_idct8x8_12_add_c() at bit depth 12.
// Used below as the C reference for the matching SSE2 wrapper.
void idct8x8_12_add_12_c(const tran_low_t *in, uint8_t *out, int stride) {
  vpx_highbd_idct8x8_12_add_c(in, CAST_TO_SHORTPTR(out), stride, 12);
}
117 
// IdctFunc-compatible wrapper: vpx_highbd_idct8x8_12_add_sse2() at bit depth
// 10.
void idct8x8_12_add_10_sse2(const tran_low_t *in, uint8_t *out, int stride) {
  vpx_highbd_idct8x8_12_add_sse2(in, CAST_TO_SHORTPTR(out), stride, 10);
}
121 
// IdctFunc-compatible wrapper: vpx_highbd_idct8x8_12_add_sse2() at bit depth
// 12.
void idct8x8_12_add_12_sse2(const tran_low_t *in, uint8_t *out, int stride) {
  vpx_highbd_idct8x8_12_add_sse2(in, CAST_TO_SHORTPTR(out), stride, 12);
}
125 
// IdctFunc-compatible wrapper: vpx_highbd_idct8x8_64_add_sse2() at bit depth
// 10.
void idct8x8_64_add_10_sse2(const tran_low_t *in, uint8_t *out, int stride) {
  vpx_highbd_idct8x8_64_add_sse2(in, CAST_TO_SHORTPTR(out), stride, 10);
}
129 
// IdctFunc-compatible wrapper: vpx_highbd_idct8x8_64_add_sse2() at bit depth
// 12.
void idct8x8_64_add_12_sse2(const tran_low_t *in, uint8_t *out, int stride) {
  vpx_highbd_idct8x8_64_add_sse2(in, CAST_TO_SHORTPTR(out), stride, 12);
}
133 #endif  // HAVE_SSE2
134 #endif  // CONFIG_VP9_HIGHBITDEPTH
135 
136 // Visual Studio 2022 (cl.exe) targeting AArch64 with optimizations enabled
137 // produces invalid code in RunExtremalCheck() and RunInvAccuracyCheck().
138 // See:
139 // https://developercommunity.visualstudio.com/t/1770-preview-1:-Misoptimization-for-AR/10369786
140 // TODO(jzern): check the compiler version after a fix for the issue is
141 // released.
142 #if defined(_MSC_VER) && defined(_M_ARM64) && !defined(__clang__)
143 #pragma optimize("", off)
144 #endif
145 class FwdTrans8x8TestBase {
146  public:
147   virtual ~FwdTrans8x8TestBase() = default;
148 
149  protected:
  // Hooks implemented by the concrete fixtures. RunFwdTxfm() runs the forward
  // transform under test from `in` into `out`; RunInvTxfm() runs the inverse
  // transform under test on the coefficients `out`, writing pixels to `dst`.
  // The checks below always pass pitch_ as `stride`.
  virtual void RunFwdTxfm(int16_t *in, tran_low_t *out, int stride) = 0;
  virtual void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) = 0;
152 
RunSignBiasCheck()153   void RunSignBiasCheck() {
154     ACMRandom rnd(ACMRandom::DeterministicSeed());
155     DECLARE_ALIGNED(16, int16_t, test_input_block[64]);
156     DECLARE_ALIGNED(16, tran_low_t, test_output_block[64]);
157     int count_sign_block[64][2];
158     const int count_test_block = 100000;
159 
160     memset(count_sign_block, 0, sizeof(count_sign_block));
161 
162     for (int i = 0; i < count_test_block; ++i) {
163       // Initialize a test block with input range [-255, 255].
164       for (int j = 0; j < 64; ++j) {
165         test_input_block[j] = ((rnd.Rand16() >> (16 - bit_depth_)) & mask_) -
166                               ((rnd.Rand16() >> (16 - bit_depth_)) & mask_);
167       }
168       ASM_REGISTER_STATE_CHECK(
169           RunFwdTxfm(test_input_block, test_output_block, pitch_));
170 
171       for (int j = 0; j < 64; ++j) {
172         if (test_output_block[j] < 0) {
173           ++count_sign_block[j][0];
174         } else if (test_output_block[j] > 0) {
175           ++count_sign_block[j][1];
176         }
177       }
178     }
179 
180     for (int j = 0; j < 64; ++j) {
181       const int diff = abs(count_sign_block[j][0] - count_sign_block[j][1]);
182       const int max_diff = kSignBiasMaxDiff255;
183       ASSERT_LT(diff, max_diff << (bit_depth_ - 8))
184           << "Error: 8x8 FDCT/FHT has a sign bias > "
185           << 1. * max_diff / count_test_block * 100 << "%"
186           << " for input range [-255, 255] at index " << j
187           << " count0: " << count_sign_block[j][0]
188           << " count1: " << count_sign_block[j][1] << " diff: " << diff;
189     }
190 
191     memset(count_sign_block, 0, sizeof(count_sign_block));
192 
193     for (int i = 0; i < count_test_block; ++i) {
194       // Initialize a test block with input range [-mask_ / 16, mask_ / 16].
195       for (int j = 0; j < 64; ++j) {
196         test_input_block[j] =
197             ((rnd.Rand16() & mask_) >> 4) - ((rnd.Rand16() & mask_) >> 4);
198       }
199       ASM_REGISTER_STATE_CHECK(
200           RunFwdTxfm(test_input_block, test_output_block, pitch_));
201 
202       for (int j = 0; j < 64; ++j) {
203         if (test_output_block[j] < 0) {
204           ++count_sign_block[j][0];
205         } else if (test_output_block[j] > 0) {
206           ++count_sign_block[j][1];
207         }
208       }
209     }
210 
211     for (int j = 0; j < 64; ++j) {
212       const int diff = abs(count_sign_block[j][0] - count_sign_block[j][1]);
213       const int max_diff = kSignBiasMaxDiff15;
214       ASSERT_LT(diff, max_diff << (bit_depth_ - 8))
215           << "Error: 8x8 FDCT/FHT has a sign bias > "
216           << 1. * max_diff / count_test_block * 100 << "%"
217           << " for input range [-15, 15] at index " << j
218           << " count0: " << count_sign_block[j][0]
219           << " count1: " << count_sign_block[j][1] << " diff: " << diff;
220     }
221   }
222 
RunRoundTripErrorCheck()223   void RunRoundTripErrorCheck() {
224     ACMRandom rnd(ACMRandom::DeterministicSeed());
225     int max_error = 0;
226     int total_error = 0;
227     const int count_test_block = 100000;
228     DECLARE_ALIGNED(16, int16_t, test_input_block[64]);
229     DECLARE_ALIGNED(16, tran_low_t, test_temp_block[64]);
230     DECLARE_ALIGNED(16, uint8_t, dst[64]);
231     DECLARE_ALIGNED(16, uint8_t, src[64]);
232 #if CONFIG_VP9_HIGHBITDEPTH
233     DECLARE_ALIGNED(16, uint16_t, dst16[64]);
234     DECLARE_ALIGNED(16, uint16_t, src16[64]);
235 #endif
236 
237     for (int i = 0; i < count_test_block; ++i) {
238       // Initialize a test block with input range [-mask_, mask_].
239       for (int j = 0; j < 64; ++j) {
240         if (bit_depth_ == VPX_BITS_8) {
241           src[j] = rnd.Rand8();
242           dst[j] = rnd.Rand8();
243           test_input_block[j] = src[j] - dst[j];
244 #if CONFIG_VP9_HIGHBITDEPTH
245         } else {
246           src16[j] = rnd.Rand16() & mask_;
247           dst16[j] = rnd.Rand16() & mask_;
248           test_input_block[j] = src16[j] - dst16[j];
249 #endif
250         }
251       }
252 
253       ASM_REGISTER_STATE_CHECK(
254           RunFwdTxfm(test_input_block, test_temp_block, pitch_));
255       for (int j = 0; j < 64; ++j) {
256         if (test_temp_block[j] > 0) {
257           test_temp_block[j] += 2;
258           test_temp_block[j] /= 4;
259           test_temp_block[j] *= 4;
260         } else {
261           test_temp_block[j] -= 2;
262           test_temp_block[j] /= 4;
263           test_temp_block[j] *= 4;
264         }
265       }
266       if (bit_depth_ == VPX_BITS_8) {
267         ASM_REGISTER_STATE_CHECK(RunInvTxfm(test_temp_block, dst, pitch_));
268 #if CONFIG_VP9_HIGHBITDEPTH
269       } else {
270         ASM_REGISTER_STATE_CHECK(
271             RunInvTxfm(test_temp_block, CAST_TO_BYTEPTR(dst16), pitch_));
272 #endif
273       }
274 
275       for (int j = 0; j < 64; ++j) {
276 #if CONFIG_VP9_HIGHBITDEPTH
277         const int diff =
278             bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
279 #else
280         const int diff = dst[j] - src[j];
281 #endif
282         const int error = diff * diff;
283         if (max_error < error) max_error = error;
284         total_error += error;
285       }
286     }
287 
288     ASSERT_GE(1 << 2 * (bit_depth_ - 8), max_error)
289         << "Error: 8x8 FDCT/IDCT or FHT/IHT has an individual"
290         << " roundtrip error > 1";
291 
292     ASSERT_GE((count_test_block << 2 * (bit_depth_ - 8)) / 5, total_error)
293         << "Error: 8x8 FDCT/IDCT or FHT/IHT has average roundtrip "
294         << "error > 1/5 per block";
295   }
296 
  // Round-trip and reference-match check on extremal inputs.
  //
  // Every source/destination pixel is either 0 or the maximum value (255 for
  // 8-bit, mask_ otherwise): block 0 is all-max minus all-zero, block 1 the
  // opposite, and the remaining blocks choose each pixel at random. For every
  // block the forward transform under test and the reference fwd_txfm_ref are
  // both run on the residual, then the inverse transform under test is applied
  // to the (unquantized) coefficients. After each block the running maximum
  // squared pixel error, the running total squared error, and the running
  // total absolute coefficient mismatch against the reference are asserted.
  //
  // NOTE: the comment above the class records that this function is
  // miscompiled by an MSVC/ARM64 optimizer; the code is intentionally left
  // untouched.
  void RunExtremalCheck() {
    ACMRandom rnd(ACMRandom::DeterministicSeed());
    int max_error = 0;
    int total_error = 0;
    int total_coeff_error = 0;
    const int count_test_block = 100000;
    DECLARE_ALIGNED(16, int16_t, test_input_block[64]);
    DECLARE_ALIGNED(16, tran_low_t, test_temp_block[64]);
    DECLARE_ALIGNED(16, tran_low_t, ref_temp_block[64]);
    DECLARE_ALIGNED(16, uint8_t, dst[64]);
    DECLARE_ALIGNED(16, uint8_t, src[64]);
#if CONFIG_VP9_HIGHBITDEPTH
    DECLARE_ALIGNED(16, uint16_t, dst16[64]);
    DECLARE_ALIGNED(16, uint16_t, src16[64]);
#endif

    for (int i = 0; i < count_test_block; ++i) {
      // Initialize a test block whose residuals are each -max, 0 or +max,
      // where max is 255 for 8-bit and mask_ otherwise.
      for (int j = 0; j < 64; ++j) {
        if (bit_depth_ == VPX_BITS_8) {
          if (i == 0) {
            // All residuals +255.
            src[j] = 255;
            dst[j] = 0;
          } else if (i == 1) {
            // All residuals -255.
            src[j] = 0;
            dst[j] = 255;
          } else {
            src[j] = rnd.Rand8() % 2 ? 255 : 0;
            dst[j] = rnd.Rand8() % 2 ? 255 : 0;
          }
          test_input_block[j] = src[j] - dst[j];
#if CONFIG_VP9_HIGHBITDEPTH
        } else {
          if (i == 0) {
            // All residuals +mask_.
            src16[j] = mask_;
            dst16[j] = 0;
          } else if (i == 1) {
            // All residuals -mask_.
            src16[j] = 0;
            dst16[j] = mask_;
          } else {
            src16[j] = rnd.Rand8() % 2 ? mask_ : 0;
            dst16[j] = rnd.Rand8() % 2 ? mask_ : 0;
          }
          test_input_block[j] = src16[j] - dst16[j];
#endif
        }
      }

      // Transform under test, then the reference on the same input.
      ASM_REGISTER_STATE_CHECK(
          RunFwdTxfm(test_input_block, test_temp_block, pitch_));
      ASM_REGISTER_STATE_CHECK(
          fwd_txfm_ref(test_input_block, ref_temp_block, pitch_, tx_type_));
      if (bit_depth_ == VPX_BITS_8) {
        ASM_REGISTER_STATE_CHECK(RunInvTxfm(test_temp_block, dst, pitch_));
#if CONFIG_VP9_HIGHBITDEPTH
      } else {
        ASM_REGISTER_STATE_CHECK(
            RunInvTxfm(test_temp_block, CAST_TO_BYTEPTR(dst16), pitch_));
#endif
      }

      for (int j = 0; j < 64; ++j) {
#if CONFIG_VP9_HIGHBITDEPTH
        const int diff =
            bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
#else
        const int diff = dst[j] - src[j];
#endif
        const int error = diff * diff;
        if (max_error < error) max_error = error;
        total_error += error;

        // Mismatch between the transform under test and the reference.
        const int coeff_diff = test_temp_block[j] - ref_temp_block[j];
        total_coeff_error += abs(coeff_diff);
      }

      // The accumulators are never reset, so these assertions check the
      // running totals after every block.
      ASSERT_GE(1 << 2 * (bit_depth_ - 8), max_error)
          << "Error: Extremal 8x8 FDCT/IDCT or FHT/IHT has"
          << " an individual roundtrip error > 1";

      ASSERT_GE((count_test_block << 2 * (bit_depth_ - 8)) / 5, total_error)
          << "Error: Extremal 8x8 FDCT/IDCT or FHT/IHT has average"
          << " roundtrip error > 1/5 per block";

      ASSERT_EQ(0, total_coeff_error)
          << "Error: Extremal 8x8 FDCT/FHT has"
          << " overflow issues in the intermediate steps > 1";
    }
  }
386 
  // Accuracy check for the inverse transform alone.
  //
  // For each of 1000 blocks the coefficients are produced by the
  // double-precision reference_8x8_dct_2d() (rounded to integers) rather than
  // by the forward transform under test. The inverse transform is then applied
  // on top of dst and every reconstructed pixel must match src with a squared
  // error of at most 1u << 2 * (bit_depth_ - 8).
  //
  // NOTE: the comment above the class records that this function is
  // miscompiled by an MSVC/ARM64 optimizer; the code is intentionally left
  // untouched.
  void RunInvAccuracyCheck() {
    ACMRandom rnd(ACMRandom::DeterministicSeed());
    const int count_test_block = 1000;
    DECLARE_ALIGNED(16, int16_t, in[kNumCoeffs]);
    DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]);
    DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
    DECLARE_ALIGNED(16, uint8_t, src[kNumCoeffs]);
#if CONFIG_VP9_HIGHBITDEPTH
    DECLARE_ALIGNED(16, uint16_t, src16[kNumCoeffs]);
    DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
#endif

    for (int i = 0; i < count_test_block; ++i) {
      double out_r[kNumCoeffs];

      // Initialize a test block: src is randomly 0 or the maximum value and
      // dst is the opposite extreme, so every residual is +/-255 (8-bit) or
      // +/-mask_ (high bit depth).
      for (int j = 0; j < kNumCoeffs; ++j) {
        if (bit_depth_ == VPX_BITS_8) {
          src[j] = rnd.Rand8() % 2 ? 255 : 0;
          dst[j] = src[j] > 0 ? 0 : 255;
          in[j] = src[j] - dst[j];
#if CONFIG_VP9_HIGHBITDEPTH
        } else {
          src16[j] = rnd.Rand8() % 2 ? mask_ : 0;
          dst16[j] = src16[j] > 0 ? 0 : mask_;
          in[j] = src16[j] - dst16[j];
#endif
        }
      }

      // Coefficients come from the floating-point reference, rounded to the
      // nearest integer.
      reference_8x8_dct_2d(in, out_r);
      for (int j = 0; j < kNumCoeffs; ++j) {
        coeff[j] = static_cast<tran_low_t>(round(out_r[j]));
      }

      if (bit_depth_ == VPX_BITS_8) {
        ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, pitch_));
#if CONFIG_VP9_HIGHBITDEPTH
      } else {
        ASM_REGISTER_STATE_CHECK(
            RunInvTxfm(coeff, CAST_TO_BYTEPTR(dst16), pitch_));
#endif
      }

      for (int j = 0; j < kNumCoeffs; ++j) {
#if CONFIG_VP9_HIGHBITDEPTH
        const int diff =
            bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
#else
        const int diff = dst[j] - src[j];
#endif
        const uint32_t error = diff * diff;
        ASSERT_GE(1u << 2 * (bit_depth_ - 8), error)
            << "Error: 8x8 IDCT has error " << error << " at index " << j;
      }
    }
  }
444 
RunFwdAccuracyCheck()445   void RunFwdAccuracyCheck() {
446     ACMRandom rnd(ACMRandom::DeterministicSeed());
447     const int count_test_block = 1000;
448     DECLARE_ALIGNED(16, int16_t, in[kNumCoeffs]);
449     DECLARE_ALIGNED(16, tran_low_t, coeff_r[kNumCoeffs]);
450     DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]);
451 
452     for (int i = 0; i < count_test_block; ++i) {
453       double out_r[kNumCoeffs];
454 
455       // Initialize a test block with input range [-mask_, mask_].
456       for (int j = 0; j < kNumCoeffs; ++j) {
457         in[j] = rnd.Rand8() % 2 == 0 ? mask_ : -mask_;
458       }
459 
460       RunFwdTxfm(in, coeff, pitch_);
461       reference_8x8_dct_2d(in, out_r);
462       for (int j = 0; j < kNumCoeffs; ++j) {
463         coeff_r[j] = static_cast<tran_low_t>(round(out_r[j]));
464       }
465 
466       for (int j = 0; j < kNumCoeffs; ++j) {
467         const int32_t diff = coeff[j] - coeff_r[j];
468         const uint32_t error = diff * diff;
469         ASSERT_GE(9u << 2 * (bit_depth_ - 8), error)
470             << "Error: 8x8 DCT has error " << error << " at index " << j;
471       }
472     }
473   }
474 
CompareInvReference(IdctFunc ref_txfm,int thresh)475   void CompareInvReference(IdctFunc ref_txfm, int thresh) {
476     ACMRandom rnd(ACMRandom::DeterministicSeed());
477     const int count_test_block = 10000;
478     const int eob = 12;
479     DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]);
480     DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
481     DECLARE_ALIGNED(16, uint8_t, ref[kNumCoeffs]);
482 #if CONFIG_VP9_HIGHBITDEPTH
483     DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
484     DECLARE_ALIGNED(16, uint16_t, ref16[kNumCoeffs]);
485 #endif
486     const int16_t *scan = vp9_default_scan_orders[TX_8X8].scan;
487 
488     for (int i = 0; i < count_test_block; ++i) {
489       for (int j = 0; j < kNumCoeffs; ++j) {
490         if (j < eob) {
491           // Random values less than the threshold, either positive or negative
492           coeff[scan[j]] = rnd(thresh) * (1 - 2 * (i % 2));
493         } else {
494           coeff[scan[j]] = 0;
495         }
496         if (bit_depth_ == VPX_BITS_8) {
497           dst[j] = 0;
498           ref[j] = 0;
499 #if CONFIG_VP9_HIGHBITDEPTH
500         } else {
501           dst16[j] = 0;
502           ref16[j] = 0;
503 #endif
504         }
505       }
506       if (bit_depth_ == VPX_BITS_8) {
507         ref_txfm(coeff, ref, pitch_);
508         ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, pitch_));
509 #if CONFIG_VP9_HIGHBITDEPTH
510       } else {
511         ref_txfm(coeff, CAST_TO_BYTEPTR(ref16), pitch_);
512         ASM_REGISTER_STATE_CHECK(
513             RunInvTxfm(coeff, CAST_TO_BYTEPTR(dst16), pitch_));
514 #endif
515       }
516 
517       for (int j = 0; j < kNumCoeffs; ++j) {
518 #if CONFIG_VP9_HIGHBITDEPTH
519         const int diff =
520             bit_depth_ == VPX_BITS_8 ? dst[j] - ref[j] : dst16[j] - ref16[j];
521 #else
522         const int diff = dst[j] - ref[j];
523 #endif
524         const uint32_t error = diff * diff;
525         ASSERT_EQ(0u, error)
526             << "Error: 8x8 IDCT has error " << error << " at index " << j;
527       }
528     }
529   }
530   int pitch_;
531   int tx_type_;
532   FhtFunc fwd_txfm_ref;
533   vpx_bit_depth_t bit_depth_;
534   int mask_;
535 };
536 #if defined(_MSC_VER) && defined(_M_ARM64) && !defined(__clang__)
537 #pragma optimize("", on)
538 #endif
539 
540 class FwdTrans8x8DCT : public FwdTrans8x8TestBase,
541                        public ::testing::TestWithParam<Dct8x8Param> {
542  public:
543   ~FwdTrans8x8DCT() override = default;
544 
SetUp()545   void SetUp() override {
546     fwd_txfm_ = GET_PARAM(0);
547     inv_txfm_ = GET_PARAM(1);
548     tx_type_ = GET_PARAM(2);
549     pitch_ = 8;
550     fwd_txfm_ref = fdct8x8_ref;
551     bit_depth_ = GET_PARAM(3);
552     mask_ = (1 << bit_depth_) - 1;
553   }
554 
TearDown()555   void TearDown() override { libvpx_test::ClearSystemState(); }
556 
557  protected:
RunFwdTxfm(int16_t * in,tran_low_t * out,int stride)558   void RunFwdTxfm(int16_t *in, tran_low_t *out, int stride) override {
559     fwd_txfm_(in, out, stride);
560   }
RunInvTxfm(tran_low_t * out,uint8_t * dst,int stride)561   void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) override {
562     inv_txfm_(out, dst, stride);
563   }
564 
565   FdctFunc fwd_txfm_;
566   IdctFunc inv_txfm_;
567 };
568 
// Each test below runs one shared check from FwdTrans8x8TestBase against every
// instantiated DCT parameter tuple.

// Counts of negative and positive coefficients must be balanced.
TEST_P(FwdTrans8x8DCT, SignBiasCheck) { RunSignBiasCheck(); }

// Forward + inverse on random input must reconstruct the source closely.
TEST_P(FwdTrans8x8DCT, RoundTripErrorCheck) { RunRoundTripErrorCheck(); }

// Extremal input must round-trip and match the reference coefficients.
TEST_P(FwdTrans8x8DCT, ExtremalCheck) { RunExtremalCheck(); }

// Forward output must stay close to the floating-point reference DCT.
TEST_P(FwdTrans8x8DCT, FwdAccuracyCheck) { RunFwdAccuracyCheck(); }

// Inverse of the floating-point reference coefficients must match the source.
TEST_P(FwdTrans8x8DCT, InvAccuracyCheck) { RunInvAccuracyCheck(); }
578 
579 class FwdTrans8x8HT : public FwdTrans8x8TestBase,
580                       public ::testing::TestWithParam<Ht8x8Param> {
581  public:
582   ~FwdTrans8x8HT() override = default;
583 
SetUp()584   void SetUp() override {
585     fwd_txfm_ = GET_PARAM(0);
586     inv_txfm_ = GET_PARAM(1);
587     tx_type_ = GET_PARAM(2);
588     pitch_ = 8;
589     fwd_txfm_ref = fht8x8_ref;
590     bit_depth_ = GET_PARAM(3);
591     mask_ = (1 << bit_depth_) - 1;
592   }
593 
TearDown()594   void TearDown() override { libvpx_test::ClearSystemState(); }
595 
596  protected:
RunFwdTxfm(int16_t * in,tran_low_t * out,int stride)597   void RunFwdTxfm(int16_t *in, tran_low_t *out, int stride) override {
598     fwd_txfm_(in, out, stride, tx_type_);
599   }
RunInvTxfm(tran_low_t * out,uint8_t * dst,int stride)600   void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) override {
601     inv_txfm_(out, dst, stride, tx_type_);
602   }
603 
604   FhtFunc fwd_txfm_;
605   IhtFunc inv_txfm_;
606 };
607 
// The hybrid-transform fixture runs the sign-bias, round-trip and extremal
// checks; no forward/inverse accuracy tests are registered for it here.
TEST_P(FwdTrans8x8HT, SignBiasCheck) { RunSignBiasCheck(); }

TEST_P(FwdTrans8x8HT, RoundTripErrorCheck) { RunRoundTripErrorCheck(); }

TEST_P(FwdTrans8x8HT, ExtremalCheck) { RunExtremalCheck(); }
613 
614 #if HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
615 class InvTrans8x8DCT : public FwdTrans8x8TestBase,
616                        public ::testing::TestWithParam<Idct8x8Param> {
617  public:
618   ~InvTrans8x8DCT() override = default;
619 
SetUp()620   void SetUp() override {
621     ref_txfm_ = GET_PARAM(0);
622     inv_txfm_ = GET_PARAM(1);
623     thresh_ = GET_PARAM(2);
624     pitch_ = 8;
625     bit_depth_ = GET_PARAM(3);
626     mask_ = (1 << bit_depth_) - 1;
627   }
628 
TearDown()629   void TearDown() override { libvpx_test::ClearSystemState(); }
630 
631  protected:
RunInvTxfm(tran_low_t * out,uint8_t * dst,int stride)632   void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) override {
633     inv_txfm_(out, dst, stride);
634   }
RunFwdTxfm(int16_t *,tran_low_t *,int)635   void RunFwdTxfm(int16_t * /*out*/, tran_low_t * /*dst*/,
636                   int /*stride*/) override {}
637 
638   IdctFunc ref_txfm_;
639   IdctFunc inv_txfm_;
640   int thresh_;
641 };
// Tell GoogleTest that InvTrans8x8DCT may legitimately end up with no
// instantiation.
GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(InvTrans8x8DCT);

// The inverse transform under test must match the reference inverse exactly on
// sparse random coefficients bounded by thresh_.
TEST_P(InvTrans8x8DCT, CompareReference) {
  CompareInvReference(ref_txfm_, thresh_);
}
647 #endif  // HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
648 
649 using std::make_tuple;
650 
// C implementations of the DCT pair. The 8-bit pair is always tested;
// high-bitdepth builds add the 10- and 12-bit forward DCT paired with the
// idct8x8_10 / idct8x8_12 wrappers.
#if CONFIG_VP9_HIGHBITDEPTH
INSTANTIATE_TEST_SUITE_P(
    C, FwdTrans8x8DCT,
    ::testing::Values(
        make_tuple(&vpx_fdct8x8_c, &vpx_idct8x8_64_add_c, 0, VPX_BITS_8),
        make_tuple(&vpx_highbd_fdct8x8_c, &idct8x8_10, 0, VPX_BITS_10),
        make_tuple(&vpx_highbd_fdct8x8_c, &idct8x8_12, 0, VPX_BITS_12)));
#else
INSTANTIATE_TEST_SUITE_P(C, FwdTrans8x8DCT,
                         ::testing::Values(make_tuple(&vpx_fdct8x8_c,
                                                      &vpx_idct8x8_64_add_c, 0,
                                                      VPX_BITS_8)));
#endif  // CONFIG_VP9_HIGHBITDEPTH
664 
// C implementations of the hybrid transform pair, for tx_type 0..3. The 8-bit
// pair is always tested; high-bitdepth builds add 10- and 12-bit variants.
// The order of the tuples determines the generated test indices.
#if CONFIG_VP9_HIGHBITDEPTH
INSTANTIATE_TEST_SUITE_P(
    C, FwdTrans8x8HT,
    ::testing::Values(
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 0, VPX_BITS_8),
        make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_10, 0, VPX_BITS_10),
        make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_10, 1, VPX_BITS_10),
        make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_10, 2, VPX_BITS_10),
        make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_10, 3, VPX_BITS_10),
        make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_12, 0, VPX_BITS_12),
        make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_12, 1, VPX_BITS_12),
        make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_12, 2, VPX_BITS_12),
        make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_12, 3, VPX_BITS_12),
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 1, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 2, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 3, VPX_BITS_8)));
#else
INSTANTIATE_TEST_SUITE_P(
    C, FwdTrans8x8HT,
    ::testing::Values(
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 0, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 1, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 2, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 3, VPX_BITS_8)));
#endif  // CONFIG_VP9_HIGHBITDEPTH
690 
// NEON: the 8-bit forward/inverse DCT pair is always tested. For the hybrid
// transform only the inverse has a NEON entry here; it is paired with the C
// forward transform and tested only in non-high-bitdepth builds.
#if HAVE_NEON && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_SUITE_P(NEON, FwdTrans8x8DCT,
                         ::testing::Values(make_tuple(&vpx_fdct8x8_neon,
                                                      &vpx_idct8x8_64_add_neon,
                                                      0, VPX_BITS_8)));

#if !CONFIG_VP9_HIGHBITDEPTH
INSTANTIATE_TEST_SUITE_P(
    NEON, FwdTrans8x8HT,
    ::testing::Values(
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_neon, 0, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_neon, 1, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_neon, 2, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_neon, 3, VPX_BITS_8)));
#endif  // !CONFIG_VP9_HIGHBITDEPTH
#endif  // HAVE_NEON && !CONFIG_EMULATE_HARDWARE
707 
// SSE2, non-high-bitdepth builds: SSE2 forward and inverse functions are
// paired for both the DCT and the hybrid transform (tx_type 0..3).
#if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_SUITE_P(SSE2, FwdTrans8x8DCT,
                         ::testing::Values(make_tuple(&vpx_fdct8x8_sse2,
                                                      &vpx_idct8x8_64_add_sse2,
                                                      0, VPX_BITS_8)));
INSTANTIATE_TEST_SUITE_P(
    SSE2, FwdTrans8x8HT,
    ::testing::Values(
        make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 0, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 1, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 2, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 3, VPX_BITS_8)));
#endif  // HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
721 
722 #if HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
723 INSTANTIATE_TEST_SUITE_P(
724     SSE2, FwdTrans8x8DCT,
725     ::testing::Values(make_tuple(&vpx_fdct8x8_sse2, &vpx_idct8x8_64_add_c, 0,
726                                  VPX_BITS_8),
727                       make_tuple(&vpx_highbd_fdct8x8_c, &idct8x8_64_add_10_sse2,
728                                  12, VPX_BITS_10),
729                       make_tuple(&vpx_highbd_fdct8x8_sse2,
730                                  &idct8x8_64_add_10_sse2, 12, VPX_BITS_10),
731                       make_tuple(&vpx_highbd_fdct8x8_c, &idct8x8_64_add_12_sse2,
732                                  12, VPX_BITS_12),
733                       make_tuple(&vpx_highbd_fdct8x8_sse2,
734                                  &idct8x8_64_add_12_sse2, 12, VPX_BITS_12)));
735 
// SSE2, high-bitdepth builds: the 8-bit SSE2 forward hybrid transform is
// paired with the C inverse for tx_type 0..3.
INSTANTIATE_TEST_SUITE_P(
    SSE2, FwdTrans8x8HT,
    ::testing::Values(
        make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_c, 0, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_c, 1, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_c, 2, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_c, 3, VPX_BITS_8)));
743 
// Optimizations take effect at a threshold of 6201, so we use a value close to
// that to test both branches.
// Each tuple is (C reference inverse, SSE2 inverse under test, coefficient
// threshold, bit depth); both the _12_add and _64_add variants are covered at
// 10 and 12 bits.
INSTANTIATE_TEST_SUITE_P(
    SSE2, InvTrans8x8DCT,
    ::testing::Values(
        make_tuple(&idct8x8_12_add_10_c, &idct8x8_12_add_10_sse2, 6225,
                   VPX_BITS_10),
        make_tuple(&idct8x8_10, &idct8x8_64_add_10_sse2, 6225, VPX_BITS_10),
        make_tuple(&idct8x8_12_add_12_c, &idct8x8_12_add_12_sse2, 6225,
                   VPX_BITS_12),
        make_tuple(&idct8x8_12, &idct8x8_64_add_12_sse2, 6225, VPX_BITS_12)));
755 #endif  // HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
756 
// SSSE3 (x86-64, non-high-bitdepth builds only): the SSSE3 forward DCT is
// paired with the SSE2 inverse DCT.
#if HAVE_SSSE3 && VPX_ARCH_X86_64 && !CONFIG_VP9_HIGHBITDEPTH && \
    !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_SUITE_P(SSSE3, FwdTrans8x8DCT,
                         ::testing::Values(make_tuple(&vpx_fdct8x8_ssse3,
                                                      &vpx_idct8x8_64_add_sse2,
                                                      0, VPX_BITS_8)));
#endif
764 
// MSA, non-high-bitdepth builds: MSA forward and inverse functions are paired
// for both the DCT and the hybrid transform (tx_type 0..3).
#if HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_SUITE_P(MSA, FwdTrans8x8DCT,
                         ::testing::Values(make_tuple(&vpx_fdct8x8_msa,
                                                      &vpx_idct8x8_64_add_msa,
                                                      0, VPX_BITS_8)));
INSTANTIATE_TEST_SUITE_P(
    MSA, FwdTrans8x8HT,
    ::testing::Values(
        make_tuple(&vp9_fht8x8_msa, &vp9_iht8x8_64_add_msa, 0, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_msa, &vp9_iht8x8_64_add_msa, 1, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_msa, &vp9_iht8x8_64_add_msa, 2, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_msa, &vp9_iht8x8_64_add_msa, 3, VPX_BITS_8)));
#endif  // HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
778 
// VSX, non-high-bitdepth builds: only the inverse DCT has a VSX entry here; it
// is paired with the C forward DCT.
#if HAVE_VSX && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_SUITE_P(VSX, FwdTrans8x8DCT,
                         ::testing::Values(make_tuple(&vpx_fdct8x8_c,
                                                      &vpx_idct8x8_64_add_vsx,
                                                      0, VPX_BITS_8)));
#endif  // HAVE_VSX && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
785 
// LSX, non-high-bitdepth builds: only the forward DCT has an LSX entry here;
// it is paired with the C inverse DCT.
#if HAVE_LSX && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_SUITE_P(LSX, FwdTrans8x8DCT,
                         ::testing::Values(make_tuple(&vpx_fdct8x8_lsx,
                                                      &vpx_idct8x8_64_add_c, 0,
                                                      VPX_BITS_8)));
#endif  // HAVE_LSX && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
792 }  // namespace
793