1 /*
2 * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include <math.h>
12 #include <stdlib.h>
13 #include <string.h>
14 #include <tuple>
15
16 #include "gtest/gtest.h"
17
18 #include "./vp9_rtcd.h"
19 #include "./vpx_dsp_rtcd.h"
20 #include "test/acm_random.h"
21 #include "test/clear_system_state.h"
22 #include "test/register_state_check.h"
23 #include "test/util.h"
24 #include "vp9/common/vp9_entropy.h"
25 #include "vp9/common/vp9_scan.h"
26 #include "vpx_config.h"
27 #include "vpx/vpx_codec.h"
28 #include "vpx/vpx_integer.h"
29 #include "vpx_ports/mem.h"
30
31 using libvpx_test::ACMRandom;
32
33 namespace {
34
35 const int kNumCoeffs = 64;
36 const double kPi = 3.141592653589793238462643383279502884;
37
38 const int kSignBiasMaxDiff255 = 1500;
39 const int kSignBiasMaxDiff15 = 10000;
40
41 typedef void (*FdctFunc)(const int16_t *in, tran_low_t *out, int stride);
42 typedef void (*IdctFunc)(const tran_low_t *in, uint8_t *out, int stride);
43 typedef void (*FhtFunc)(const int16_t *in, tran_low_t *out, int stride,
44 int tx_type);
45 typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
46 int tx_type);
47
48 typedef std::tuple<FdctFunc, IdctFunc, int, vpx_bit_depth_t> Dct8x8Param;
49 typedef std::tuple<FhtFunc, IhtFunc, int, vpx_bit_depth_t> Ht8x8Param;
50 typedef std::tuple<IdctFunc, IdctFunc, int, vpx_bit_depth_t> Idct8x8Param;
51
reference_8x8_dct_1d(const double in[8],double out[8])52 void reference_8x8_dct_1d(const double in[8], double out[8]) {
53 const double kInvSqrt2 = 0.707106781186547524400844362104;
54 for (int k = 0; k < 8; k++) {
55 out[k] = 0.0;
56 for (int n = 0; n < 8; n++) {
57 out[k] += in[n] * cos(kPi * (2 * n + 1) * k / 16.0);
58 }
59 if (k == 0) out[k] = out[k] * kInvSqrt2;
60 }
61 }
62
// Double-precision reference for the 2-D 8x8 forward DCT. |input| and
// |output| are row-major 8x8 blocks. The 1-D reference transform is applied
// to every column and then to every row, and the result is scaled by 2.
void reference_8x8_dct_2d(const int16_t input[kNumCoeffs],
                          double output[kNumCoeffs]) {
  double line_in[8];
  double line_out[8];

  // Pass 1: columns of the input, stored into the same columns of |output|.
  for (int col = 0; col < 8; ++col) {
    for (int row = 0; row < 8; ++row) line_in[row] = input[row * 8 + col];
    reference_8x8_dct_1d(line_in, line_out);
    for (int row = 0; row < 8; ++row) output[row * 8 + col] = line_out[row];
  }

  // Pass 2: rows of the intermediate result, transformed in place.
  for (int row = 0; row < 8; ++row) {
    double *const row_data = output + row * 8;
    for (int col = 0; col < 8; ++col) line_in[col] = row_data[col];
    reference_8x8_dct_1d(line_in, line_out);
    // Scale by some magic number
    for (int col = 0; col < 8; ++col) row_data[col] = line_out[col] * 2;
  }
}
81
// Reference forward transform used by the DCT fixture: adapts
// vpx_fdct8x8_c() to the FhtFunc signature. The tx_type argument is accepted
// only to match that signature and is ignored.
void fdct8x8_ref(const int16_t *in, tran_low_t *out, int stride,
                 int /*tx_type*/) {
  vpx_fdct8x8_c(in, out, stride);
}
86
// Reference forward transform used by the hybrid-transform fixture: forwards
// all arguments, including tx_type, to vp9_fht8x8_c().
void fht8x8_ref(const int16_t *in, tran_low_t *out, int stride, int tx_type) {
  vp9_fht8x8_c(in, out, stride, tx_type);
}
90
91 #if CONFIG_VP9_HIGHBITDEPTH
// IdctFunc adapter around vpx_highbd_idct8x8_64_add_c() with the trailing
// argument (presumably the bit depth) fixed to 10. |out| is handed over via
// CAST_TO_SHORTPTR.
void idct8x8_10(const tran_low_t *in, uint8_t *out, int stride) {
  vpx_highbd_idct8x8_64_add_c(in, CAST_TO_SHORTPTR(out), stride, 10);
}
95
// IdctFunc adapter around vpx_highbd_idct8x8_64_add_c() with the trailing
// argument (presumably the bit depth) fixed to 12. |out| is handed over via
// CAST_TO_SHORTPTR.
void idct8x8_12(const tran_low_t *in, uint8_t *out, int stride) {
  vpx_highbd_idct8x8_64_add_c(in, CAST_TO_SHORTPTR(out), stride, 12);
}
99
// IhtFunc adapter around vp9_highbd_iht8x8_64_add_c() with the trailing
// argument (presumably the bit depth) fixed to 10. tx_type is passed through
// and |out| is handed over via CAST_TO_SHORTPTR.
void iht8x8_10(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
  vp9_highbd_iht8x8_64_add_c(in, CAST_TO_SHORTPTR(out), stride, tx_type, 10);
}
103
// IhtFunc adapter around vp9_highbd_iht8x8_64_add_c() with the trailing
// argument (presumably the bit depth) fixed to 12. tx_type is passed through
// and |out| is handed over via CAST_TO_SHORTPTR.
void iht8x8_12(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
  vp9_highbd_iht8x8_64_add_c(in, CAST_TO_SHORTPTR(out), stride, tx_type, 12);
}
107
108 #if HAVE_SSE2
109
// IdctFunc adapter around vpx_highbd_idct8x8_12_add_c() with the trailing
// argument fixed to 10. Used as the reference side of the InvTrans8x8DCT
// SSE2 comparison.
void idct8x8_12_add_10_c(const tran_low_t *in, uint8_t *out, int stride) {
  vpx_highbd_idct8x8_12_add_c(in, CAST_TO_SHORTPTR(out), stride, 10);
}
113
// IdctFunc adapter around vpx_highbd_idct8x8_12_add_c() with the trailing
// argument fixed to 12. Used as the reference side of the InvTrans8x8DCT
// SSE2 comparison.
void idct8x8_12_add_12_c(const tran_low_t *in, uint8_t *out, int stride) {
  vpx_highbd_idct8x8_12_add_c(in, CAST_TO_SHORTPTR(out), stride, 12);
}
117
// IdctFunc adapter around vpx_highbd_idct8x8_12_add_sse2() with the trailing
// argument fixed to 10.
void idct8x8_12_add_10_sse2(const tran_low_t *in, uint8_t *out, int stride) {
  vpx_highbd_idct8x8_12_add_sse2(in, CAST_TO_SHORTPTR(out), stride, 10);
}
121
// IdctFunc adapter around vpx_highbd_idct8x8_12_add_sse2() with the trailing
// argument fixed to 12.
void idct8x8_12_add_12_sse2(const tran_low_t *in, uint8_t *out, int stride) {
  vpx_highbd_idct8x8_12_add_sse2(in, CAST_TO_SHORTPTR(out), stride, 12);
}
125
// IdctFunc adapter around vpx_highbd_idct8x8_64_add_sse2() with the trailing
// argument fixed to 10.
void idct8x8_64_add_10_sse2(const tran_low_t *in, uint8_t *out, int stride) {
  vpx_highbd_idct8x8_64_add_sse2(in, CAST_TO_SHORTPTR(out), stride, 10);
}
129
// IdctFunc adapter around vpx_highbd_idct8x8_64_add_sse2() with the trailing
// argument fixed to 12.
void idct8x8_64_add_12_sse2(const tran_low_t *in, uint8_t *out, int stride) {
  vpx_highbd_idct8x8_64_add_sse2(in, CAST_TO_SHORTPTR(out), stride, 12);
}
133 #endif // HAVE_SSE2
134 #endif // CONFIG_VP9_HIGHBITDEPTH
135
136 // Visual Studio 2022 (cl.exe) targeting AArch64 with optimizations enabled
137 // produces invalid code in RunExtremalCheck() and RunInvAccuracyCheck().
138 // See:
139 // https://developercommunity.visualstudio.com/t/1770-preview-1:-Misoptimization-for-AR/10369786
140 // TODO(jzern): check the compiler version after a fix for the issue is
141 // released.
142 #if defined(_MSC_VER) && defined(_M_ARM64) && !defined(__clang__)
143 #pragma optimize("", off)
144 #endif
145 class FwdTrans8x8TestBase {
146 public:
147 virtual ~FwdTrans8x8TestBase() = default;
148
149 protected:
150 virtual void RunFwdTxfm(int16_t *in, tran_low_t *out, int stride) = 0;
151 virtual void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) = 0;
152
RunSignBiasCheck()153 void RunSignBiasCheck() {
154 ACMRandom rnd(ACMRandom::DeterministicSeed());
155 DECLARE_ALIGNED(16, int16_t, test_input_block[64]);
156 DECLARE_ALIGNED(16, tran_low_t, test_output_block[64]);
157 int count_sign_block[64][2];
158 const int count_test_block = 100000;
159
160 memset(count_sign_block, 0, sizeof(count_sign_block));
161
162 for (int i = 0; i < count_test_block; ++i) {
163 // Initialize a test block with input range [-255, 255].
164 for (int j = 0; j < 64; ++j) {
165 test_input_block[j] = ((rnd.Rand16() >> (16 - bit_depth_)) & mask_) -
166 ((rnd.Rand16() >> (16 - bit_depth_)) & mask_);
167 }
168 ASM_REGISTER_STATE_CHECK(
169 RunFwdTxfm(test_input_block, test_output_block, pitch_));
170
171 for (int j = 0; j < 64; ++j) {
172 if (test_output_block[j] < 0) {
173 ++count_sign_block[j][0];
174 } else if (test_output_block[j] > 0) {
175 ++count_sign_block[j][1];
176 }
177 }
178 }
179
180 for (int j = 0; j < 64; ++j) {
181 const int diff = abs(count_sign_block[j][0] - count_sign_block[j][1]);
182 const int max_diff = kSignBiasMaxDiff255;
183 ASSERT_LT(diff, max_diff << (bit_depth_ - 8))
184 << "Error: 8x8 FDCT/FHT has a sign bias > "
185 << 1. * max_diff / count_test_block * 100 << "%"
186 << " for input range [-255, 255] at index " << j
187 << " count0: " << count_sign_block[j][0]
188 << " count1: " << count_sign_block[j][1] << " diff: " << diff;
189 }
190
191 memset(count_sign_block, 0, sizeof(count_sign_block));
192
193 for (int i = 0; i < count_test_block; ++i) {
194 // Initialize a test block with input range [-mask_ / 16, mask_ / 16].
195 for (int j = 0; j < 64; ++j) {
196 test_input_block[j] =
197 ((rnd.Rand16() & mask_) >> 4) - ((rnd.Rand16() & mask_) >> 4);
198 }
199 ASM_REGISTER_STATE_CHECK(
200 RunFwdTxfm(test_input_block, test_output_block, pitch_));
201
202 for (int j = 0; j < 64; ++j) {
203 if (test_output_block[j] < 0) {
204 ++count_sign_block[j][0];
205 } else if (test_output_block[j] > 0) {
206 ++count_sign_block[j][1];
207 }
208 }
209 }
210
211 for (int j = 0; j < 64; ++j) {
212 const int diff = abs(count_sign_block[j][0] - count_sign_block[j][1]);
213 const int max_diff = kSignBiasMaxDiff15;
214 ASSERT_LT(diff, max_diff << (bit_depth_ - 8))
215 << "Error: 8x8 FDCT/FHT has a sign bias > "
216 << 1. * max_diff / count_test_block * 100 << "%"
217 << " for input range [-15, 15] at index " << j
218 << " count0: " << count_sign_block[j][0]
219 << " count1: " << count_sign_block[j][1] << " diff: " << diff;
220 }
221 }
222
RunRoundTripErrorCheck()223 void RunRoundTripErrorCheck() {
224 ACMRandom rnd(ACMRandom::DeterministicSeed());
225 int max_error = 0;
226 int total_error = 0;
227 const int count_test_block = 100000;
228 DECLARE_ALIGNED(16, int16_t, test_input_block[64]);
229 DECLARE_ALIGNED(16, tran_low_t, test_temp_block[64]);
230 DECLARE_ALIGNED(16, uint8_t, dst[64]);
231 DECLARE_ALIGNED(16, uint8_t, src[64]);
232 #if CONFIG_VP9_HIGHBITDEPTH
233 DECLARE_ALIGNED(16, uint16_t, dst16[64]);
234 DECLARE_ALIGNED(16, uint16_t, src16[64]);
235 #endif
236
237 for (int i = 0; i < count_test_block; ++i) {
238 // Initialize a test block with input range [-mask_, mask_].
239 for (int j = 0; j < 64; ++j) {
240 if (bit_depth_ == VPX_BITS_8) {
241 src[j] = rnd.Rand8();
242 dst[j] = rnd.Rand8();
243 test_input_block[j] = src[j] - dst[j];
244 #if CONFIG_VP9_HIGHBITDEPTH
245 } else {
246 src16[j] = rnd.Rand16() & mask_;
247 dst16[j] = rnd.Rand16() & mask_;
248 test_input_block[j] = src16[j] - dst16[j];
249 #endif
250 }
251 }
252
253 ASM_REGISTER_STATE_CHECK(
254 RunFwdTxfm(test_input_block, test_temp_block, pitch_));
255 for (int j = 0; j < 64; ++j) {
256 if (test_temp_block[j] > 0) {
257 test_temp_block[j] += 2;
258 test_temp_block[j] /= 4;
259 test_temp_block[j] *= 4;
260 } else {
261 test_temp_block[j] -= 2;
262 test_temp_block[j] /= 4;
263 test_temp_block[j] *= 4;
264 }
265 }
266 if (bit_depth_ == VPX_BITS_8) {
267 ASM_REGISTER_STATE_CHECK(RunInvTxfm(test_temp_block, dst, pitch_));
268 #if CONFIG_VP9_HIGHBITDEPTH
269 } else {
270 ASM_REGISTER_STATE_CHECK(
271 RunInvTxfm(test_temp_block, CAST_TO_BYTEPTR(dst16), pitch_));
272 #endif
273 }
274
275 for (int j = 0; j < 64; ++j) {
276 #if CONFIG_VP9_HIGHBITDEPTH
277 const int diff =
278 bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
279 #else
280 const int diff = dst[j] - src[j];
281 #endif
282 const int error = diff * diff;
283 if (max_error < error) max_error = error;
284 total_error += error;
285 }
286 }
287
288 ASSERT_GE(1 << 2 * (bit_depth_ - 8), max_error)
289 << "Error: 8x8 FDCT/IDCT or FHT/IHT has an individual"
290 << " roundtrip error > 1";
291
292 ASSERT_GE((count_test_block << 2 * (bit_depth_ - 8)) / 5, total_error)
293 << "Error: 8x8 FDCT/IDCT or FHT/IHT has average roundtrip "
294 << "error > 1/5 per block";
295 }
296
RunExtremalCheck()297 void RunExtremalCheck() {
298 ACMRandom rnd(ACMRandom::DeterministicSeed());
299 int max_error = 0;
300 int total_error = 0;
301 int total_coeff_error = 0;
302 const int count_test_block = 100000;
303 DECLARE_ALIGNED(16, int16_t, test_input_block[64]);
304 DECLARE_ALIGNED(16, tran_low_t, test_temp_block[64]);
305 DECLARE_ALIGNED(16, tran_low_t, ref_temp_block[64]);
306 DECLARE_ALIGNED(16, uint8_t, dst[64]);
307 DECLARE_ALIGNED(16, uint8_t, src[64]);
308 #if CONFIG_VP9_HIGHBITDEPTH
309 DECLARE_ALIGNED(16, uint16_t, dst16[64]);
310 DECLARE_ALIGNED(16, uint16_t, src16[64]);
311 #endif
312
313 for (int i = 0; i < count_test_block; ++i) {
314 // Initialize a test block with input range [-mask_, mask_].
315 for (int j = 0; j < 64; ++j) {
316 if (bit_depth_ == VPX_BITS_8) {
317 if (i == 0) {
318 src[j] = 255;
319 dst[j] = 0;
320 } else if (i == 1) {
321 src[j] = 0;
322 dst[j] = 255;
323 } else {
324 src[j] = rnd.Rand8() % 2 ? 255 : 0;
325 dst[j] = rnd.Rand8() % 2 ? 255 : 0;
326 }
327 test_input_block[j] = src[j] - dst[j];
328 #if CONFIG_VP9_HIGHBITDEPTH
329 } else {
330 if (i == 0) {
331 src16[j] = mask_;
332 dst16[j] = 0;
333 } else if (i == 1) {
334 src16[j] = 0;
335 dst16[j] = mask_;
336 } else {
337 src16[j] = rnd.Rand8() % 2 ? mask_ : 0;
338 dst16[j] = rnd.Rand8() % 2 ? mask_ : 0;
339 }
340 test_input_block[j] = src16[j] - dst16[j];
341 #endif
342 }
343 }
344
345 ASM_REGISTER_STATE_CHECK(
346 RunFwdTxfm(test_input_block, test_temp_block, pitch_));
347 ASM_REGISTER_STATE_CHECK(
348 fwd_txfm_ref(test_input_block, ref_temp_block, pitch_, tx_type_));
349 if (bit_depth_ == VPX_BITS_8) {
350 ASM_REGISTER_STATE_CHECK(RunInvTxfm(test_temp_block, dst, pitch_));
351 #if CONFIG_VP9_HIGHBITDEPTH
352 } else {
353 ASM_REGISTER_STATE_CHECK(
354 RunInvTxfm(test_temp_block, CAST_TO_BYTEPTR(dst16), pitch_));
355 #endif
356 }
357
358 for (int j = 0; j < 64; ++j) {
359 #if CONFIG_VP9_HIGHBITDEPTH
360 const int diff =
361 bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
362 #else
363 const int diff = dst[j] - src[j];
364 #endif
365 const int error = diff * diff;
366 if (max_error < error) max_error = error;
367 total_error += error;
368
369 const int coeff_diff = test_temp_block[j] - ref_temp_block[j];
370 total_coeff_error += abs(coeff_diff);
371 }
372
373 ASSERT_GE(1 << 2 * (bit_depth_ - 8), max_error)
374 << "Error: Extremal 8x8 FDCT/IDCT or FHT/IHT has"
375 << " an individual roundtrip error > 1";
376
377 ASSERT_GE((count_test_block << 2 * (bit_depth_ - 8)) / 5, total_error)
378 << "Error: Extremal 8x8 FDCT/IDCT or FHT/IHT has average"
379 << " roundtrip error > 1/5 per block";
380
381 ASSERT_EQ(0, total_coeff_error)
382 << "Error: Extremal 8x8 FDCT/FHT has"
383 << " overflow issues in the intermediate steps > 1";
384 }
385 }
386
RunInvAccuracyCheck()387 void RunInvAccuracyCheck() {
388 ACMRandom rnd(ACMRandom::DeterministicSeed());
389 const int count_test_block = 1000;
390 DECLARE_ALIGNED(16, int16_t, in[kNumCoeffs]);
391 DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]);
392 DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
393 DECLARE_ALIGNED(16, uint8_t, src[kNumCoeffs]);
394 #if CONFIG_VP9_HIGHBITDEPTH
395 DECLARE_ALIGNED(16, uint16_t, src16[kNumCoeffs]);
396 DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
397 #endif
398
399 for (int i = 0; i < count_test_block; ++i) {
400 double out_r[kNumCoeffs];
401
402 // Initialize a test block with input range [-255, 255].
403 for (int j = 0; j < kNumCoeffs; ++j) {
404 if (bit_depth_ == VPX_BITS_8) {
405 src[j] = rnd.Rand8() % 2 ? 255 : 0;
406 dst[j] = src[j] > 0 ? 0 : 255;
407 in[j] = src[j] - dst[j];
408 #if CONFIG_VP9_HIGHBITDEPTH
409 } else {
410 src16[j] = rnd.Rand8() % 2 ? mask_ : 0;
411 dst16[j] = src16[j] > 0 ? 0 : mask_;
412 in[j] = src16[j] - dst16[j];
413 #endif
414 }
415 }
416
417 reference_8x8_dct_2d(in, out_r);
418 for (int j = 0; j < kNumCoeffs; ++j) {
419 coeff[j] = static_cast<tran_low_t>(round(out_r[j]));
420 }
421
422 if (bit_depth_ == VPX_BITS_8) {
423 ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, pitch_));
424 #if CONFIG_VP9_HIGHBITDEPTH
425 } else {
426 ASM_REGISTER_STATE_CHECK(
427 RunInvTxfm(coeff, CAST_TO_BYTEPTR(dst16), pitch_));
428 #endif
429 }
430
431 for (int j = 0; j < kNumCoeffs; ++j) {
432 #if CONFIG_VP9_HIGHBITDEPTH
433 const int diff =
434 bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
435 #else
436 const int diff = dst[j] - src[j];
437 #endif
438 const uint32_t error = diff * diff;
439 ASSERT_GE(1u << 2 * (bit_depth_ - 8), error)
440 << "Error: 8x8 IDCT has error " << error << " at index " << j;
441 }
442 }
443 }
444
RunFwdAccuracyCheck()445 void RunFwdAccuracyCheck() {
446 ACMRandom rnd(ACMRandom::DeterministicSeed());
447 const int count_test_block = 1000;
448 DECLARE_ALIGNED(16, int16_t, in[kNumCoeffs]);
449 DECLARE_ALIGNED(16, tran_low_t, coeff_r[kNumCoeffs]);
450 DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]);
451
452 for (int i = 0; i < count_test_block; ++i) {
453 double out_r[kNumCoeffs];
454
455 // Initialize a test block with input range [-mask_, mask_].
456 for (int j = 0; j < kNumCoeffs; ++j) {
457 in[j] = rnd.Rand8() % 2 == 0 ? mask_ : -mask_;
458 }
459
460 RunFwdTxfm(in, coeff, pitch_);
461 reference_8x8_dct_2d(in, out_r);
462 for (int j = 0; j < kNumCoeffs; ++j) {
463 coeff_r[j] = static_cast<tran_low_t>(round(out_r[j]));
464 }
465
466 for (int j = 0; j < kNumCoeffs; ++j) {
467 const int32_t diff = coeff[j] - coeff_r[j];
468 const uint32_t error = diff * diff;
469 ASSERT_GE(9u << 2 * (bit_depth_ - 8), error)
470 << "Error: 8x8 DCT has error " << error << " at index " << j;
471 }
472 }
473 }
474
CompareInvReference(IdctFunc ref_txfm,int thresh)475 void CompareInvReference(IdctFunc ref_txfm, int thresh) {
476 ACMRandom rnd(ACMRandom::DeterministicSeed());
477 const int count_test_block = 10000;
478 const int eob = 12;
479 DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]);
480 DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
481 DECLARE_ALIGNED(16, uint8_t, ref[kNumCoeffs]);
482 #if CONFIG_VP9_HIGHBITDEPTH
483 DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
484 DECLARE_ALIGNED(16, uint16_t, ref16[kNumCoeffs]);
485 #endif
486 const int16_t *scan = vp9_default_scan_orders[TX_8X8].scan;
487
488 for (int i = 0; i < count_test_block; ++i) {
489 for (int j = 0; j < kNumCoeffs; ++j) {
490 if (j < eob) {
491 // Random values less than the threshold, either positive or negative
492 coeff[scan[j]] = rnd(thresh) * (1 - 2 * (i % 2));
493 } else {
494 coeff[scan[j]] = 0;
495 }
496 if (bit_depth_ == VPX_BITS_8) {
497 dst[j] = 0;
498 ref[j] = 0;
499 #if CONFIG_VP9_HIGHBITDEPTH
500 } else {
501 dst16[j] = 0;
502 ref16[j] = 0;
503 #endif
504 }
505 }
506 if (bit_depth_ == VPX_BITS_8) {
507 ref_txfm(coeff, ref, pitch_);
508 ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, pitch_));
509 #if CONFIG_VP9_HIGHBITDEPTH
510 } else {
511 ref_txfm(coeff, CAST_TO_BYTEPTR(ref16), pitch_);
512 ASM_REGISTER_STATE_CHECK(
513 RunInvTxfm(coeff, CAST_TO_BYTEPTR(dst16), pitch_));
514 #endif
515 }
516
517 for (int j = 0; j < kNumCoeffs; ++j) {
518 #if CONFIG_VP9_HIGHBITDEPTH
519 const int diff =
520 bit_depth_ == VPX_BITS_8 ? dst[j] - ref[j] : dst16[j] - ref16[j];
521 #else
522 const int diff = dst[j] - ref[j];
523 #endif
524 const uint32_t error = diff * diff;
525 ASSERT_EQ(0u, error)
526 << "Error: 8x8 IDCT has error " << error << " at index " << j;
527 }
528 }
529 }
530 int pitch_;
531 int tx_type_;
532 FhtFunc fwd_txfm_ref;
533 vpx_bit_depth_t bit_depth_;
534 int mask_;
535 };
536 #if defined(_MSC_VER) && defined(_M_ARM64) && !defined(__clang__)
537 #pragma optimize("", on)
538 #endif
539
540 class FwdTrans8x8DCT : public FwdTrans8x8TestBase,
541 public ::testing::TestWithParam<Dct8x8Param> {
542 public:
543 ~FwdTrans8x8DCT() override = default;
544
SetUp()545 void SetUp() override {
546 fwd_txfm_ = GET_PARAM(0);
547 inv_txfm_ = GET_PARAM(1);
548 tx_type_ = GET_PARAM(2);
549 pitch_ = 8;
550 fwd_txfm_ref = fdct8x8_ref;
551 bit_depth_ = GET_PARAM(3);
552 mask_ = (1 << bit_depth_) - 1;
553 }
554
TearDown()555 void TearDown() override { libvpx_test::ClearSystemState(); }
556
557 protected:
RunFwdTxfm(int16_t * in,tran_low_t * out,int stride)558 void RunFwdTxfm(int16_t *in, tran_low_t *out, int stride) override {
559 fwd_txfm_(in, out, stride);
560 }
RunInvTxfm(tran_low_t * out,uint8_t * dst,int stride)561 void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) override {
562 inv_txfm_(out, dst, stride);
563 }
564
565 FdctFunc fwd_txfm_;
566 IdctFunc inv_txfm_;
567 };
568
// Each test runs one of the shared FwdTrans8x8TestBase checks against the
// forward / inverse DCT pair selected by the test parameter.
TEST_P(FwdTrans8x8DCT, SignBiasCheck) { RunSignBiasCheck(); }

TEST_P(FwdTrans8x8DCT, RoundTripErrorCheck) { RunRoundTripErrorCheck(); }

TEST_P(FwdTrans8x8DCT, ExtremalCheck) { RunExtremalCheck(); }

TEST_P(FwdTrans8x8DCT, FwdAccuracyCheck) { RunFwdAccuracyCheck(); }

TEST_P(FwdTrans8x8DCT, InvAccuracyCheck) { RunInvAccuracyCheck(); }
578
579 class FwdTrans8x8HT : public FwdTrans8x8TestBase,
580 public ::testing::TestWithParam<Ht8x8Param> {
581 public:
582 ~FwdTrans8x8HT() override = default;
583
SetUp()584 void SetUp() override {
585 fwd_txfm_ = GET_PARAM(0);
586 inv_txfm_ = GET_PARAM(1);
587 tx_type_ = GET_PARAM(2);
588 pitch_ = 8;
589 fwd_txfm_ref = fht8x8_ref;
590 bit_depth_ = GET_PARAM(3);
591 mask_ = (1 << bit_depth_) - 1;
592 }
593
TearDown()594 void TearDown() override { libvpx_test::ClearSystemState(); }
595
596 protected:
RunFwdTxfm(int16_t * in,tran_low_t * out,int stride)597 void RunFwdTxfm(int16_t *in, tran_low_t *out, int stride) override {
598 fwd_txfm_(in, out, stride, tx_type_);
599 }
RunInvTxfm(tran_low_t * out,uint8_t * dst,int stride)600 void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) override {
601 inv_txfm_(out, dst, stride, tx_type_);
602 }
603
604 FhtFunc fwd_txfm_;
605 IhtFunc inv_txfm_;
606 };
607
// Each test runs one of the shared FwdTrans8x8TestBase checks against the
// forward / inverse hybrid transform pair selected by the test parameter.
TEST_P(FwdTrans8x8HT, SignBiasCheck) { RunSignBiasCheck(); }

TEST_P(FwdTrans8x8HT, RoundTripErrorCheck) { RunRoundTripErrorCheck(); }

TEST_P(FwdTrans8x8HT, ExtremalCheck) { RunExtremalCheck(); }
613
614 #if HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
615 class InvTrans8x8DCT : public FwdTrans8x8TestBase,
616 public ::testing::TestWithParam<Idct8x8Param> {
617 public:
618 ~InvTrans8x8DCT() override = default;
619
SetUp()620 void SetUp() override {
621 ref_txfm_ = GET_PARAM(0);
622 inv_txfm_ = GET_PARAM(1);
623 thresh_ = GET_PARAM(2);
624 pitch_ = 8;
625 bit_depth_ = GET_PARAM(3);
626 mask_ = (1 << bit_depth_) - 1;
627 }
628
TearDown()629 void TearDown() override { libvpx_test::ClearSystemState(); }
630
631 protected:
RunInvTxfm(tran_low_t * out,uint8_t * dst,int stride)632 void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) override {
633 inv_txfm_(out, dst, stride);
634 }
RunFwdTxfm(int16_t *,tran_low_t *,int)635 void RunFwdTxfm(int16_t * /*out*/, tran_low_t * /*dst*/,
636 int /*stride*/) override {}
637
638 IdctFunc ref_txfm_;
639 IdctFunc inv_txfm_;
640 int thresh_;
641 };
// InvTrans8x8DCT is only instantiated for SSE2 further down in this file.
GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(InvTrans8x8DCT);

// Checks that the inverse transform under test matches the reference
// transform exactly for coefficients bounded by thresh_.
TEST_P(InvTrans8x8DCT, CompareReference) {
  CompareInvReference(ref_txfm_, thresh_);
}
647 #endif // HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
648
649 using std::make_tuple;
650
// C implementations of the DCT. Tuple layout: (forward, inverse, tx_type,
// bit depth). High-bitdepth builds additionally cover 10 and 12 bits through
// the idct8x8_10 / idct8x8_12 wrappers.
#if CONFIG_VP9_HIGHBITDEPTH
INSTANTIATE_TEST_SUITE_P(
    C, FwdTrans8x8DCT,
    ::testing::Values(
        make_tuple(&vpx_fdct8x8_c, &vpx_idct8x8_64_add_c, 0, VPX_BITS_8),
        make_tuple(&vpx_highbd_fdct8x8_c, &idct8x8_10, 0, VPX_BITS_10),
        make_tuple(&vpx_highbd_fdct8x8_c, &idct8x8_12, 0, VPX_BITS_12)));
#else
INSTANTIATE_TEST_SUITE_P(C, FwdTrans8x8DCT,
                         ::testing::Values(make_tuple(&vpx_fdct8x8_c,
                                                      &vpx_idct8x8_64_add_c, 0,
                                                      VPX_BITS_8)));
#endif  // CONFIG_VP9_HIGHBITDEPTH
664
// C implementations of the hybrid transform, covering tx_type 0..3 for every
// supported bit depth. Tuple layout: (forward, inverse, tx_type, bit depth).
#if CONFIG_VP9_HIGHBITDEPTH
INSTANTIATE_TEST_SUITE_P(
    C, FwdTrans8x8HT,
    ::testing::Values(
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 0, VPX_BITS_8),
        make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_10, 0, VPX_BITS_10),
        make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_10, 1, VPX_BITS_10),
        make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_10, 2, VPX_BITS_10),
        make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_10, 3, VPX_BITS_10),
        make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_12, 0, VPX_BITS_12),
        make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_12, 1, VPX_BITS_12),
        make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_12, 2, VPX_BITS_12),
        make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_12, 3, VPX_BITS_12),
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 1, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 2, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 3, VPX_BITS_8)));
#else
INSTANTIATE_TEST_SUITE_P(
    C, FwdTrans8x8HT,
    ::testing::Values(
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 0, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 1, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 2, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 3, VPX_BITS_8)));
#endif  // CONFIG_VP9_HIGHBITDEPTH
690
// NEON: 8-bit DCT with NEON forward and inverse transforms. The hybrid
// transform entries (non-high-bitdepth builds only) pair the C forward
// transform with the NEON inverse transform.
#if HAVE_NEON && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_SUITE_P(NEON, FwdTrans8x8DCT,
                         ::testing::Values(make_tuple(&vpx_fdct8x8_neon,
                                                      &vpx_idct8x8_64_add_neon,
                                                      0, VPX_BITS_8)));

#if !CONFIG_VP9_HIGHBITDEPTH
INSTANTIATE_TEST_SUITE_P(
    NEON, FwdTrans8x8HT,
    ::testing::Values(
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_neon, 0, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_neon, 1, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_neon, 2, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_neon, 3, VPX_BITS_8)));
#endif  // !CONFIG_VP9_HIGHBITDEPTH
#endif  // HAVE_NEON && !CONFIG_EMULATE_HARDWARE
707
// SSE2, non-high-bitdepth builds: SSE2 forward and inverse transforms for the
// 8-bit DCT and for the hybrid transform with tx_type 0..3.
#if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_SUITE_P(SSE2, FwdTrans8x8DCT,
                         ::testing::Values(make_tuple(&vpx_fdct8x8_sse2,
                                                      &vpx_idct8x8_64_add_sse2,
                                                      0, VPX_BITS_8)));
INSTANTIATE_TEST_SUITE_P(
    SSE2, FwdTrans8x8HT,
    ::testing::Values(
        make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 0, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 1, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 2, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 3, VPX_BITS_8)));
#endif  // HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
721
// SSE2, high-bitdepth builds. The 8-bit entries pair the SSE2 forward
// transform with the C inverse transform.
//
// NOTE: the 10- and 12-bit FwdTrans8x8DCT entries pass 12 as the third tuple
// element. FwdTrans8x8DCT stores it in tx_type_, which is only handed to
// fdct8x8_ref(), and that function ignores it, so the value has no effect.
#if HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_SUITE_P(
    SSE2, FwdTrans8x8DCT,
    ::testing::Values(make_tuple(&vpx_fdct8x8_sse2, &vpx_idct8x8_64_add_c, 0,
                                 VPX_BITS_8),
                      make_tuple(&vpx_highbd_fdct8x8_c, &idct8x8_64_add_10_sse2,
                                 12, VPX_BITS_10),
                      make_tuple(&vpx_highbd_fdct8x8_sse2,
                                 &idct8x8_64_add_10_sse2, 12, VPX_BITS_10),
                      make_tuple(&vpx_highbd_fdct8x8_c, &idct8x8_64_add_12_sse2,
                                 12, VPX_BITS_12),
                      make_tuple(&vpx_highbd_fdct8x8_sse2,
                                 &idct8x8_64_add_12_sse2, 12, VPX_BITS_12)));

INSTANTIATE_TEST_SUITE_P(
    SSE2, FwdTrans8x8HT,
    ::testing::Values(
        make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_c, 0, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_c, 1, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_c, 2, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_c, 3, VPX_BITS_8)));

// Optimizations take effect at a threshold of 6201, so we use a value close to
// that to test both branches.
// Tuple layout for InvTrans8x8DCT: (reference inverse, inverse under test,
// thresh, bit depth).
INSTANTIATE_TEST_SUITE_P(
    SSE2, InvTrans8x8DCT,
    ::testing::Values(
        make_tuple(&idct8x8_12_add_10_c, &idct8x8_12_add_10_sse2, 6225,
                   VPX_BITS_10),
        make_tuple(&idct8x8_10, &idct8x8_64_add_10_sse2, 6225, VPX_BITS_10),
        make_tuple(&idct8x8_12_add_12_c, &idct8x8_12_add_12_sse2, 6225,
                   VPX_BITS_12),
        make_tuple(&idct8x8_12, &idct8x8_64_add_12_sse2, 6225, VPX_BITS_12)));
#endif  // HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
756
// SSSE3 (x86-64, non-high-bitdepth builds only): SSSE3 forward DCT paired
// with the SSE2 inverse DCT.
#if HAVE_SSSE3 && VPX_ARCH_X86_64 && !CONFIG_VP9_HIGHBITDEPTH && \
    !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_SUITE_P(SSSE3, FwdTrans8x8DCT,
                         ::testing::Values(make_tuple(&vpx_fdct8x8_ssse3,
                                                      &vpx_idct8x8_64_add_sse2,
                                                      0, VPX_BITS_8)));
#endif
764
// MSA, non-high-bitdepth builds: MSA forward and inverse transforms for the
// 8-bit DCT and for the hybrid transform with tx_type 0..3.
#if HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_SUITE_P(MSA, FwdTrans8x8DCT,
                         ::testing::Values(make_tuple(&vpx_fdct8x8_msa,
                                                      &vpx_idct8x8_64_add_msa,
                                                      0, VPX_BITS_8)));
INSTANTIATE_TEST_SUITE_P(
    MSA, FwdTrans8x8HT,
    ::testing::Values(
        make_tuple(&vp9_fht8x8_msa, &vp9_iht8x8_64_add_msa, 0, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_msa, &vp9_iht8x8_64_add_msa, 1, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_msa, &vp9_iht8x8_64_add_msa, 2, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_msa, &vp9_iht8x8_64_add_msa, 3, VPX_BITS_8)));
#endif  // HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
778
// VSX, non-high-bitdepth builds: C forward DCT paired with the VSX inverse
// DCT.
#if HAVE_VSX && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_SUITE_P(VSX, FwdTrans8x8DCT,
                         ::testing::Values(make_tuple(&vpx_fdct8x8_c,
                                                      &vpx_idct8x8_64_add_vsx,
                                                      0, VPX_BITS_8)));
#endif  // HAVE_VSX && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
785
// LSX, non-high-bitdepth builds: LSX forward DCT paired with the C inverse
// DCT.
#if HAVE_LSX && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_SUITE_P(LSX, FwdTrans8x8DCT,
                         ::testing::Values(make_tuple(&vpx_fdct8x8_lsx,
                                                      &vpx_idct8x8_64_add_c, 0,
                                                      VPX_BITS_8)));
#endif  // HAVE_LSX && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
792 } // namespace
793