1 /*
2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
3 *
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 */
11
12 #include <math.h>
13 #include <stdlib.h>
14 #include <string.h>
15
16 #include "gtest/gtest.h"
17 #include "test/register_state_check.h"
18 #include "test/function_equivalence_test.h"
19
20 #include "config/aom_config.h"
21 #include "config/aom_dsp_rtcd.h"
22 #include "config/av1_rtcd.h"
23
24 #include "aom/aom_integer.h"
25
26 #include "av1/common/enums.h"
27
28 #include "aom_dsp/blend.h"
29
30 using libaom_test::FunctionEquivalenceTest;
31
32 namespace {
33
34 template <typename BlendA64Func, typename SrcPixel, typename DstPixel>
35 class BlendA64MaskTest : public FunctionEquivalenceTest<BlendA64Func> {
36 protected:
37 static const int kIterations = 10000;
38 static const int kMaxWidth = MAX_SB_SIZE * 5; // * 5 to cover longer strides
39 static const int kMaxHeight = MAX_SB_SIZE;
40 static const int kBufSize = kMaxWidth * kMaxHeight;
41 static const int kMaxMaskWidth = 2 * MAX_SB_SIZE;
42 static const int kMaxMaskSize = kMaxMaskWidth * kMaxMaskWidth;
43
44 ~BlendA64MaskTest() override = default;
45
46 virtual void Execute(const SrcPixel *p_src0, const SrcPixel *p_src1,
47 int run_times) = 0;
48
49 template <typename Pixel>
GetSources(Pixel ** src0,Pixel ** src1,Pixel *,int run_times)50 void GetSources(Pixel **src0, Pixel **src1, Pixel * /*dst*/, int run_times) {
51 if (run_times > 1) {
52 *src0 = src0_;
53 *src1 = src1_;
54 return;
55 }
56 switch (this->rng_(3)) {
57 case 0: // Separate sources
58 *src0 = src0_;
59 *src1 = src1_;
60 break;
61 case 1: // src0 == dst
62 *src0 = dst_tst_;
63 src0_stride_ = dst_stride_;
64 src0_offset_ = dst_offset_;
65 *src1 = src1_;
66 break;
67 case 2: // src1 == dst
68 *src0 = src0_;
69 *src1 = dst_tst_;
70 src1_stride_ = dst_stride_;
71 src1_offset_ = dst_offset_;
72 break;
73 default: FAIL();
74 }
75 }
76
GetSources(uint16_t ** src0,uint16_t ** src1,uint8_t *,int)77 void GetSources(uint16_t **src0, uint16_t **src1, uint8_t * /*dst*/,
78 int /*run_times*/) {
79 *src0 = src0_;
80 *src1 = src1_;
81 }
82
Rand1()83 uint8_t Rand1() { return this->rng_.Rand8() & 1; }
84
RunOneTest(int block_size,int subx,int suby,int run_times)85 void RunOneTest(int block_size, int subx, int suby, int run_times) {
86 w_ = block_size_wide[block_size];
87 h_ = block_size_high[block_size];
88 run_times = run_times > 1 ? run_times / w_ : 1;
89 ASSERT_GT(run_times, 0);
90 subx_ = subx;
91 suby_ = suby;
92
93 dst_offset_ = this->rng_(33);
94 dst_stride_ = this->rng_(kMaxWidth + 1 - w_) + w_;
95
96 src0_offset_ = this->rng_(33);
97 src0_stride_ = this->rng_(kMaxWidth + 1 - w_) + w_;
98
99 src1_offset_ = this->rng_(33);
100 src1_stride_ = this->rng_(kMaxWidth + 1 - w_) + w_;
101
102 mask_stride_ =
103 this->rng_(kMaxWidth + 1 - w_ * (subx_ ? 2 : 1)) + w_ * (subx_ ? 2 : 1);
104
105 SrcPixel *p_src0;
106 SrcPixel *p_src1;
107
108 p_src0 = src0_;
109 p_src1 = src1_;
110
111 GetSources(&p_src0, &p_src1, &dst_ref_[0], run_times);
112
113 Execute(p_src0, p_src1, run_times);
114
115 for (int r = 0; r < h_; ++r) {
116 for (int c = 0; c < w_; ++c) {
117 ASSERT_EQ(dst_ref_[dst_offset_ + r * dst_stride_ + c],
118 dst_tst_[dst_offset_ + r * dst_stride_ + c])
119 << w_ << "x" << h_ << " subx " << subx_ << " suby " << suby_
120 << " r: " << r << " c: " << c;
121 }
122 }
123 }
124
RunTest(int block_size,int run_times)125 void RunTest(int block_size, int run_times) {
126 for (subx_ = 0; subx_ <= 1; subx_++) {
127 for (suby_ = 0; suby_ <= 1; suby_++) {
128 RunOneTest(block_size, subx_, suby_, run_times);
129 }
130 }
131 }
132
133 DstPixel dst_ref_[kBufSize];
134 DstPixel dst_tst_[kBufSize];
135 uint32_t dst_stride_;
136 uint32_t dst_offset_;
137
138 SrcPixel src0_[kBufSize];
139 uint32_t src0_stride_;
140 uint32_t src0_offset_;
141
142 SrcPixel src1_[kBufSize];
143 uint32_t src1_stride_;
144 uint32_t src1_offset_;
145
146 uint8_t mask_[kMaxMaskSize];
147 size_t mask_stride_;
148
149 int w_;
150 int h_;
151
152 int suby_;
153 int subx_;
154 };
155
156 //////////////////////////////////////////////////////////////////////////////
157 // 8 bit version
158 //////////////////////////////////////////////////////////////////////////////
159
160 typedef void (*F8B)(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0,
161 uint32_t src0_stride, const uint8_t *src1,
162 uint32_t src1_stride, const uint8_t *mask,
163 uint32_t mask_stride, int w, int h, int subx, int suby);
164 typedef libaom_test::FuncParam<F8B> TestFuncs;
165
166 class BlendA64MaskTest8B : public BlendA64MaskTest<F8B, uint8_t, uint8_t> {
167 protected:
Execute(const uint8_t * p_src0,const uint8_t * p_src1,int run_times)168 void Execute(const uint8_t *p_src0, const uint8_t *p_src1,
169 int run_times) override {
170 aom_usec_timer timer;
171 aom_usec_timer_start(&timer);
172 for (int i = 0; i < run_times; ++i) {
173 params_.ref_func(dst_ref_ + dst_offset_, dst_stride_,
174 p_src0 + src0_offset_, src0_stride_,
175 p_src1 + src1_offset_, src1_stride_, mask_,
176 kMaxMaskWidth, w_, h_, subx_, suby_);
177 }
178 aom_usec_timer_mark(&timer);
179 const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
180 aom_usec_timer_start(&timer);
181 for (int i = 0; i < run_times; ++i) {
182 params_.tst_func(dst_tst_ + dst_offset_, dst_stride_,
183 p_src0 + src0_offset_, src0_stride_,
184 p_src1 + src1_offset_, src1_stride_, mask_,
185 kMaxMaskWidth, w_, h_, subx_, suby_);
186 }
187 aom_usec_timer_mark(&timer);
188 const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
189 if (run_times > 1) {
190 printf("%3dx%-3d subx %d suby %d :%7.2f/%7.2fns", w_, h_, subx_, suby_,
191 time1, time2);
192 printf("(%3.2f)\n", time1 / time2);
193 }
194 }
195 };
196 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(BlendA64MaskTest8B);
197
// Checks every block size with freshly randomized 8-bit pixels and random
// mask weights.
TEST_P(BlendA64MaskTest8B, RandomValues) {
  for (int block = 0; block < BLOCK_SIZES_ALL; ++block) {
    // Stop once an earlier block size has reported a fatal failure.
    if (HasFatalFailure()) break;

    // All four pixel buffers are refilled element by element.
    for (int idx = 0; idx < kBufSize; ++idx) {
      dst_ref_[idx] = rng_.Rand8();
      dst_tst_[idx] = rng_.Rand8();

      src0_[idx] = rng_.Rand8();
      src1_[idx] = rng_.Rand8();
    }

    for (uint8_t &alpha : mask_) alpha = rng_(AOM_BLEND_A64_MAX_ALPHA + 1);

    RunTest(block, 1);
  }
}
214
// Checks every block size with pixels near the top of the 8-bit range
// (254 plus a small random term) and mask weights near
// AOM_BLEND_A64_MAX_ALPHA. The buffers are filled once and reused.
TEST_P(BlendA64MaskTest8B, ExtremeValues) {
  for (int idx = 0; idx < kBufSize; ++idx) {
    dst_ref_[idx] = rng_(2) + 254;
    dst_tst_[idx] = rng_(2) + 254;
    src0_[idx] = rng_(2) + 254;
    src1_[idx] = rng_(2) + 254;
  }

  for (uint8_t &alpha : mask_) alpha = rng_(2) + AOM_BLEND_A64_MAX_ALPHA - 1;

  // Walk the block sizes until one of them reports a fatal failure.
  int block = 0;
  while (block < BLOCK_SIZES_ALL && !HasFatalFailure()) {
    RunTest(block, 1);
    ++block;
  }
}
229
// Timing run (disabled by default): every block size is executed with a large
// iteration budget on random pixels and mask weights.
TEST_P(BlendA64MaskTest8B, DISABLED_Speed) {
  const int kRunTimes = 10000000;
  int block = 0;
  while (block < BLOCK_SIZES_ALL) {
    for (int idx = 0; idx < kBufSize; ++idx) {
      dst_ref_[idx] = rng_.Rand8();
      dst_tst_[idx] = rng_.Rand8();

      src0_[idx] = rng_.Rand8();
      src1_[idx] = rng_.Rand8();
    }

    for (uint8_t &alpha : mask_) alpha = rng_(AOM_BLEND_A64_MAX_ALPHA + 1);

    RunTest(block, kRunTimes);
    ++block;
  }
}
// Instantiations of the 8-bit tests. Every SIMD implementation is compared
// against the C reference.
#if HAVE_SSE4_1
INSTANTIATE_TEST_SUITE_P(SSE4_1, BlendA64MaskTest8B,
                         ::testing::Values(TestFuncs(
                             aom_blend_a64_mask_c, aom_blend_a64_mask_sse4_1)));
#endif  // HAVE_SSE4_1

#if HAVE_AVX2
// The reference is the C function rather than the SSE4.1 one, so this block
// only depends on symbols guarded by HAVE_AVX2 and matches the other
// instantiations.
// NOTE(review): this also changes the DISABLED_Speed baseline for AVX2 from
// SSE4.1 to C - confirm that is acceptable.
INSTANTIATE_TEST_SUITE_P(AVX2, BlendA64MaskTest8B,
                         ::testing::Values(TestFuncs(aom_blend_a64_mask_c,
                                                     aom_blend_a64_mask_avx2)));
#endif  // HAVE_AVX2

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(NEON, BlendA64MaskTest8B,
                         ::testing::Values(TestFuncs(aom_blend_a64_mask_c,
                                                     aom_blend_a64_mask_neon)));
#endif  // HAVE_NEON
264
265 //////////////////////////////////////////////////////////////////////////////
266 // 8 bit _d16 version
267 //////////////////////////////////////////////////////////////////////////////
268
269 typedef void (*F8B_D16)(uint8_t *dst, uint32_t dst_stride, const uint16_t *src0,
270 uint32_t src0_stride, const uint16_t *src1,
271 uint32_t src1_stride, const uint8_t *mask,
272 uint32_t mask_stride, int w, int h, int subx, int suby,
273 ConvolveParams *conv_params);
274 typedef libaom_test::FuncParam<F8B_D16> TestFuncs_d16;
275
276 class BlendA64MaskTest8B_d16
277 : public BlendA64MaskTest<F8B_D16, uint16_t, uint8_t> {
278 protected:
279 // max number of bits used by the source
280 static const int kSrcMaxBitsMask = 0x3fff;
281
Execute(const uint16_t * p_src0,const uint16_t * p_src1,int run_times)282 void Execute(const uint16_t *p_src0, const uint16_t *p_src1,
283 int run_times) override {
284 ConvolveParams conv_params;
285 conv_params.round_0 = ROUND0_BITS;
286 conv_params.round_1 = COMPOUND_ROUND1_BITS;
287 aom_usec_timer timer;
288 aom_usec_timer_start(&timer);
289 for (int i = 0; i < run_times; ++i) {
290 params_.ref_func(dst_ref_ + dst_offset_, dst_stride_,
291 p_src0 + src0_offset_, src0_stride_,
292 p_src1 + src1_offset_, src1_stride_, mask_,
293 kMaxMaskWidth, w_, h_, subx_, suby_, &conv_params);
294 }
295 aom_usec_timer_mark(&timer);
296 const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
297 aom_usec_timer_start(&timer);
298 for (int i = 0; i < run_times; ++i) {
299 params_.tst_func(dst_tst_ + dst_offset_, dst_stride_,
300 p_src0 + src0_offset_, src0_stride_,
301 p_src1 + src1_offset_, src1_stride_, mask_,
302 kMaxMaskWidth, w_, h_, subx_, suby_, &conv_params);
303 }
304 aom_usec_timer_mark(&timer);
305 const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
306 if (run_times > 1) {
307 printf("%3dx%-3d subx %d suby %d :%7.2f/%7.2fns", w_, h_, subx_, suby_,
308 time1, time2);
309 printf("(%3.2f)\n", time1 / time2);
310 }
311 }
312 };
313 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(BlendA64MaskTest8B_d16);
314
// Checks every block size with random 8-bit destinations, random sources
// limited to kSrcMaxBitsMask, and random mask weights.
TEST_P(BlendA64MaskTest8B_d16, RandomValues) {
  for (int block = 0; block < BLOCK_SIZES_ALL; ++block) {
    // Stop once an earlier block size has reported a fatal failure.
    if (HasFatalFailure()) break;

    for (int idx = 0; idx < kBufSize; ++idx) {
      dst_ref_[idx] = rng_.Rand8();
      dst_tst_[idx] = rng_.Rand8();

      src0_[idx] = rng_.Rand16() & kSrcMaxBitsMask;
      src1_[idx] = rng_.Rand16() & kSrcMaxBitsMask;
    }

    for (uint8_t &alpha : mask_) alpha = rng_(AOM_BLEND_A64_MAX_ALPHA + 1);

    RunTest(block, 1);
  }
}
331
// Checks every block size with saturated inputs: destinations at 255, sources
// at kSrcMaxBitsMask and every mask weight at AOM_BLEND_A64_MAX_ALPHA - 1.
TEST_P(BlendA64MaskTest8B_d16, ExtremeValues) {
  for (int idx = 0; idx < kBufSize; ++idx) {
    dst_ref_[idx] = 255;
    dst_tst_[idx] = 255;

    src0_[idx] = kSrcMaxBitsMask;
    src1_[idx] = kSrcMaxBitsMask;
  }

  for (uint8_t &alpha : mask_) alpha = AOM_BLEND_A64_MAX_ALPHA - 1;

  // Walk the block sizes until one of them reports a fatal failure.
  int block = 0;
  while (block < BLOCK_SIZES_ALL && !HasFatalFailure()) {
    RunTest(block, 1);
    ++block;
  }
}
346
// Timing run (disabled by default): every block size is executed with a large
// iteration budget on random destinations, masked random sources and random
// mask weights.
TEST_P(BlendA64MaskTest8B_d16, DISABLED_Speed) {
  const int kRunTimes = 10000000;
  int block = 0;
  while (block < BLOCK_SIZES_ALL) {
    for (int idx = 0; idx < kBufSize; ++idx) {
      dst_ref_[idx] = rng_.Rand8();
      dst_tst_[idx] = rng_.Rand8();

      src0_[idx] = rng_.Rand16() & kSrcMaxBitsMask;
      src1_[idx] = rng_.Rand16() & kSrcMaxBitsMask;
    }

    for (uint8_t &alpha : mask_) alpha = rng_(AOM_BLEND_A64_MAX_ALPHA + 1);

    RunTest(block, kRunTimes);
    ++block;
  }
}
364
// Instantiations of the 8-bit d16 tests: each SIMD implementation that is
// enabled at build time is compared against the C reference.
#if HAVE_SSE4_1
INSTANTIATE_TEST_SUITE_P(SSE4_1, BlendA64MaskTest8B_d16,
                         ::testing::Values(TestFuncs_d16(
                             aom_lowbd_blend_a64_d16_mask_c,
                             aom_lowbd_blend_a64_d16_mask_sse4_1)));
#endif  // HAVE_SSE4_1

#if HAVE_AVX2
INSTANTIATE_TEST_SUITE_P(AVX2, BlendA64MaskTest8B_d16,
                         ::testing::Values(TestFuncs_d16(
                             aom_lowbd_blend_a64_d16_mask_c,
                             aom_lowbd_blend_a64_d16_mask_avx2)));
#endif  // HAVE_AVX2

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(NEON, BlendA64MaskTest8B_d16,
                         ::testing::Values(TestFuncs_d16(
                             aom_lowbd_blend_a64_d16_mask_c,
                             aom_lowbd_blend_a64_d16_mask_neon)));
#endif  // HAVE_NEON
385
386 //////////////////////////////////////////////////////////////////////////////
387 // High bit-depth version
388 //////////////////////////////////////////////////////////////////////////////
389 #if CONFIG_AV1_HIGHBITDEPTH
390 typedef void (*FHBD)(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0,
391 uint32_t src0_stride, const uint8_t *src1,
392 uint32_t src1_stride, const uint8_t *mask,
393 uint32_t mask_stride, int w, int h, int subx, int suby,
394 int bd);
395 typedef libaom_test::FuncParam<FHBD> TestFuncsHBD;
396
397 class BlendA64MaskTestHBD : public BlendA64MaskTest<FHBD, uint16_t, uint16_t> {
398 protected:
Execute(const uint16_t * p_src0,const uint16_t * p_src1,int run_times)399 void Execute(const uint16_t *p_src0, const uint16_t *p_src1,
400 int run_times) override {
401 aom_usec_timer timer;
402 aom_usec_timer_start(&timer);
403 for (int i = 0; i < run_times; ++i) {
404 params_.ref_func(CONVERT_TO_BYTEPTR(dst_ref_ + dst_offset_), dst_stride_,
405 CONVERT_TO_BYTEPTR(p_src0 + src0_offset_), src0_stride_,
406 CONVERT_TO_BYTEPTR(p_src1 + src1_offset_), src1_stride_,
407 mask_, kMaxMaskWidth, w_, h_, subx_, suby_, bit_depth_);
408 }
409 aom_usec_timer_mark(&timer);
410 const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
411 aom_usec_timer_start(&timer);
412 for (int i = 0; i < run_times; ++i) {
413 params_.tst_func(CONVERT_TO_BYTEPTR(dst_tst_ + dst_offset_), dst_stride_,
414 CONVERT_TO_BYTEPTR(p_src0 + src0_offset_), src0_stride_,
415 CONVERT_TO_BYTEPTR(p_src1 + src1_offset_), src1_stride_,
416 mask_, kMaxMaskWidth, w_, h_, subx_, suby_, bit_depth_);
417 }
418 aom_usec_timer_mark(&timer);
419 const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
420 if (run_times > 1) {
421 printf("%3dx%-3d subx %d suby %d :%7.2f/%7.2fns", w_, h_, subx_, suby_,
422 time1, time2);
423 printf("(%3.2f)\n", time1 / time2);
424 }
425 }
426
427 int bit_depth_;
428 };
429 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(BlendA64MaskTestHBD);
430
// Checks every block size at bit depths 8, 10 and 12 with pixels drawn from
// rng_(1 << bit_depth_) and random mask weights.
TEST_P(BlendA64MaskTestHBD, RandomValues) {
  for (bit_depth_ = 8; bit_depth_ <= 12 && !HasFatalFailure();
       bit_depth_ += 2) {
    const int hi = 1 << bit_depth_;

    // Stop iterating block sizes after the first fatal failure, as the
    // ExtremeValues test does, instead of running the remaining sizes.
    for (int bsize = 0; bsize < BLOCK_SIZES_ALL && !HasFatalFailure();
         ++bsize) {
      for (int i = 0; i < kBufSize; ++i) {
        dst_ref_[i] = rng_(hi);
        dst_tst_[i] = rng_(hi);
        src0_[i] = rng_(hi);
        src1_[i] = rng_(hi);
      }

      for (int i = 0; i < kMaxMaskSize; ++i)
        mask_[i] = rng_(AOM_BLEND_A64_MAX_ALPHA + 1);

      RunTest(bsize, 1);
    }
  }
}
451
// Checks every block size at bit depths 8, 10 and 12 with pixels near the top
// of the range for that depth and mask weights near AOM_BLEND_A64_MAX_ALPHA.
TEST_P(BlendA64MaskTestHBD, ExtremeValues) {
  bit_depth_ = 8;
  while (bit_depth_ <= 12 && !HasFatalFailure()) {
    // Pixels are drawn as floor + rng_(2), with floor two below 1 << depth.
    const int floor = (1 << bit_depth_) - 2;

    for (int block = 0; block < BLOCK_SIZES_ALL; ++block) {
      if (HasFatalFailure()) break;

      for (int idx = 0; idx < kBufSize; ++idx) {
        dst_ref_[idx] = rng_(2) + floor;
        dst_tst_[idx] = rng_(2) + floor;
        src0_[idx] = rng_(2) + floor;
        src1_[idx] = rng_(2) + floor;
      }

      for (uint8_t &alpha : mask_)
        alpha = rng_(2) + AOM_BLEND_A64_MAX_ALPHA - 1;

      RunTest(block, 1);
    }

    bit_depth_ += 2;
  }
}
474
// Instantiations of the high bit-depth tests: each SIMD implementation that is
// enabled at build time is compared against the C reference.
#if HAVE_SSE4_1
INSTANTIATE_TEST_SUITE_P(SSE4_1, BlendA64MaskTestHBD,
                         ::testing::Values(TestFuncsHBD(
                             aom_highbd_blend_a64_mask_c,
                             aom_highbd_blend_a64_mask_sse4_1)));
#endif  // HAVE_SSE4_1

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(NEON, BlendA64MaskTestHBD,
                         ::testing::Values(TestFuncsHBD(
                             aom_highbd_blend_a64_mask_c,
                             aom_highbd_blend_a64_mask_neon)));
#endif  // HAVE_NEON
488
489 //////////////////////////////////////////////////////////////////////////////
490 // HBD _d16 version
491 //////////////////////////////////////////////////////////////////////////////
492
493 typedef void (*FHBD_D16)(uint8_t *dst, uint32_t dst_stride,
494 const CONV_BUF_TYPE *src0, uint32_t src0_stride,
495 const CONV_BUF_TYPE *src1, uint32_t src1_stride,
496 const uint8_t *mask, uint32_t mask_stride, int w,
497 int h, int subx, int suby, ConvolveParams *conv_params,
498 const int bd);
499 typedef libaom_test::FuncParam<FHBD_D16> TestFuncsHBD_d16;
500
501 class BlendA64MaskTestHBD_d16
502 : public BlendA64MaskTest<FHBD_D16, uint16_t, uint16_t> {
503 protected:
504 // max number of bits used by the source
505 static const int kSrcMaxBitsMask = (1 << 14) - 1;
506 static const int kSrcMaxBitsMaskHBD = (1 << 16) - 1;
507
Execute(const uint16_t * p_src0,const uint16_t * p_src1,int run_times)508 void Execute(const uint16_t *p_src0, const uint16_t *p_src1,
509 int run_times) override {
510 ASSERT_GT(run_times, 0) << "Cannot run 0 iterations of the test.";
511 ConvolveParams conv_params;
512 conv_params.round_0 = (bit_depth_ == 12) ? ROUND0_BITS + 2 : ROUND0_BITS;
513 conv_params.round_1 = COMPOUND_ROUND1_BITS;
514 aom_usec_timer timer;
515 aom_usec_timer_start(&timer);
516 for (int i = 0; i < run_times; ++i) {
517 params_.ref_func(CONVERT_TO_BYTEPTR(dst_ref_ + dst_offset_), dst_stride_,
518 p_src0 + src0_offset_, src0_stride_,
519 p_src1 + src1_offset_, src1_stride_, mask_,
520 kMaxMaskWidth, w_, h_, subx_, suby_, &conv_params,
521 bit_depth_);
522 }
523 if (params_.tst_func) {
524 aom_usec_timer_mark(&timer);
525 const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
526 aom_usec_timer_start(&timer);
527 for (int i = 0; i < run_times; ++i) {
528 params_.tst_func(CONVERT_TO_BYTEPTR(dst_tst_ + dst_offset_),
529 dst_stride_, p_src0 + src0_offset_, src0_stride_,
530 p_src1 + src1_offset_, src1_stride_, mask_,
531 kMaxMaskWidth, w_, h_, subx_, suby_, &conv_params,
532 bit_depth_);
533 }
534 aom_usec_timer_mark(&timer);
535 const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
536 if (run_times > 1) {
537 printf("%3dx%-3d subx %d suby %d :%7.2f/%7.2fns", w_, h_, subx_, suby_,
538 time1, time2);
539 printf("(%3.2f)\n", time1 / time2);
540 }
541 }
542 }
543
544 int bit_depth_;
545 int src_max_bits_mask_;
546 };
547
// Checks every block size at bit depths 8, 10 and 12 with random destinations,
// random sources limited by the per-depth source mask, and random mask
// weights. Does nothing when no tested function is supplied.
TEST_P(BlendA64MaskTestHBD_d16, RandomValues) {
  if (params_.tst_func == nullptr) return;

  bit_depth_ = 8;
  while (bit_depth_ <= 12 && !HasFatalFailure()) {
    // Bit depth 8 uses the narrower source mask; 10 and 12 use the HBD one.
    if (bit_depth_ == 8) {
      src_max_bits_mask_ = kSrcMaxBitsMask;
    } else {
      src_max_bits_mask_ = kSrcMaxBitsMaskHBD;
    }

    for (int block = 0; block < BLOCK_SIZES_ALL; ++block) {
      if (HasFatalFailure()) break;

      for (int idx = 0; idx < kBufSize; ++idx) {
        dst_ref_[idx] = rng_.Rand8();
        dst_tst_[idx] = rng_.Rand8();

        src0_[idx] = rng_.Rand16() & src_max_bits_mask_;
        src1_[idx] = rng_.Rand16() & src_max_bits_mask_;
      }

      for (uint8_t &alpha : mask_) alpha = rng_(AOM_BLEND_A64_MAX_ALPHA + 1);

      RunTest(block, 1);
    }

    bit_depth_ += 2;
  }
}
572
// Checks every block size at bit depths 8, 10 and 12 with saturated inputs:
// sources at the per-depth source mask, every mask weight at
// AOM_BLEND_A64_MAX_ALPHA, and the two destinations pre-filled with different
// values (0 and the maximum pixel value) so stale output cannot match.
TEST_P(BlendA64MaskTestHBD_d16, ExtremeValues) {
  // Execute() skips a null tested function, which would leave dst_tst_
  // untouched and make the comparison fail; bail out as RandomValues does.
  if (params_.tst_func == nullptr) return;

  for (bit_depth_ = 8; bit_depth_ <= 12 && !HasFatalFailure();
       bit_depth_ += 2) {
    src_max_bits_mask_ =
        (bit_depth_ == 8) ? kSrcMaxBitsMask : kSrcMaxBitsMaskHBD;

    for (int i = 0; i < kBufSize; ++i) {
      dst_ref_[i] = 0;
      dst_tst_[i] = (1 << bit_depth_) - 1;

      src0_[i] = src_max_bits_mask_;
      src1_[i] = src_max_bits_mask_;
    }

    for (int i = 0; i < kMaxMaskSize; ++i) mask_[i] = AOM_BLEND_A64_MAX_ALPHA;
    // Stop at the first fatal failure instead of running the remaining sizes.
    for (int bsize = 0; bsize < BLOCK_SIZES_ALL && !HasFatalFailure();
         ++bsize) {
      RunTest(bsize, 1);
    }
  }
}
592
// Timing run (disabled by default): every block size is executed at bit depths
// 8, 10 and 12 with a large iteration budget.
TEST_P(BlendA64MaskTestHBD_d16, DISABLED_Speed) {
  const int kRunTimes = 10000000;
  for (int bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) {
    for (bit_depth_ = 8; bit_depth_ <= 12; bit_depth_ += 2) {
      // Restrict the sources the same way RandomValues and ExtremeValues do,
      // since RunOneTest() still compares the outputs on timing runs.
      // NOTE(review): assumes the narrower mask at bit depth 8 reflects the
      // valid input range of the functions under test - confirm.
      src_max_bits_mask_ =
          (bit_depth_ == 8) ? kSrcMaxBitsMask : kSrcMaxBitsMaskHBD;

      for (int i = 0; i < kBufSize; ++i) {
        dst_ref_[i] = rng_.Rand12() % (1 << bit_depth_);
        dst_tst_[i] = rng_.Rand12() % (1 << bit_depth_);

        src0_[i] = rng_.Rand16() & src_max_bits_mask_;
        src1_[i] = rng_.Rand16() & src_max_bits_mask_;
      }

      for (int i = 0; i < kMaxMaskSize; ++i)
        mask_[i] = rng_(AOM_BLEND_A64_MAX_ALPHA + 1);

      RunTest(bsize, kRunTimes);
    }
  }
}
612
613 INSTANTIATE_TEST_SUITE_P(
614 C, BlendA64MaskTestHBD_d16,
615 ::testing::Values(TestFuncsHBD_d16(aom_highbd_blend_a64_d16_mask_c,
616 aom_highbd_blend_a64_d16_mask_c)));
617
618 #if HAVE_SSE4_1
619 INSTANTIATE_TEST_SUITE_P(
620 SSE4_1, BlendA64MaskTestHBD_d16,
621 ::testing::Values(TestFuncsHBD_d16(aom_highbd_blend_a64_d16_mask_c,
622 aom_highbd_blend_a64_d16_mask_sse4_1)));
623 #endif // HAVE_SSE4_1
624
625 #if HAVE_AVX2
626 INSTANTIATE_TEST_SUITE_P(
627 AVX2, BlendA64MaskTestHBD_d16,
628 ::testing::Values(TestFuncsHBD_d16(aom_highbd_blend_a64_d16_mask_c,
629 aom_highbd_blend_a64_d16_mask_avx2)));
630 #endif // HAVE_AVX2
631
632 #if HAVE_NEON
633 INSTANTIATE_TEST_SUITE_P(
634 NEON, BlendA64MaskTestHBD_d16,
635 ::testing::Values(TestFuncsHBD_d16(aom_highbd_blend_a64_d16_mask_c,
636 aom_highbd_blend_a64_d16_mask_neon)));
637 #endif // HAVE_NEON
638
639 // TODO(slavarnway): Enable the following in the avx2 commit. (56501)
640 #if 0
641 #if HAVE_AVX2
642 INSTANTIATE_TEST_SUITE_P(
643 SSE4_1, BlendA64MaskTestHBD,
644 ::testing::Values(TestFuncsHBD(aom_highbd_blend_a64_mask_c,
645 aom_highbd_blend_a64_mask_avx2)));
646 #endif // HAVE_AVX2
647 #endif
648 #endif // CONFIG_AV1_HIGHBITDEPTH
649 } // namespace
650