xref: /aosp_15_r20/external/libaom/test/blend_a64_mask_test.cc (revision 77c1e3ccc04c968bd2bc212e87364f250e820521)
1 /*
2  * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
3  *
4  * This source code is subject to the terms of the BSD 2 Clause License and
5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6  * was not distributed with this source code in the LICENSE file, you can
7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8  * Media Patent License 1.0 was not distributed with this source code in the
9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10  */
11 
12 #include <math.h>
13 #include <stdlib.h>
14 #include <string.h>
15 
16 #include "gtest/gtest.h"
17 #include "test/register_state_check.h"
18 #include "test/function_equivalence_test.h"
19 
20 #include "config/aom_config.h"
21 #include "config/aom_dsp_rtcd.h"
22 #include "config/av1_rtcd.h"
23 
24 #include "aom/aom_integer.h"
25 
26 #include "av1/common/enums.h"
27 
28 #include "aom_dsp/blend.h"
29 
30 using libaom_test::FunctionEquivalenceTest;
31 
32 namespace {
33 
34 template <typename BlendA64Func, typename SrcPixel, typename DstPixel>
35 class BlendA64MaskTest : public FunctionEquivalenceTest<BlendA64Func> {
36  protected:
37   static const int kIterations = 10000;
38   static const int kMaxWidth = MAX_SB_SIZE * 5;  // * 5 to cover longer strides
39   static const int kMaxHeight = MAX_SB_SIZE;
40   static const int kBufSize = kMaxWidth * kMaxHeight;
41   static const int kMaxMaskWidth = 2 * MAX_SB_SIZE;
42   static const int kMaxMaskSize = kMaxMaskWidth * kMaxMaskWidth;
43 
44   ~BlendA64MaskTest() override = default;
45 
46   virtual void Execute(const SrcPixel *p_src0, const SrcPixel *p_src1,
47                        int run_times) = 0;
48 
49   template <typename Pixel>
GetSources(Pixel ** src0,Pixel ** src1,Pixel *,int run_times)50   void GetSources(Pixel **src0, Pixel **src1, Pixel * /*dst*/, int run_times) {
51     if (run_times > 1) {
52       *src0 = src0_;
53       *src1 = src1_;
54       return;
55     }
56     switch (this->rng_(3)) {
57       case 0:  // Separate sources
58         *src0 = src0_;
59         *src1 = src1_;
60         break;
61       case 1:  // src0 == dst
62         *src0 = dst_tst_;
63         src0_stride_ = dst_stride_;
64         src0_offset_ = dst_offset_;
65         *src1 = src1_;
66         break;
67       case 2:  // src1 == dst
68         *src0 = src0_;
69         *src1 = dst_tst_;
70         src1_stride_ = dst_stride_;
71         src1_offset_ = dst_offset_;
72         break;
73       default: FAIL();
74     }
75   }
76 
GetSources(uint16_t ** src0,uint16_t ** src1,uint8_t *,int)77   void GetSources(uint16_t **src0, uint16_t **src1, uint8_t * /*dst*/,
78                   int /*run_times*/) {
79     *src0 = src0_;
80     *src1 = src1_;
81   }
82 
Rand1()83   uint8_t Rand1() { return this->rng_.Rand8() & 1; }
84 
RunOneTest(int block_size,int subx,int suby,int run_times)85   void RunOneTest(int block_size, int subx, int suby, int run_times) {
86     w_ = block_size_wide[block_size];
87     h_ = block_size_high[block_size];
88     run_times = run_times > 1 ? run_times / w_ : 1;
89     ASSERT_GT(run_times, 0);
90     subx_ = subx;
91     suby_ = suby;
92 
93     dst_offset_ = this->rng_(33);
94     dst_stride_ = this->rng_(kMaxWidth + 1 - w_) + w_;
95 
96     src0_offset_ = this->rng_(33);
97     src0_stride_ = this->rng_(kMaxWidth + 1 - w_) + w_;
98 
99     src1_offset_ = this->rng_(33);
100     src1_stride_ = this->rng_(kMaxWidth + 1 - w_) + w_;
101 
102     mask_stride_ =
103         this->rng_(kMaxWidth + 1 - w_ * (subx_ ? 2 : 1)) + w_ * (subx_ ? 2 : 1);
104 
105     SrcPixel *p_src0;
106     SrcPixel *p_src1;
107 
108     p_src0 = src0_;
109     p_src1 = src1_;
110 
111     GetSources(&p_src0, &p_src1, &dst_ref_[0], run_times);
112 
113     Execute(p_src0, p_src1, run_times);
114 
115     for (int r = 0; r < h_; ++r) {
116       for (int c = 0; c < w_; ++c) {
117         ASSERT_EQ(dst_ref_[dst_offset_ + r * dst_stride_ + c],
118                   dst_tst_[dst_offset_ + r * dst_stride_ + c])
119             << w_ << "x" << h_ << " subx " << subx_ << " suby " << suby_
120             << " r: " << r << " c: " << c;
121       }
122     }
123   }
124 
RunTest(int block_size,int run_times)125   void RunTest(int block_size, int run_times) {
126     for (subx_ = 0; subx_ <= 1; subx_++) {
127       for (suby_ = 0; suby_ <= 1; suby_++) {
128         RunOneTest(block_size, subx_, suby_, run_times);
129       }
130     }
131   }
132 
133   DstPixel dst_ref_[kBufSize];
134   DstPixel dst_tst_[kBufSize];
135   uint32_t dst_stride_;
136   uint32_t dst_offset_;
137 
138   SrcPixel src0_[kBufSize];
139   uint32_t src0_stride_;
140   uint32_t src0_offset_;
141 
142   SrcPixel src1_[kBufSize];
143   uint32_t src1_stride_;
144   uint32_t src1_offset_;
145 
146   uint8_t mask_[kMaxMaskSize];
147   size_t mask_stride_;
148 
149   int w_;
150   int h_;
151 
152   int suby_;
153   int subx_;
154 };
155 
156 //////////////////////////////////////////////////////////////////////////////
157 // 8 bit version
158 //////////////////////////////////////////////////////////////////////////////
159 
160 typedef void (*F8B)(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0,
161                     uint32_t src0_stride, const uint8_t *src1,
162                     uint32_t src1_stride, const uint8_t *mask,
163                     uint32_t mask_stride, int w, int h, int subx, int suby);
164 typedef libaom_test::FuncParam<F8B> TestFuncs;
165 
166 class BlendA64MaskTest8B : public BlendA64MaskTest<F8B, uint8_t, uint8_t> {
167  protected:
Execute(const uint8_t * p_src0,const uint8_t * p_src1,int run_times)168   void Execute(const uint8_t *p_src0, const uint8_t *p_src1,
169                int run_times) override {
170     aom_usec_timer timer;
171     aom_usec_timer_start(&timer);
172     for (int i = 0; i < run_times; ++i) {
173       params_.ref_func(dst_ref_ + dst_offset_, dst_stride_,
174                        p_src0 + src0_offset_, src0_stride_,
175                        p_src1 + src1_offset_, src1_stride_, mask_,
176                        kMaxMaskWidth, w_, h_, subx_, suby_);
177     }
178     aom_usec_timer_mark(&timer);
179     const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
180     aom_usec_timer_start(&timer);
181     for (int i = 0; i < run_times; ++i) {
182       params_.tst_func(dst_tst_ + dst_offset_, dst_stride_,
183                        p_src0 + src0_offset_, src0_stride_,
184                        p_src1 + src1_offset_, src1_stride_, mask_,
185                        kMaxMaskWidth, w_, h_, subx_, suby_);
186     }
187     aom_usec_timer_mark(&timer);
188     const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
189     if (run_times > 1) {
190       printf("%3dx%-3d subx %d suby %d :%7.2f/%7.2fns", w_, h_, subx_, suby_,
191              time1, time2);
192       printf("(%3.2f)\n", time1 / time2);
193     }
194   }
195 };
196 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(BlendA64MaskTest8B);
197 
// Compares the tested function with the reference on freshly randomised
// pixels and mask values for every block size, stopping at the first fatal
// failure.
TEST_P(BlendA64MaskTest8B, RandomValues) {
  for (int bsize = 0; bsize < BLOCK_SIZES_ALL && !HasFatalFailure(); ++bsize) {
    // Keep the draw order (dst_ref, dst_tst, src0, src1) per element so the
    // random sequence is reproducible.
    for (int px = 0; px < kBufSize; ++px) {
      dst_ref_[px] = rng_.Rand8();
      dst_tst_[px] = rng_.Rand8();

      src0_[px] = rng_.Rand8();
      src1_[px] = rng_.Rand8();
    }

    for (int m = 0; m < kMaxMaskSize; ++m) {
      mask_[m] = rng_(AOM_BLEND_A64_MAX_ALPHA + 1);
    }

    RunTest(bsize, 1);
  }
}
214 
// Exercises the top of the value range: pixels are rng_(2) + 254 and mask
// entries are rng_(2) + AOM_BLEND_A64_MAX_ALPHA - 1. The buffers are filled
// once and reused for every block size.
TEST_P(BlendA64MaskTest8B, ExtremeValues) {
  for (int px = 0; px < kBufSize; ++px) {
    dst_ref_[px] = rng_(2) + 254;
    dst_tst_[px] = rng_(2) + 254;
    src0_[px] = rng_(2) + 254;
    src1_[px] = rng_(2) + 254;
  }

  for (int m = 0; m < kMaxMaskSize; ++m) {
    mask_[m] = rng_(2) + AOM_BLEND_A64_MAX_ALPHA - 1;
  }

  for (int bsize = 0; bsize < BLOCK_SIZES_ALL && !HasFatalFailure(); ++bsize) {
    RunTest(bsize, 1);
  }
}
229 
// Timing run (disabled by default): random inputs per block size, with the
// iteration budget handed to RunTest(), which prints the measurements.
TEST_P(BlendA64MaskTest8B, DISABLED_Speed) {
  const int kRunTimes = 10000000;
  for (int bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) {
    for (int px = 0; px < kBufSize; ++px) {
      dst_ref_[px] = rng_.Rand8();
      dst_tst_[px] = rng_.Rand8();

      src0_[px] = rng_.Rand8();
      src1_[px] = rng_.Rand8();
    }

    for (int m = 0; m < kMaxMaskSize; ++m) {
      mask_[m] = rng_(AOM_BLEND_A64_MAX_ALPHA + 1);
    }

    RunTest(bsize, kRunTimes);
  }
}
#if HAVE_SSE4_1
// SSE4.1 implementation checked against the C reference.
INSTANTIATE_TEST_SUITE_P(
    SSE4_1, BlendA64MaskTest8B,
    ::testing::Values(TestFuncs(aom_blend_a64_mask_c,
                                aom_blend_a64_mask_sse4_1)));
#endif  // HAVE_SSE4_1

#if HAVE_AVX2
// AVX2 implementation; note the SSE4.1 version serves as the reference here.
INSTANTIATE_TEST_SUITE_P(
    AVX2, BlendA64MaskTest8B,
    ::testing::Values(TestFuncs(aom_blend_a64_mask_sse4_1,
                                aom_blend_a64_mask_avx2)));
#endif  // HAVE_AVX2

#if HAVE_NEON
// NEON implementation checked against the C reference.
INSTANTIATE_TEST_SUITE_P(
    NEON, BlendA64MaskTest8B,
    ::testing::Values(TestFuncs(aom_blend_a64_mask_c,
                                aom_blend_a64_mask_neon)));
#endif  // HAVE_NEON
264 
265 //////////////////////////////////////////////////////////////////////////////
266 // 8 bit _d16 version
267 //////////////////////////////////////////////////////////////////////////////
268 
269 typedef void (*F8B_D16)(uint8_t *dst, uint32_t dst_stride, const uint16_t *src0,
270                         uint32_t src0_stride, const uint16_t *src1,
271                         uint32_t src1_stride, const uint8_t *mask,
272                         uint32_t mask_stride, int w, int h, int subx, int suby,
273                         ConvolveParams *conv_params);
274 typedef libaom_test::FuncParam<F8B_D16> TestFuncs_d16;
275 
276 class BlendA64MaskTest8B_d16
277     : public BlendA64MaskTest<F8B_D16, uint16_t, uint8_t> {
278  protected:
279   // max number of bits used by the source
280   static const int kSrcMaxBitsMask = 0x3fff;
281 
Execute(const uint16_t * p_src0,const uint16_t * p_src1,int run_times)282   void Execute(const uint16_t *p_src0, const uint16_t *p_src1,
283                int run_times) override {
284     ConvolveParams conv_params;
285     conv_params.round_0 = ROUND0_BITS;
286     conv_params.round_1 = COMPOUND_ROUND1_BITS;
287     aom_usec_timer timer;
288     aom_usec_timer_start(&timer);
289     for (int i = 0; i < run_times; ++i) {
290       params_.ref_func(dst_ref_ + dst_offset_, dst_stride_,
291                        p_src0 + src0_offset_, src0_stride_,
292                        p_src1 + src1_offset_, src1_stride_, mask_,
293                        kMaxMaskWidth, w_, h_, subx_, suby_, &conv_params);
294     }
295     aom_usec_timer_mark(&timer);
296     const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
297     aom_usec_timer_start(&timer);
298     for (int i = 0; i < run_times; ++i) {
299       params_.tst_func(dst_tst_ + dst_offset_, dst_stride_,
300                        p_src0 + src0_offset_, src0_stride_,
301                        p_src1 + src1_offset_, src1_stride_, mask_,
302                        kMaxMaskWidth, w_, h_, subx_, suby_, &conv_params);
303     }
304     aom_usec_timer_mark(&timer);
305     const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
306     if (run_times > 1) {
307       printf("%3dx%-3d subx %d suby %d :%7.2f/%7.2fns", w_, h_, subx_, suby_,
308              time1, time2);
309       printf("(%3.2f)\n", time1 / time2);
310     }
311   }
312 };
313 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(BlendA64MaskTest8B_d16);
314 
// Compares the tested function with the reference on random data for every
// block size; sources are random 16-bit values masked with kSrcMaxBitsMask.
TEST_P(BlendA64MaskTest8B_d16, RandomValues) {
  for (int bsize = 0; bsize < BLOCK_SIZES_ALL && !HasFatalFailure(); ++bsize) {
    // Keep the draw order (dst_ref, dst_tst, src0, src1) per element so the
    // random sequence is reproducible.
    for (int px = 0; px < kBufSize; ++px) {
      dst_ref_[px] = rng_.Rand8();
      dst_tst_[px] = rng_.Rand8();

      src0_[px] = rng_.Rand16() & kSrcMaxBitsMask;
      src1_[px] = rng_.Rand16() & kSrcMaxBitsMask;
    }

    for (int m = 0; m < kMaxMaskSize; ++m) {
      mask_[m] = rng_(AOM_BLEND_A64_MAX_ALPHA + 1);
    }

    RunTest(bsize, 1);
  }
}
331 
// Fixed worst-case inputs: destinations at 255, sources at kSrcMaxBitsMask
// and every mask entry at AOM_BLEND_A64_MAX_ALPHA - 1. The buffers are filled
// once and reused for every block size.
TEST_P(BlendA64MaskTest8B_d16, ExtremeValues) {
  for (int px = 0; px < kBufSize; ++px) {
    dst_ref_[px] = 255;
    dst_tst_[px] = 255;

    src0_[px] = kSrcMaxBitsMask;
    src1_[px] = kSrcMaxBitsMask;
  }

  for (int m = 0; m < kMaxMaskSize; ++m) {
    mask_[m] = AOM_BLEND_A64_MAX_ALPHA - 1;
  }

  for (int bsize = 0; bsize < BLOCK_SIZES_ALL && !HasFatalFailure(); ++bsize) {
    RunTest(bsize, 1);
  }
}
346 
// Timing run (disabled by default): random inputs per block size, with the
// iteration budget handed to RunTest(), which prints the measurements.
TEST_P(BlendA64MaskTest8B_d16, DISABLED_Speed) {
  const int kRunTimes = 10000000;
  for (int bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) {
    for (int px = 0; px < kBufSize; ++px) {
      dst_ref_[px] = rng_.Rand8();
      dst_tst_[px] = rng_.Rand8();

      src0_[px] = rng_.Rand16() & kSrcMaxBitsMask;
      src1_[px] = rng_.Rand16() & kSrcMaxBitsMask;
    }

    for (int m = 0; m < kMaxMaskSize; ++m) {
      mask_[m] = rng_(AOM_BLEND_A64_MAX_ALPHA + 1);
    }

    RunTest(bsize, kRunTimes);
  }
}
364 
// Each SIMD implementation below is checked against the C reference.
#if HAVE_SSE4_1
INSTANTIATE_TEST_SUITE_P(
    SSE4_1, BlendA64MaskTest8B_d16,
    ::testing::Values(TestFuncs_d16(aom_lowbd_blend_a64_d16_mask_c,
                                    aom_lowbd_blend_a64_d16_mask_sse4_1)));
#endif  // HAVE_SSE4_1

#if HAVE_AVX2
INSTANTIATE_TEST_SUITE_P(
    AVX2, BlendA64MaskTest8B_d16,
    ::testing::Values(TestFuncs_d16(aom_lowbd_blend_a64_d16_mask_c,
                                    aom_lowbd_blend_a64_d16_mask_avx2)));
#endif  // HAVE_AVX2

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(
    NEON, BlendA64MaskTest8B_d16,
    ::testing::Values(TestFuncs_d16(aom_lowbd_blend_a64_d16_mask_c,
                                    aom_lowbd_blend_a64_d16_mask_neon)));
#endif  // HAVE_NEON
385 
386 //////////////////////////////////////////////////////////////////////////////
387 // High bit-depth version
388 //////////////////////////////////////////////////////////////////////////////
389 #if CONFIG_AV1_HIGHBITDEPTH
390 typedef void (*FHBD)(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0,
391                      uint32_t src0_stride, const uint8_t *src1,
392                      uint32_t src1_stride, const uint8_t *mask,
393                      uint32_t mask_stride, int w, int h, int subx, int suby,
394                      int bd);
395 typedef libaom_test::FuncParam<FHBD> TestFuncsHBD;
396 
397 class BlendA64MaskTestHBD : public BlendA64MaskTest<FHBD, uint16_t, uint16_t> {
398  protected:
Execute(const uint16_t * p_src0,const uint16_t * p_src1,int run_times)399   void Execute(const uint16_t *p_src0, const uint16_t *p_src1,
400                int run_times) override {
401     aom_usec_timer timer;
402     aom_usec_timer_start(&timer);
403     for (int i = 0; i < run_times; ++i) {
404       params_.ref_func(CONVERT_TO_BYTEPTR(dst_ref_ + dst_offset_), dst_stride_,
405                        CONVERT_TO_BYTEPTR(p_src0 + src0_offset_), src0_stride_,
406                        CONVERT_TO_BYTEPTR(p_src1 + src1_offset_), src1_stride_,
407                        mask_, kMaxMaskWidth, w_, h_, subx_, suby_, bit_depth_);
408     }
409     aom_usec_timer_mark(&timer);
410     const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
411     aom_usec_timer_start(&timer);
412     for (int i = 0; i < run_times; ++i) {
413       params_.tst_func(CONVERT_TO_BYTEPTR(dst_tst_ + dst_offset_), dst_stride_,
414                        CONVERT_TO_BYTEPTR(p_src0 + src0_offset_), src0_stride_,
415                        CONVERT_TO_BYTEPTR(p_src1 + src1_offset_), src1_stride_,
416                        mask_, kMaxMaskWidth, w_, h_, subx_, suby_, bit_depth_);
417     }
418     aom_usec_timer_mark(&timer);
419     const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
420     if (run_times > 1) {
421       printf("%3dx%-3d subx %d suby %d :%7.2f/%7.2fns", w_, h_, subx_, suby_,
422              time1, time2);
423       printf("(%3.2f)\n", time1 / time2);
424     }
425   }
426 
427   int bit_depth_;
428 };
429 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(BlendA64MaskTestHBD);
430 
// Compares the tested function with the reference on random data for bit
// depths 8, 10 and 12 and every block size. Pixels are drawn with
// rng_(1 << bit_depth_).
TEST_P(BlendA64MaskTestHBD, RandomValues) {
  for (bit_depth_ = 8; bit_depth_ <= 12 && !HasFatalFailure();
       bit_depth_ += 2) {
    const int hi = 1 << bit_depth_;

    // Stop at the first fatal failure, as the sibling tests do, instead of
    // running (and failing) the remaining block sizes.
    for (int bsize = 0; bsize < BLOCK_SIZES_ALL && !HasFatalFailure();
         ++bsize) {
      for (int i = 0; i < kBufSize; ++i) {
        dst_ref_[i] = rng_(hi);
        dst_tst_[i] = rng_(hi);
        src0_[i] = rng_(hi);
        src1_[i] = rng_(hi);
      }

      for (int i = 0; i < kMaxMaskSize; ++i)
        mask_[i] = rng_(AOM_BLEND_A64_MAX_ALPHA + 1);

      RunTest(bsize, 1);
    }
  }
}
451 
// Exercises the top of the value range for bit depths 8, 10 and 12: pixels
// are lo + rng_(hi - lo) with lo = hi - 2 and hi = 1 << bit_depth_; mask
// entries are rng_(2) + AOM_BLEND_A64_MAX_ALPHA - 1.
TEST_P(BlendA64MaskTestHBD, ExtremeValues) {
  for (bit_depth_ = 8; bit_depth_ <= 12 && !HasFatalFailure();
       bit_depth_ += 2) {
    const int hi = 1 << bit_depth_;
    const int lo = hi - 2;
    const int span = hi - lo;

    for (int bsize = 0; bsize < BLOCK_SIZES_ALL && !HasFatalFailure();
         ++bsize) {
      for (int px = 0; px < kBufSize; ++px) {
        dst_ref_[px] = rng_(span) + lo;
        dst_tst_[px] = rng_(span) + lo;
        src0_[px] = rng_(span) + lo;
        src1_[px] = rng_(span) + lo;
      }

      for (int m = 0; m < kMaxMaskSize; ++m) {
        mask_[m] = rng_(2) + AOM_BLEND_A64_MAX_ALPHA - 1;
      }

      RunTest(bsize, 1);
    }
  }
}
474 
// Each SIMD implementation below is checked against the C reference.
#if HAVE_SSE4_1
INSTANTIATE_TEST_SUITE_P(
    SSE4_1, BlendA64MaskTestHBD,
    ::testing::Values(TestFuncsHBD(aom_highbd_blend_a64_mask_c,
                                   aom_highbd_blend_a64_mask_sse4_1)));
#endif  // HAVE_SSE4_1

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(
    NEON, BlendA64MaskTestHBD,
    ::testing::Values(TestFuncsHBD(aom_highbd_blend_a64_mask_c,
                                   aom_highbd_blend_a64_mask_neon)));
#endif  // HAVE_NEON
488 
489 //////////////////////////////////////////////////////////////////////////////
490 // HBD _d16 version
491 //////////////////////////////////////////////////////////////////////////////
492 
493 typedef void (*FHBD_D16)(uint8_t *dst, uint32_t dst_stride,
494                          const CONV_BUF_TYPE *src0, uint32_t src0_stride,
495                          const CONV_BUF_TYPE *src1, uint32_t src1_stride,
496                          const uint8_t *mask, uint32_t mask_stride, int w,
497                          int h, int subx, int suby, ConvolveParams *conv_params,
498                          const int bd);
499 typedef libaom_test::FuncParam<FHBD_D16> TestFuncsHBD_d16;
500 
501 class BlendA64MaskTestHBD_d16
502     : public BlendA64MaskTest<FHBD_D16, uint16_t, uint16_t> {
503  protected:
504   // max number of bits used by the source
505   static const int kSrcMaxBitsMask = (1 << 14) - 1;
506   static const int kSrcMaxBitsMaskHBD = (1 << 16) - 1;
507 
Execute(const uint16_t * p_src0,const uint16_t * p_src1,int run_times)508   void Execute(const uint16_t *p_src0, const uint16_t *p_src1,
509                int run_times) override {
510     ASSERT_GT(run_times, 0) << "Cannot run 0 iterations of the test.";
511     ConvolveParams conv_params;
512     conv_params.round_0 = (bit_depth_ == 12) ? ROUND0_BITS + 2 : ROUND0_BITS;
513     conv_params.round_1 = COMPOUND_ROUND1_BITS;
514     aom_usec_timer timer;
515     aom_usec_timer_start(&timer);
516     for (int i = 0; i < run_times; ++i) {
517       params_.ref_func(CONVERT_TO_BYTEPTR(dst_ref_ + dst_offset_), dst_stride_,
518                        p_src0 + src0_offset_, src0_stride_,
519                        p_src1 + src1_offset_, src1_stride_, mask_,
520                        kMaxMaskWidth, w_, h_, subx_, suby_, &conv_params,
521                        bit_depth_);
522     }
523     if (params_.tst_func) {
524       aom_usec_timer_mark(&timer);
525       const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
526       aom_usec_timer_start(&timer);
527       for (int i = 0; i < run_times; ++i) {
528         params_.tst_func(CONVERT_TO_BYTEPTR(dst_tst_ + dst_offset_),
529                          dst_stride_, p_src0 + src0_offset_, src0_stride_,
530                          p_src1 + src1_offset_, src1_stride_, mask_,
531                          kMaxMaskWidth, w_, h_, subx_, suby_, &conv_params,
532                          bit_depth_);
533       }
534       aom_usec_timer_mark(&timer);
535       const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
536       if (run_times > 1) {
537         printf("%3dx%-3d subx %d suby %d :%7.2f/%7.2fns", w_, h_, subx_, suby_,
538                time1, time2);
539         printf("(%3.2f)\n", time1 / time2);
540       }
541     }
542   }
543 
544   int bit_depth_;
545   int src_max_bits_mask_;
546 };
547 
// Compares the tested function with the reference on random data for bit
// depths 8, 10 and 12 and every block size. Skipped when no tested function
// is supplied. Sources are masked with kSrcMaxBitsMask at 8 bits and with
// kSrcMaxBitsMaskHBD otherwise.
TEST_P(BlendA64MaskTestHBD_d16, RandomValues) {
  if (params_.tst_func == nullptr) return;

  for (bit_depth_ = 8; bit_depth_ <= 12 && !HasFatalFailure();
       bit_depth_ += 2) {
    if (bit_depth_ == 8) {
      src_max_bits_mask_ = kSrcMaxBitsMask;
    } else {
      src_max_bits_mask_ = kSrcMaxBitsMaskHBD;
    }

    for (int bsize = 0; bsize < BLOCK_SIZES_ALL && !HasFatalFailure();
         ++bsize) {
      // Keep the draw order (dst_ref, dst_tst, src0, src1) per element so
      // the random sequence is reproducible.
      for (int px = 0; px < kBufSize; ++px) {
        dst_ref_[px] = rng_.Rand8();
        dst_tst_[px] = rng_.Rand8();

        src0_[px] = rng_.Rand16() & src_max_bits_mask_;
        src1_[px] = rng_.Rand16() & src_max_bits_mask_;
      }

      for (int m = 0; m < kMaxMaskSize; ++m) {
        mask_[m] = rng_(AOM_BLEND_A64_MAX_ALPHA + 1);
      }

      RunTest(bsize, 1);
    }
  }
}
572 
// Fixed worst-case inputs for bit depths 8, 10 and 12: sources at the
// maximum source value, every mask entry at AOM_BLEND_A64_MAX_ALPHA. The two
// destinations start out different (0 vs. the maximum pixel value), so both
// functions must actually write every compared pixel for the test to pass.
TEST_P(BlendA64MaskTestHBD_d16, ExtremeValues) {
  // Execute() skips the tested function when it is null, which would leave
  // dst_tst_ untouched and guarantee a mismatch; skip like RandomValues does.
  if (params_.tst_func == nullptr) return;

  for (bit_depth_ = 8; bit_depth_ <= 12 && !HasFatalFailure();
       bit_depth_ += 2) {
    src_max_bits_mask_ =
        (bit_depth_ == 8) ? kSrcMaxBitsMask : kSrcMaxBitsMaskHBD;

    for (int i = 0; i < kBufSize; ++i) {
      dst_ref_[i] = 0;
      dst_tst_[i] = (1 << bit_depth_) - 1;

      src0_[i] = src_max_bits_mask_;
      src1_[i] = src_max_bits_mask_;
    }

    for (int i = 0; i < kMaxMaskSize; ++i) mask_[i] = AOM_BLEND_A64_MAX_ALPHA;

    // Stop at the first fatal failure, as the sibling tests do.
    for (int bsize = 0; bsize < BLOCK_SIZES_ALL && !HasFatalFailure();
         ++bsize) {
      RunTest(bsize, 1);
    }
  }
}
592 
// Timing run (disabled by default): random inputs per block size and bit
// depth, with the iteration budget handed to RunTest(), which prints the
// measurements.
TEST_P(BlendA64MaskTestHBD_d16, DISABLED_Speed) {
  const int kRunTimes = 10000000;
  for (int bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) {
    for (bit_depth_ = 8; bit_depth_ <= 12; bit_depth_ += 2) {
      // Use the same source range as RandomValues so that the equality check
      // in RunOneTest() sees in-range inputs at every bit depth.
      src_max_bits_mask_ =
          (bit_depth_ == 8) ? kSrcMaxBitsMask : kSrcMaxBitsMaskHBD;

      for (int i = 0; i < kBufSize; ++i) {
        dst_ref_[i] = rng_.Rand12() % (1 << bit_depth_);
        dst_tst_[i] = rng_.Rand12() % (1 << bit_depth_);

        src0_[i] = rng_.Rand16() & src_max_bits_mask_;
        src1_[i] = rng_.Rand16() & src_max_bits_mask_;
      }

      for (int i = 0; i < kMaxMaskSize; ++i)
        mask_[i] = rng_(AOM_BLEND_A64_MAX_ALPHA + 1);

      RunTest(bsize, kRunTimes);
    }
  }
}
612 
613 INSTANTIATE_TEST_SUITE_P(
614     C, BlendA64MaskTestHBD_d16,
615     ::testing::Values(TestFuncsHBD_d16(aom_highbd_blend_a64_d16_mask_c,
616                                        aom_highbd_blend_a64_d16_mask_c)));
617 
618 #if HAVE_SSE4_1
619 INSTANTIATE_TEST_SUITE_P(
620     SSE4_1, BlendA64MaskTestHBD_d16,
621     ::testing::Values(TestFuncsHBD_d16(aom_highbd_blend_a64_d16_mask_c,
622                                        aom_highbd_blend_a64_d16_mask_sse4_1)));
623 #endif  // HAVE_SSE4_1
624 
625 #if HAVE_AVX2
626 INSTANTIATE_TEST_SUITE_P(
627     AVX2, BlendA64MaskTestHBD_d16,
628     ::testing::Values(TestFuncsHBD_d16(aom_highbd_blend_a64_d16_mask_c,
629                                        aom_highbd_blend_a64_d16_mask_avx2)));
630 #endif  // HAVE_AVX2
631 
632 #if HAVE_NEON
633 INSTANTIATE_TEST_SUITE_P(
634     NEON, BlendA64MaskTestHBD_d16,
635     ::testing::Values(TestFuncsHBD_d16(aom_highbd_blend_a64_d16_mask_c,
636                                        aom_highbd_blend_a64_d16_mask_neon)));
637 #endif  // HAVE_NEON
638 
// TODO(slavarnway): Enable the following in the avx2 commit. (56501)
// The instantiation prefix is AVX2: the SSE4_1 prefix is already used for
// BlendA64MaskTestHBD in this file, so reusing it would collide once this
// block is enabled.
#if 0
#if HAVE_AVX2
INSTANTIATE_TEST_SUITE_P(
    AVX2, BlendA64MaskTestHBD,
    ::testing::Values(TestFuncsHBD(aom_highbd_blend_a64_mask_c,
                                   aom_highbd_blend_a64_mask_avx2)));
#endif  // HAVE_AVX2
#endif
648 #endif  // CONFIG_AV1_HIGHBITDEPTH
649 }  // namespace
650