1 /*
2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
3 *
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 */
11
12 #include <cstdlib>
13 #include <new>
14 #include <ostream>
15 #include <tuple>
16
17 #include "gtest/gtest.h"
18
19 #include "config/aom_config.h"
20 #include "config/aom_dsp_rtcd.h"
21
22 #include "test/acm_random.h"
23 #include "test/register_state_check.h"
24 #include "aom/aom_codec.h"
25 #include "aom/aom_integer.h"
26 #include "aom_mem/aom_mem.h"
27 #include "aom_ports/aom_timer.h"
28 #include "aom_ports/mem.h"
29 #include "av1/common/cdef_block.h"
30
31 namespace {
32
// Function-pointer signatures of the kernels exercised by this file.

// MSE kernel reading a uint8_t dst and a uint16_t src, each with its own
// stride, over a w x h block.
using MseWxH16bitFunc = uint64_t (*)(uint8_t *dst, int dstride, uint16_t *src,
                                     int sstride, int w, int h);

// MSE kernel with the same operands as above but no src stride argument.
using Mse16xH16bitFunc = uint64_t (*)(uint8_t *dst, int dstride, uint16_t *src,
                                      int w, int h);

// Variance kernel: returns the variance and stores the sum of squared errors
// in *sse.
using VarianceMxNFunc = unsigned int (*)(const uint8_t *a, int a_stride,
                                         const uint8_t *b, int b_stride,
                                         unsigned int *sse);

// SSE/sum kernel with per-block outputs (sse8x8, sum8x8, var8x8) and running
// totals (tot_sse, tot_sum).
using GetSseSum8x8QuadFunc = void (*)(const uint8_t *a, int a_stride,
                                      const uint8_t *b, int b_stride,
                                      uint32_t *sse8x8, int *sum8x8,
                                      unsigned int *tot_sse, int *tot_sum,
                                      uint32_t *var8x8);

// SSE/sum kernel with per-block outputs (sse16x16, var16x16) and running
// totals (tot_sse, tot_sum).
using GetSseSum16x16DualFunc = void (*)(const uint8_t *a, int a_stride,
                                        const uint8_t *b, int b_stride,
                                        uint32_t *sse16x16,
                                        unsigned int *tot_sse, int *tot_sum,
                                        uint32_t *var16x16);

// Sub-pixel variance kernel taking an (xoffset, yoffset) position for a.
using SubpixVarMxNFunc = unsigned int (*)(const uint8_t *a, int a_stride,
                                          int xoffset, int yoffset,
                                          const uint8_t *b, int b_stride,
                                          unsigned int *sse);

// Sub-pixel variance kernel that additionally takes a second predictor.
using SubpixAvgVarMxNFunc = unsigned int (*)(const uint8_t *a, int a_stride,
                                             int xoffset, int yoffset,
                                             const uint8_t *b, int b_stride,
                                             uint32_t *sse,
                                             const uint8_t *second_pred);

// Sum-of-squares kernel over an int16_t buffer.
using SumOfSquaresFunction = unsigned int (*)(const int16_t *src);
60 typedef unsigned int (*DistWtdSubpixAvgVarMxNFunc)(
61 const uint8_t *a, int a_stride, int xoffset, int yoffset, const uint8_t *b,
62 int b_stride, uint32_t *sse, const uint8_t *second_pred,
63 const DIST_WTD_COMP_PARAMS *jcp_param);
64
65 #if !CONFIG_REALTIME_ONLY
// OBMC sub-pixel variance kernel: takes a predictor plus 32-bit weighted
// source and mask buffers, and reports the SSE through *sse.
using ObmcSubpelVarFunc = uint32_t (*)(const uint8_t *pre, int pre_stride,
                                       int xoffset, int yoffset,
                                       const int32_t *wsrc,
                                       const int32_t *mask, unsigned int *sse);
70 #endif
71
72 using libaom_test::ACMRandom;
73
74 // Truncate high bit depth results by downshifting (with rounding) by:
75 // 2 * (bit_depth - 8) for sse
76 // (bit_depth - 8) for se
RoundHighBitDepth(int bit_depth,int64_t * se,uint64_t * sse)77 static void RoundHighBitDepth(int bit_depth, int64_t *se, uint64_t *sse) {
78 switch (bit_depth) {
79 case AOM_BITS_12:
80 *sse = (*sse + 128) >> 8;
81 *se = (*se + 8) >> 4;
82 break;
83 case AOM_BITS_10:
84 *sse = (*sse + 8) >> 4;
85 *se = (*se + 2) >> 2;
86 break;
87 case AOM_BITS_8:
88 default: break;
89 }
90 }
91
92 /* Note:
93 * Our codebase calculates the "diff" value in the variance algorithm by
94 * (src - ref).
95 */
variance_ref(const uint8_t * src,const uint8_t * ref,int l2w,int l2h,int src_stride,int ref_stride,uint32_t * sse_ptr,bool use_high_bit_depth_,aom_bit_depth_t bit_depth)96 static uint32_t variance_ref(const uint8_t *src, const uint8_t *ref, int l2w,
97 int l2h, int src_stride, int ref_stride,
98 uint32_t *sse_ptr, bool use_high_bit_depth_,
99 aom_bit_depth_t bit_depth) {
100 int64_t se = 0;
101 uint64_t sse = 0;
102 const int w = 1 << l2w;
103 const int h = 1 << l2h;
104 for (int y = 0; y < h; y++) {
105 for (int x = 0; x < w; x++) {
106 int diff;
107 if (!use_high_bit_depth_) {
108 diff = src[y * src_stride + x] - ref[y * ref_stride + x];
109 se += diff;
110 sse += diff * diff;
111 } else {
112 diff = CONVERT_TO_SHORTPTR(src)[y * src_stride + x] -
113 CONVERT_TO_SHORTPTR(ref)[y * ref_stride + x];
114 se += diff;
115 sse += diff * diff;
116 }
117 }
118 }
119 RoundHighBitDepth(bit_depth, &se, &sse);
120 *sse_ptr = static_cast<uint32_t>(sse);
121 return static_cast<uint32_t>(sse - ((se * se) >> (l2w + l2h)));
122 }
123
124 /* The subpel reference functions differ from the codec version in one aspect:
125 * they calculate the bilinear factors directly instead of using a lookup table
126 * and therefore upshift xoff and yoff by 1. Only every other calculated value
127 * is used so the codec version shrinks the table to save space.
128 */
subpel_variance_ref(const uint8_t * ref,const uint8_t * src,int l2w,int l2h,int xoff,int yoff,uint32_t * sse_ptr,bool use_high_bit_depth_,aom_bit_depth_t bit_depth)129 static uint32_t subpel_variance_ref(const uint8_t *ref, const uint8_t *src,
130 int l2w, int l2h, int xoff, int yoff,
131 uint32_t *sse_ptr, bool use_high_bit_depth_,
132 aom_bit_depth_t bit_depth) {
133 int64_t se = 0;
134 uint64_t sse = 0;
135 const int w = 1 << l2w;
136 const int h = 1 << l2h;
137
138 xoff <<= 1;
139 yoff <<= 1;
140
141 for (int y = 0; y < h; y++) {
142 for (int x = 0; x < w; x++) {
143 // Bilinear interpolation at a 16th pel step.
144 if (!use_high_bit_depth_) {
145 const int a1 = ref[(w + 1) * (y + 0) + x + 0];
146 const int a2 = ref[(w + 1) * (y + 0) + x + 1];
147 const int b1 = ref[(w + 1) * (y + 1) + x + 0];
148 const int b2 = ref[(w + 1) * (y + 1) + x + 1];
149 const int a = a1 + (((a2 - a1) * xoff + 8) >> 4);
150 const int b = b1 + (((b2 - b1) * xoff + 8) >> 4);
151 const int r = a + (((b - a) * yoff + 8) >> 4);
152 const int diff = r - src[w * y + x];
153 se += diff;
154 sse += diff * diff;
155 } else {
156 uint16_t *ref16 = CONVERT_TO_SHORTPTR(ref);
157 uint16_t *src16 = CONVERT_TO_SHORTPTR(src);
158 const int a1 = ref16[(w + 1) * (y + 0) + x + 0];
159 const int a2 = ref16[(w + 1) * (y + 0) + x + 1];
160 const int b1 = ref16[(w + 1) * (y + 1) + x + 0];
161 const int b2 = ref16[(w + 1) * (y + 1) + x + 1];
162 const int a = a1 + (((a2 - a1) * xoff + 8) >> 4);
163 const int b = b1 + (((b2 - b1) * xoff + 8) >> 4);
164 const int r = a + (((b - a) * yoff + 8) >> 4);
165 const int diff = r - src16[w * y + x];
166 se += diff;
167 sse += diff * diff;
168 }
169 }
170 }
171 RoundHighBitDepth(bit_depth, &se, &sse);
172 *sse_ptr = static_cast<uint32_t>(sse);
173 return static_cast<uint32_t>(sse - ((se * se) >> (l2w + l2h)));
174 }
175
subpel_avg_variance_ref(const uint8_t * ref,const uint8_t * src,const uint8_t * second_pred,int l2w,int l2h,int xoff,int yoff,uint32_t * sse_ptr,bool use_high_bit_depth,aom_bit_depth_t bit_depth)176 static uint32_t subpel_avg_variance_ref(const uint8_t *ref, const uint8_t *src,
177 const uint8_t *second_pred, int l2w,
178 int l2h, int xoff, int yoff,
179 uint32_t *sse_ptr,
180 bool use_high_bit_depth,
181 aom_bit_depth_t bit_depth) {
182 int64_t se = 0;
183 uint64_t sse = 0;
184 const int w = 1 << l2w;
185 const int h = 1 << l2h;
186
187 xoff <<= 1;
188 yoff <<= 1;
189
190 for (int y = 0; y < h; y++) {
191 for (int x = 0; x < w; x++) {
192 // bilinear interpolation at a 16th pel step
193 if (!use_high_bit_depth) {
194 const int a1 = ref[(w + 1) * (y + 0) + x + 0];
195 const int a2 = ref[(w + 1) * (y + 0) + x + 1];
196 const int b1 = ref[(w + 1) * (y + 1) + x + 0];
197 const int b2 = ref[(w + 1) * (y + 1) + x + 1];
198 const int a = a1 + (((a2 - a1) * xoff + 8) >> 4);
199 const int b = b1 + (((b2 - b1) * xoff + 8) >> 4);
200 const int r = a + (((b - a) * yoff + 8) >> 4);
201 const int diff =
202 ((r + second_pred[w * y + x] + 1) >> 1) - src[w * y + x];
203 se += diff;
204 sse += diff * diff;
205 } else {
206 const uint16_t *ref16 = CONVERT_TO_SHORTPTR(ref);
207 const uint16_t *src16 = CONVERT_TO_SHORTPTR(src);
208 const uint16_t *sec16 = CONVERT_TO_SHORTPTR(second_pred);
209 const int a1 = ref16[(w + 1) * (y + 0) + x + 0];
210 const int a2 = ref16[(w + 1) * (y + 0) + x + 1];
211 const int b1 = ref16[(w + 1) * (y + 1) + x + 0];
212 const int b2 = ref16[(w + 1) * (y + 1) + x + 1];
213 const int a = a1 + (((a2 - a1) * xoff + 8) >> 4);
214 const int b = b1 + (((b2 - b1) * xoff + 8) >> 4);
215 const int r = a + (((b - a) * yoff + 8) >> 4);
216 const int diff = ((r + sec16[w * y + x] + 1) >> 1) - src16[w * y + x];
217 se += diff;
218 sse += diff * diff;
219 }
220 }
221 }
222 RoundHighBitDepth(bit_depth, &se, &sse);
223 *sse_ptr = static_cast<uint32_t>(sse);
224 return static_cast<uint32_t>(sse - ((se * se) >> (l2w + l2h)));
225 }
226
dist_wtd_subpel_avg_variance_ref(const uint8_t * ref,const uint8_t * src,const uint8_t * second_pred,int l2w,int l2h,int xoff,int yoff,uint32_t * sse_ptr,bool use_high_bit_depth,aom_bit_depth_t bit_depth,DIST_WTD_COMP_PARAMS * jcp_param)227 static uint32_t dist_wtd_subpel_avg_variance_ref(
228 const uint8_t *ref, const uint8_t *src, const uint8_t *second_pred, int l2w,
229 int l2h, int xoff, int yoff, uint32_t *sse_ptr, bool use_high_bit_depth,
230 aom_bit_depth_t bit_depth, DIST_WTD_COMP_PARAMS *jcp_param) {
231 int64_t se = 0;
232 uint64_t sse = 0;
233 const int w = 1 << l2w;
234 const int h = 1 << l2h;
235
236 xoff <<= 1;
237 yoff <<= 1;
238
239 for (int y = 0; y < h; y++) {
240 for (int x = 0; x < w; x++) {
241 // bilinear interpolation at a 16th pel step
242 if (!use_high_bit_depth) {
243 const int a1 = ref[(w + 0) * (y + 0) + x + 0];
244 const int a2 = ref[(w + 0) * (y + 0) + x + 1];
245 const int b1 = ref[(w + 0) * (y + 1) + x + 0];
246 const int b2 = ref[(w + 0) * (y + 1) + x + 1];
247 const int a = a1 + (((a2 - a1) * xoff + 8) >> 4);
248 const int b = b1 + (((b2 - b1) * xoff + 8) >> 4);
249 const int r = a + (((b - a) * yoff + 8) >> 4);
250 const int avg = ROUND_POWER_OF_TWO(
251 r * jcp_param->fwd_offset +
252 second_pred[w * y + x] * jcp_param->bck_offset,
253 DIST_PRECISION_BITS);
254 const int diff = avg - src[w * y + x];
255
256 se += diff;
257 sse += diff * diff;
258 } else {
259 const uint16_t *ref16 = CONVERT_TO_SHORTPTR(ref);
260 const uint16_t *src16 = CONVERT_TO_SHORTPTR(src);
261 const uint16_t *sec16 = CONVERT_TO_SHORTPTR(second_pred);
262 const int a1 = ref16[(w + 0) * (y + 0) + x + 0];
263 const int a2 = ref16[(w + 0) * (y + 0) + x + 1];
264 const int b1 = ref16[(w + 0) * (y + 1) + x + 0];
265 const int b2 = ref16[(w + 0) * (y + 1) + x + 1];
266 const int a = a1 + (((a2 - a1) * xoff + 8) >> 4);
267 const int b = b1 + (((b2 - b1) * xoff + 8) >> 4);
268 const int r = a + (((b - a) * yoff + 8) >> 4);
269 const int avg =
270 ROUND_POWER_OF_TWO(r * jcp_param->fwd_offset +
271 sec16[w * y + x] * jcp_param->bck_offset,
272 DIST_PRECISION_BITS);
273 const int diff = avg - src16[w * y + x];
274
275 se += diff;
276 sse += diff * diff;
277 }
278 }
279 }
280 RoundHighBitDepth(bit_depth, &se, &sse);
281 *sse_ptr = static_cast<uint32_t>(sse);
282 return static_cast<uint32_t>(sse - ((se * se) >> (l2w + l2h)));
283 }
284
285 #if !CONFIG_REALTIME_ONLY
obmc_subpel_variance_ref(const uint8_t * pre,int l2w,int l2h,int xoff,int yoff,const int32_t * wsrc,const int32_t * mask,uint32_t * sse_ptr,bool use_high_bit_depth_,aom_bit_depth_t bit_depth)286 static uint32_t obmc_subpel_variance_ref(const uint8_t *pre, int l2w, int l2h,
287 int xoff, int yoff,
288 const int32_t *wsrc,
289 const int32_t *mask, uint32_t *sse_ptr,
290 bool use_high_bit_depth_,
291 aom_bit_depth_t bit_depth) {
292 int64_t se = 0;
293 uint64_t sse = 0;
294 const int w = 1 << l2w;
295 const int h = 1 << l2h;
296
297 xoff <<= 1;
298 yoff <<= 1;
299
300 for (int y = 0; y < h; y++) {
301 for (int x = 0; x < w; x++) {
302 // Bilinear interpolation at a 16th pel step.
303 if (!use_high_bit_depth_) {
304 const int a1 = pre[(w + 1) * (y + 0) + x + 0];
305 const int a2 = pre[(w + 1) * (y + 0) + x + 1];
306 const int b1 = pre[(w + 1) * (y + 1) + x + 0];
307 const int b2 = pre[(w + 1) * (y + 1) + x + 1];
308 const int a = a1 + (((a2 - a1) * xoff + 8) >> 4);
309 const int b = b1 + (((b2 - b1) * xoff + 8) >> 4);
310 const int r = a + (((b - a) * yoff + 8) >> 4);
311 const int diff = ROUND_POWER_OF_TWO_SIGNED(
312 wsrc[w * y + x] - r * mask[w * y + x], 12);
313 se += diff;
314 sse += diff * diff;
315 } else {
316 uint16_t *pre16 = CONVERT_TO_SHORTPTR(pre);
317 const int a1 = pre16[(w + 1) * (y + 0) + x + 0];
318 const int a2 = pre16[(w + 1) * (y + 0) + x + 1];
319 const int b1 = pre16[(w + 1) * (y + 1) + x + 0];
320 const int b2 = pre16[(w + 1) * (y + 1) + x + 1];
321 const int a = a1 + (((a2 - a1) * xoff + 8) >> 4);
322 const int b = b1 + (((b2 - b1) * xoff + 8) >> 4);
323 const int r = a + (((b - a) * yoff + 8) >> 4);
324 const int diff = ROUND_POWER_OF_TWO_SIGNED(
325 wsrc[w * y + x] - r * mask[w * y + x], 12);
326 se += diff;
327 sse += diff * diff;
328 }
329 }
330 }
331 RoundHighBitDepth(bit_depth, &se, &sse);
332 *sse_ptr = static_cast<uint32_t>(sse);
333 return static_cast<uint32_t>(sse - ((se * se) >> (l2w + l2h)));
334 }
335 #endif
336
337 ////////////////////////////////////////////////////////////////////////////////
338
339 #if !CONFIG_REALTIME_ONLY
340 class SumOfSquaresTest : public ::testing::TestWithParam<SumOfSquaresFunction> {
341 public:
SumOfSquaresTest()342 SumOfSquaresTest() : func_(GetParam()) {}
343
344 ~SumOfSquaresTest() override = default;
345
346 protected:
347 void ConstTest();
348 void RefTest();
349
350 SumOfSquaresFunction func_;
351 ACMRandom rnd_;
352 };
353
ConstTest()354 void SumOfSquaresTest::ConstTest() {
355 int16_t mem[256];
356 unsigned int res;
357 for (int v = 0; v < 256; ++v) {
358 for (int i = 0; i < 256; ++i) {
359 mem[i] = v;
360 }
361 API_REGISTER_STATE_CHECK(res = func_(mem));
362 EXPECT_EQ(256u * (v * v), res);
363 }
364 }
365
// Reference sum of squares over the first 256 entries of src.
unsigned int mb_ss_ref(const int16_t *src) {
  unsigned int total = 0;
  const int16_t *const end = src + 256;
  for (const int16_t *p = src; p != end; ++p) {
    total += *p * *p;
  }
  return total;
}
373
RefTest()374 void SumOfSquaresTest::RefTest() {
375 int16_t mem[256];
376 for (int i = 0; i < 100; ++i) {
377 for (int j = 0; j < 256; ++j) {
378 mem[j] = rnd_.Rand8() - rnd_.Rand8();
379 }
380
381 const unsigned int expected = mb_ss_ref(mem);
382 unsigned int res;
383 API_REGISTER_STATE_CHECK(res = func_(mem));
384 EXPECT_EQ(expected, res);
385 }
386 }
387 #endif // !CONFIG_REALTIME_ONLY
388
389 ////////////////////////////////////////////////////////////////////////////////
390 // Encapsulating struct to store the function to test along with
391 // some testing context.
392 // Can be used for MSE, SSE, Variance, etc.
393
394 template <typename Func>
395 struct TestParams {
TestParams__anone33b19b50111::TestParams396 TestParams(int log2w = 0, int log2h = 0, Func function = nullptr,
397 int bit_depth_value = 0)
398 : log2width(log2w), log2height(log2h), func(function) {
399 use_high_bit_depth = (bit_depth_value > 0);
400 if (use_high_bit_depth) {
401 bit_depth = static_cast<aom_bit_depth_t>(bit_depth_value);
402 } else {
403 bit_depth = AOM_BITS_8;
404 }
405 width = 1 << log2width;
406 height = 1 << log2height;
407 block_size = width * height;
408 mask = (1u << bit_depth) - 1;
409 }
410
411 int log2width, log2height;
412 int width, height;
413 int block_size;
414 Func func;
415 aom_bit_depth_t bit_depth;
416 bool use_high_bit_depth;
417 uint32_t mask;
418 };
419
420 template <typename Func>
operator <<(std::ostream & os,const TestParams<Func> & p)421 std::ostream &operator<<(std::ostream &os, const TestParams<Func> &p) {
422 return os << "width/height:" << p.width << "/" << p.height
423 << " function:" << reinterpret_cast<const void *>(p.func)
424 << " bit-depth:" << p.bit_depth;
425 }
426
427 // Main class for testing a function type
428 template <typename FunctionType>
429 class MseWxHTestClass
430 : public ::testing::TestWithParam<TestParams<FunctionType> > {
431 public:
SetUp()432 void SetUp() override {
433 params_ = this->GetParam();
434
435 rnd_.Reset(ACMRandom::DeterministicSeed());
436 src_ = reinterpret_cast<uint16_t *>(
437 aom_memalign(16, block_size() * sizeof(src_)));
438 dst_ = reinterpret_cast<uint8_t *>(
439 aom_memalign(16, block_size() * sizeof(dst_)));
440 ASSERT_NE(src_, nullptr);
441 ASSERT_NE(dst_, nullptr);
442 }
443
TearDown()444 void TearDown() override {
445 aom_free(src_);
446 aom_free(dst_);
447 src_ = nullptr;
448 dst_ = nullptr;
449 }
450
451 protected:
452 void RefMatchTestMse();
453 void SpeedTest();
454
455 protected:
456 ACMRandom rnd_;
457 uint8_t *dst_;
458 uint16_t *src_;
459 TestParams<FunctionType> params_;
460
461 // some relay helpers
block_size() const462 int block_size() const { return params_.block_size; }
width() const463 int width() const { return params_.width; }
height() const464 int height() const { return params_.height; }
d_stride() const465 int d_stride() const { return params_.width; } // stride is same as width
s_stride() const466 int s_stride() const { return params_.width; } // stride is same as width
467 };
468
469 template <typename MseWxHFunctionType>
SpeedTest()470 void MseWxHTestClass<MseWxHFunctionType>::SpeedTest() {
471 aom_usec_timer ref_timer, test_timer;
472 double elapsed_time_c = 0;
473 double elapsed_time_simd = 0;
474 int run_time = 10000000;
475 int w = width();
476 int h = height();
477 int dstride = d_stride();
478 int sstride = s_stride();
479
480 for (int k = 0; k < block_size(); ++k) {
481 dst_[k] = rnd_.Rand8();
482 src_[k] = rnd_.Rand8();
483 }
484 aom_usec_timer_start(&ref_timer);
485 for (int i = 0; i < run_time; i++) {
486 aom_mse_wxh_16bit_c(dst_, dstride, src_, sstride, w, h);
487 }
488 aom_usec_timer_mark(&ref_timer);
489 elapsed_time_c = static_cast<double>(aom_usec_timer_elapsed(&ref_timer));
490
491 aom_usec_timer_start(&test_timer);
492 for (int i = 0; i < run_time; i++) {
493 params_.func(dst_, dstride, src_, sstride, w, h);
494 }
495 aom_usec_timer_mark(&test_timer);
496 elapsed_time_simd = static_cast<double>(aom_usec_timer_elapsed(&test_timer));
497
498 printf("%dx%d\tc_time=%lf \t simd_time=%lf \t gain=%lf\n", width(), height(),
499 elapsed_time_c, elapsed_time_simd,
500 (elapsed_time_c / elapsed_time_simd));
501 }
502
503 template <typename MseWxHFunctionType>
RefMatchTestMse()504 void MseWxHTestClass<MseWxHFunctionType>::RefMatchTestMse() {
505 uint64_t mse_ref = 0;
506 uint64_t mse_mod = 0;
507 int w = width();
508 int h = height();
509 int dstride = d_stride();
510 int sstride = s_stride();
511
512 for (int i = 0; i < 10; i++) {
513 for (int k = 0; k < block_size(); ++k) {
514 dst_[k] = rnd_.Rand8();
515 src_[k] = rnd_.Rand8();
516 }
517 API_REGISTER_STATE_CHECK(
518 mse_ref = aom_mse_wxh_16bit_c(dst_, dstride, src_, sstride, w, h));
519 API_REGISTER_STATE_CHECK(
520 mse_mod = params_.func(dst_, dstride, src_, sstride, w, h));
521 EXPECT_EQ(mse_ref, mse_mod)
522 << "ref mse: " << mse_ref << " mod mse: " << mse_mod;
523 }
524 }
525
526 template <typename FunctionType>
527 class Mse16xHTestClass
528 : public ::testing::TestWithParam<TestParams<FunctionType> > {
529 public:
530 // Memory required to compute mse of two 8x8 and four 4x4 blocks assigned for
531 // maximum width 16 and maximum height 8.
532 int mem_size = 16 * 8;
SetUp()533 void SetUp() override {
534 params_ = this->GetParam();
535 rnd_.Reset(ACMRandom::DeterministicSeed());
536 src_ = reinterpret_cast<uint16_t *>(
537 aom_memalign(16, mem_size * sizeof(*src_)));
538 dst_ =
539 reinterpret_cast<uint8_t *>(aom_memalign(16, mem_size * sizeof(*dst_)));
540 ASSERT_NE(src_, nullptr);
541 ASSERT_NE(dst_, nullptr);
542 }
543
TearDown()544 void TearDown() override {
545 aom_free(src_);
546 aom_free(dst_);
547 src_ = nullptr;
548 dst_ = nullptr;
549 }
550
RandBool()551 uint8_t RandBool() {
552 const uint32_t value = rnd_.Rand8();
553 return (value & 0x1);
554 }
555
556 protected:
557 void RefMatchExtremeTestMse();
558 void RefMatchTestMse();
559 void SpeedTest();
560
561 protected:
562 ACMRandom rnd_;
563 uint8_t *dst_;
564 uint16_t *src_;
565 TestParams<FunctionType> params_;
566
567 // some relay helpers
width() const568 int width() const { return params_.width; }
height() const569 int height() const { return params_.height; }
d_stride() const570 int d_stride() const { return params_.width; }
571 };
572
573 template <typename Mse16xHFunctionType>
SpeedTest()574 void Mse16xHTestClass<Mse16xHFunctionType>::SpeedTest() {
575 aom_usec_timer ref_timer, test_timer;
576 double elapsed_time_c = 0.0;
577 double elapsed_time_simd = 0.0;
578 const int loop_count = 10000000;
579 const int w = width();
580 const int h = height();
581 const int dstride = d_stride();
582
583 for (int k = 0; k < mem_size; ++k) {
584 dst_[k] = rnd_.Rand8();
585 // Right shift by 6 is done to generate more input in range of [0,255] than
586 // CDEF_VERY_LARGE
587 int rnd_i10 = rnd_.Rand16() >> 6;
588 src_[k] = (rnd_i10 < 256) ? rnd_i10 : CDEF_VERY_LARGE;
589 }
590
591 aom_usec_timer_start(&ref_timer);
592 for (int i = 0; i < loop_count; i++) {
593 aom_mse_16xh_16bit_c(dst_, dstride, src_, w, h);
594 }
595 aom_usec_timer_mark(&ref_timer);
596 elapsed_time_c = static_cast<double>(aom_usec_timer_elapsed(&ref_timer));
597
598 aom_usec_timer_start(&test_timer);
599 for (int i = 0; i < loop_count; i++) {
600 params_.func(dst_, dstride, src_, w, h);
601 }
602 aom_usec_timer_mark(&test_timer);
603 elapsed_time_simd = static_cast<double>(aom_usec_timer_elapsed(&test_timer));
604
605 printf("%dx%d\tc_time=%lf \t simd_time=%lf \t gain=%.31f\n", width(),
606 height(), elapsed_time_c, elapsed_time_simd,
607 (elapsed_time_c / elapsed_time_simd));
608 }
609
610 template <typename Mse16xHFunctionType>
RefMatchTestMse()611 void Mse16xHTestClass<Mse16xHFunctionType>::RefMatchTestMse() {
612 uint64_t mse_ref = 0;
613 uint64_t mse_mod = 0;
614 const int w = width();
615 const int h = height();
616 const int dstride = d_stride();
617
618 for (int i = 0; i < 10; i++) {
619 for (int k = 0; k < mem_size; ++k) {
620 dst_[k] = rnd_.Rand8();
621 // Right shift by 6 is done to generate more input in range of [0,255]
622 // than CDEF_VERY_LARGE
623 int rnd_i10 = rnd_.Rand16() >> 6;
624 src_[k] = (rnd_i10 < 256) ? rnd_i10 : CDEF_VERY_LARGE;
625 }
626
627 API_REGISTER_STATE_CHECK(
628 mse_ref = aom_mse_16xh_16bit_c(dst_, dstride, src_, w, h));
629 API_REGISTER_STATE_CHECK(mse_mod = params_.func(dst_, dstride, src_, w, h));
630 EXPECT_EQ(mse_ref, mse_mod)
631 << "ref mse: " << mse_ref << " mod mse: " << mse_mod;
632 }
633 }
634
635 template <typename Mse16xHFunctionType>
RefMatchExtremeTestMse()636 void Mse16xHTestClass<Mse16xHFunctionType>::RefMatchExtremeTestMse() {
637 uint64_t mse_ref = 0;
638 uint64_t mse_mod = 0;
639 const int w = width();
640 const int h = height();
641 const int dstride = d_stride();
642 const int iter = 10;
643
644 // Fill the buffers with extreme values
645 for (int i = 0; i < iter; i++) {
646 for (int k = 0; k < mem_size; ++k) {
647 dst_[k] = static_cast<uint8_t>(RandBool() ? 0 : 255);
648 src_[k] = static_cast<uint16_t>(RandBool() ? 0 : CDEF_VERY_LARGE);
649 }
650
651 API_REGISTER_STATE_CHECK(
652 mse_ref = aom_mse_16xh_16bit_c(dst_, dstride, src_, w, h));
653 API_REGISTER_STATE_CHECK(mse_mod = params_.func(dst_, dstride, src_, w, h));
654 EXPECT_EQ(mse_ref, mse_mod)
655 << "ref mse: " << mse_ref << " mod mse: " << mse_mod;
656 }
657 }
658
659 // Main class for testing a function type
660 template <typename FunctionType>
661 class MainTestClass
662 : public ::testing::TestWithParam<TestParams<FunctionType> > {
663 public:
SetUp()664 void SetUp() override {
665 params_ = this->GetParam();
666
667 rnd_.Reset(ACMRandom::DeterministicSeed());
668 const size_t unit =
669 use_high_bit_depth() ? sizeof(uint16_t) : sizeof(uint8_t);
670 src_ = reinterpret_cast<uint8_t *>(aom_memalign(16, block_size() * unit));
671 ref_ = new uint8_t[block_size() * unit];
672 ASSERT_NE(src_, nullptr);
673 ASSERT_NE(ref_, nullptr);
674 memset(src_, 0, block_size() * sizeof(src_[0]));
675 memset(ref_, 0, block_size() * sizeof(ref_[0]));
676 if (use_high_bit_depth()) {
677 // TODO(skal): remove!
678 src_ = CONVERT_TO_BYTEPTR(src_);
679 ref_ = CONVERT_TO_BYTEPTR(ref_);
680 }
681 }
682
TearDown()683 void TearDown() override {
684 if (use_high_bit_depth()) {
685 // TODO(skal): remove!
686 src_ = reinterpret_cast<uint8_t *>(CONVERT_TO_SHORTPTR(src_));
687 ref_ = reinterpret_cast<uint8_t *>(CONVERT_TO_SHORTPTR(ref_));
688 }
689
690 aom_free(src_);
691 delete[] ref_;
692 src_ = nullptr;
693 ref_ = nullptr;
694 }
695
696 protected:
697 // We could sub-class MainTestClass into dedicated class for Variance
698 // and MSE/SSE, but it involves a lot of 'this->xxx' dereferencing
699 // to access top class fields xxx. That's cumbersome, so for now we'll just
700 // implement the testing methods here:
701
702 // Variance tests
703 void ZeroTest();
704 void RefTest();
705 void RefStrideTest();
706 void OneQuarterTest();
707 void SpeedTest();
708
709 // SSE&SUM tests
710 void RefTestSseSum();
711 void MinTestSseSum();
712 void MaxTestSseSum();
713 void SseSum_SpeedTest();
714
715 // SSE&SUM dual tests
716 void RefTestSseSumDual();
717 void MinTestSseSumDual();
718 void MaxTestSseSumDual();
719 void SseSum_SpeedTestDual();
720
721 // MSE/SSE tests
722 void RefTestMse();
723 void RefTestSse();
724 void MaxTestMse();
725 void MaxTestSse();
726
727 protected:
728 ACMRandom rnd_;
729 uint8_t *src_;
730 uint8_t *ref_;
731 TestParams<FunctionType> params_;
732
733 // some relay helpers
use_high_bit_depth() const734 bool use_high_bit_depth() const { return params_.use_high_bit_depth; }
byte_shift() const735 int byte_shift() const { return params_.bit_depth - 8; }
block_size() const736 int block_size() const { return params_.block_size; }
width() const737 int width() const { return params_.width; }
height() const738 int height() const { return params_.height; }
mask() const739 uint32_t mask() const { return params_.mask; }
740 };
741
742 ////////////////////////////////////////////////////////////////////////////////
743 // Tests related to variance.
744
745 template <typename VarianceFunctionType>
ZeroTest()746 void MainTestClass<VarianceFunctionType>::ZeroTest() {
747 for (int i = 0; i <= 255; ++i) {
748 if (!use_high_bit_depth()) {
749 memset(src_, i, block_size());
750 } else {
751 uint16_t *const src16 = CONVERT_TO_SHORTPTR(src_);
752 for (int k = 0; k < block_size(); ++k) src16[k] = i << byte_shift();
753 }
754 for (int j = 0; j <= 255; ++j) {
755 if (!use_high_bit_depth()) {
756 memset(ref_, j, block_size());
757 } else {
758 uint16_t *const ref16 = CONVERT_TO_SHORTPTR(ref_);
759 for (int k = 0; k < block_size(); ++k) ref16[k] = j << byte_shift();
760 }
761 unsigned int sse, var;
762 API_REGISTER_STATE_CHECK(
763 var = params_.func(src_, width(), ref_, width(), &sse));
764 EXPECT_EQ(0u, var) << "src values: " << i << " ref values: " << j;
765 }
766 }
767 }
768
769 template <typename VarianceFunctionType>
RefTest()770 void MainTestClass<VarianceFunctionType>::RefTest() {
771 for (int i = 0; i < 10; ++i) {
772 for (int j = 0; j < block_size(); j++) {
773 if (!use_high_bit_depth()) {
774 src_[j] = rnd_.Rand8();
775 ref_[j] = rnd_.Rand8();
776 } else {
777 CONVERT_TO_SHORTPTR(src_)[j] = rnd_.Rand16() & mask();
778 CONVERT_TO_SHORTPTR(ref_)[j] = rnd_.Rand16() & mask();
779 }
780 }
781 unsigned int sse1, sse2, var1, var2;
782 const int stride = width();
783 API_REGISTER_STATE_CHECK(
784 var1 = params_.func(src_, stride, ref_, stride, &sse1));
785 var2 =
786 variance_ref(src_, ref_, params_.log2width, params_.log2height, stride,
787 stride, &sse2, use_high_bit_depth(), params_.bit_depth);
788 EXPECT_EQ(sse1, sse2) << "Error at test index: " << i;
789 EXPECT_EQ(var1, var2) << "Error at test index: " << i;
790 }
791 }
792
793 template <typename VarianceFunctionType>
RefStrideTest()794 void MainTestClass<VarianceFunctionType>::RefStrideTest() {
795 for (int i = 0; i < 10; ++i) {
796 const int ref_stride = (i & 1) * width();
797 const int src_stride = ((i >> 1) & 1) * width();
798 for (int j = 0; j < block_size(); j++) {
799 const int ref_ind = (j / width()) * ref_stride + j % width();
800 const int src_ind = (j / width()) * src_stride + j % width();
801 if (!use_high_bit_depth()) {
802 src_[src_ind] = rnd_.Rand8();
803 ref_[ref_ind] = rnd_.Rand8();
804 } else {
805 CONVERT_TO_SHORTPTR(src_)[src_ind] = rnd_.Rand16() & mask();
806 CONVERT_TO_SHORTPTR(ref_)[ref_ind] = rnd_.Rand16() & mask();
807 }
808 }
809 unsigned int sse1, sse2;
810 unsigned int var1, var2;
811
812 API_REGISTER_STATE_CHECK(
813 var1 = params_.func(src_, src_stride, ref_, ref_stride, &sse1));
814 var2 = variance_ref(src_, ref_, params_.log2width, params_.log2height,
815 src_stride, ref_stride, &sse2, use_high_bit_depth(),
816 params_.bit_depth);
817 EXPECT_EQ(sse1, sse2) << "Error at test index: " << i;
818 EXPECT_EQ(var1, var2) << "Error at test index: " << i;
819 }
820 }
821
822 template <typename VarianceFunctionType>
OneQuarterTest()823 void MainTestClass<VarianceFunctionType>::OneQuarterTest() {
824 const int half = block_size() / 2;
825 if (!use_high_bit_depth()) {
826 memset(src_, 255, block_size());
827 memset(ref_, 255, half);
828 memset(ref_ + half, 0, half);
829 } else {
830 aom_memset16(CONVERT_TO_SHORTPTR(src_), 255 << byte_shift(), block_size());
831 aom_memset16(CONVERT_TO_SHORTPTR(ref_), 255 << byte_shift(), half);
832 aom_memset16(CONVERT_TO_SHORTPTR(ref_) + half, 0, half);
833 }
834 unsigned int sse, var, expected;
835 API_REGISTER_STATE_CHECK(
836 var = params_.func(src_, width(), ref_, width(), &sse));
837 expected = block_size() * 255 * 255 / 4;
838 EXPECT_EQ(expected, var);
839 }
840
841 template <typename VarianceFunctionType>
SpeedTest()842 void MainTestClass<VarianceFunctionType>::SpeedTest() {
843 for (int j = 0; j < block_size(); j++) {
844 if (!use_high_bit_depth()) {
845 src_[j] = rnd_.Rand8();
846 ref_[j] = rnd_.Rand8();
847 #if CONFIG_AV1_HIGHBITDEPTH
848 } else {
849 CONVERT_TO_SHORTPTR(src_)[j] = rnd_.Rand16() & mask();
850 CONVERT_TO_SHORTPTR(ref_)[j] = rnd_.Rand16() & mask();
851 #endif // CONFIG_AV1_HIGHBITDEPTH
852 }
853 }
854 unsigned int sse;
855 const int stride = width();
856 int run_time = 1000000000 / block_size();
857 aom_usec_timer timer;
858 aom_usec_timer_start(&timer);
859 for (int i = 0; i < run_time; ++i) {
860 params_.func(src_, stride, ref_, stride, &sse);
861 }
862
863 aom_usec_timer_mark(&timer);
864 const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
865 printf("Variance %dx%d : %d us\n", width(), height(), elapsed_time);
866 }
867
868 template <typename GetSseSum8x8QuadFuncType>
RefTestSseSum()869 void MainTestClass<GetSseSum8x8QuadFuncType>::RefTestSseSum() {
870 for (int i = 0; i < 10; ++i) {
871 for (int j = 0; j < block_size(); ++j) {
872 src_[j] = rnd_.Rand8();
873 ref_[j] = rnd_.Rand8();
874 }
875 unsigned int sse1[256] = { 0 };
876 unsigned int sse2[256] = { 0 };
877 unsigned int var1[256] = { 0 };
878 unsigned int var2[256] = { 0 };
879 int sum1[256] = { 0 };
880 int sum2[256] = { 0 };
881 unsigned int sse_tot_c = 0;
882 unsigned int sse_tot_simd = 0;
883 int sum_tot_c = 0;
884 int sum_tot_simd = 0;
885 const int stride = width();
886 int k = 0;
887
888 for (int row = 0; row < height(); row += 8) {
889 for (int col = 0; col < width(); col += 32) {
890 API_REGISTER_STATE_CHECK(params_.func(src_ + stride * row + col, stride,
891 ref_ + stride * row + col, stride,
892 &sse1[k], &sum1[k], &sse_tot_simd,
893 &sum_tot_simd, &var1[k]));
894 aom_get_var_sse_sum_8x8_quad_c(
895 src_ + stride * row + col, stride, ref_ + stride * row + col,
896 stride, &sse2[k], &sum2[k], &sse_tot_c, &sum_tot_c, &var2[k]);
897 k += 4;
898 }
899 }
900 EXPECT_EQ(sse_tot_c, sse_tot_simd);
901 EXPECT_EQ(sum_tot_c, sum_tot_simd);
902 for (int p = 0; p < 256; p++) {
903 EXPECT_EQ(sse1[p], sse2[p]);
904 EXPECT_EQ(sum1[p], sum2[p]);
905 EXPECT_EQ(var1[p], var2[p]);
906 }
907 }
908 }
909
910 template <typename GetSseSum8x8QuadFuncType>
MinTestSseSum()911 void MainTestClass<GetSseSum8x8QuadFuncType>::MinTestSseSum() {
912 memset(src_, 0, block_size());
913 memset(ref_, 255, block_size());
914 unsigned int sse1[256] = { 0 };
915 unsigned int sse2[256] = { 0 };
916 unsigned int var1[256] = { 0 };
917 unsigned int var2[256] = { 0 };
918 int sum1[256] = { 0 };
919 int sum2[256] = { 0 };
920 unsigned int sse_tot_c = 0;
921 unsigned int sse_tot_simd = 0;
922 int sum_tot_c = 0;
923 int sum_tot_simd = 0;
924 const int stride = width();
925 int k = 0;
926
927 for (int i = 0; i < height(); i += 8) {
928 for (int j = 0; j < width(); j += 32) {
929 API_REGISTER_STATE_CHECK(params_.func(
930 src_ + stride * i + j, stride, ref_ + stride * i + j, stride,
931 &sse1[k], &sum1[k], &sse_tot_simd, &sum_tot_simd, &var1[k]));
932 aom_get_var_sse_sum_8x8_quad_c(
933 src_ + stride * i + j, stride, ref_ + stride * i + j, stride,
934 &sse2[k], &sum2[k], &sse_tot_c, &sum_tot_c, &var2[k]);
935 k += 4;
936 }
937 }
938 EXPECT_EQ(sse_tot_simd, sse_tot_c);
939 EXPECT_EQ(sum_tot_simd, sum_tot_c);
940 for (int p = 0; p < 256; p++) {
941 EXPECT_EQ(sse1[p], sse2[p]);
942 EXPECT_EQ(sum1[p], sum2[p]);
943 EXPECT_EQ(var1[p], var2[p]);
944 }
945 }
946
947 template <typename GetSseSum8x8QuadFuncType>
MaxTestSseSum()948 void MainTestClass<GetSseSum8x8QuadFuncType>::MaxTestSseSum() {
949 memset(src_, 255, block_size());
950 memset(ref_, 0, block_size());
951 unsigned int sse1[256] = { 0 };
952 unsigned int sse2[256] = { 0 };
953 unsigned int var1[256] = { 0 };
954 unsigned int var2[256] = { 0 };
955 int sum1[256] = { 0 };
956 int sum2[256] = { 0 };
957 unsigned int sse_tot_c = 0;
958 unsigned int sse_tot_simd = 0;
959 int sum_tot_c = 0;
960 int sum_tot_simd = 0;
961 const int stride = width();
962 int k = 0;
963
964 for (int i = 0; i < height(); i += 8) {
965 for (int j = 0; j < width(); j += 32) {
966 API_REGISTER_STATE_CHECK(params_.func(
967 src_ + stride * i + j, stride, ref_ + stride * i + j, stride,
968 &sse1[k], &sum1[k], &sse_tot_simd, &sum_tot_simd, &var1[k]));
969 aom_get_var_sse_sum_8x8_quad_c(
970 src_ + stride * i + j, stride, ref_ + stride * i + j, stride,
971 &sse2[k], &sum2[k], &sse_tot_c, &sum_tot_c, &var2[k]);
972 k += 4;
973 }
974 }
975 EXPECT_EQ(sse_tot_c, sse_tot_simd);
976 EXPECT_EQ(sum_tot_c, sum_tot_simd);
977
978 for (int p = 0; p < 256; p++) {
979 EXPECT_EQ(sse1[p], sse2[p]);
980 EXPECT_EQ(sum1[p], sum2[p]);
981 EXPECT_EQ(var1[p], var2[p]);
982 }
983 }
984
985 template <typename GetSseSum8x8QuadFuncType>
SseSum_SpeedTest()986 void MainTestClass<GetSseSum8x8QuadFuncType>::SseSum_SpeedTest() {
987 const int loop_count = 1000000000 / block_size();
988 for (int j = 0; j < block_size(); ++j) {
989 src_[j] = rnd_.Rand8();
990 ref_[j] = rnd_.Rand8();
991 }
992
993 unsigned int sse1[4] = { 0 };
994 unsigned int sse2[4] = { 0 };
995 unsigned int var1[4] = { 0 };
996 unsigned int var2[4] = { 0 };
997 int sum1[4] = { 0 };
998 int sum2[4] = { 0 };
999 unsigned int sse_tot_c = 0;
1000 unsigned int sse_tot_simd = 0;
1001 int sum_tot_c = 0;
1002 int sum_tot_simd = 0;
1003 const int stride = width();
1004
1005 aom_usec_timer timer;
1006 aom_usec_timer_start(&timer);
1007 for (int r = 0; r < loop_count; ++r) {
1008 for (int i = 0; i < height(); i += 8) {
1009 for (int j = 0; j < width(); j += 32) {
1010 aom_get_var_sse_sum_8x8_quad_c(src_ + stride * i + j, stride,
1011 ref_ + stride * i + j, stride, sse2,
1012 sum2, &sse_tot_c, &sum_tot_c, var2);
1013 }
1014 }
1015 }
1016 aom_usec_timer_mark(&timer);
1017 const double elapsed_time_ref =
1018 static_cast<double>(aom_usec_timer_elapsed(&timer));
1019
1020 aom_usec_timer_start(&timer);
1021 for (int r = 0; r < loop_count; ++r) {
1022 for (int i = 0; i < height(); i += 8) {
1023 for (int j = 0; j < width(); j += 32) {
1024 params_.func(src_ + stride * i + j, stride, ref_ + stride * i + j,
1025 stride, sse1, sum1, &sse_tot_simd, &sum_tot_simd, var1);
1026 }
1027 }
1028 }
1029 aom_usec_timer_mark(&timer);
1030 const double elapsed_time_simd =
1031 static_cast<double>(aom_usec_timer_elapsed(&timer));
1032
1033 printf(
1034 "aom_getvar_8x8_quad for block=%dx%d : ref_time=%lf \t simd_time=%lf \t "
1035 "gain=%lf \n",
1036 width(), height(), elapsed_time_ref, elapsed_time_simd,
1037 elapsed_time_ref / elapsed_time_simd);
1038 }
1039
1040 template <typename GetSseSum16x16DualFuncType>
RefTestSseSumDual()1041 void MainTestClass<GetSseSum16x16DualFuncType>::RefTestSseSumDual() {
1042 for (int iter = 0; iter < 10; ++iter) {
1043 for (int idx = 0; idx < block_size(); ++idx) {
1044 src_[idx] = rnd_.Rand8();
1045 ref_[idx] = rnd_.Rand8();
1046 }
1047 unsigned int sse1[64] = { 0 };
1048 unsigned int sse2[64] = { 0 };
1049 unsigned int var1[64] = { 0 };
1050 unsigned int var2[64] = { 0 };
1051 unsigned int sse_tot_c = 0;
1052 unsigned int sse_tot_simd = 0;
1053 int sum_tot_c = 0;
1054 int sum_tot_simd = 0;
1055 const int stride = width();
1056 int k = 0;
1057
1058 for (int row = 0; row < height(); row += 16) {
1059 for (int col = 0; col < width(); col += 32) {
1060 API_REGISTER_STATE_CHECK(params_.func(
1061 src_ + stride * row + col, stride, ref_ + stride * row + col,
1062 stride, &sse1[k], &sse_tot_simd, &sum_tot_simd, &var1[k]));
1063 aom_get_var_sse_sum_16x16_dual_c(
1064 src_ + stride * row + col, stride, ref_ + stride * row + col,
1065 stride, &sse2[k], &sse_tot_c, &sum_tot_c, &var2[k]);
1066 k += 2;
1067 }
1068 }
1069 EXPECT_EQ(sse_tot_c, sse_tot_simd);
1070 EXPECT_EQ(sum_tot_c, sum_tot_simd);
1071 for (int p = 0; p < 64; p++) {
1072 EXPECT_EQ(sse1[p], sse2[p]);
1073 EXPECT_EQ(sse_tot_simd, sse_tot_c);
1074 EXPECT_EQ(sum_tot_simd, sum_tot_c);
1075 EXPECT_EQ(var1[p], var2[p]);
1076 }
1077 }
1078 }
1079
1080 template <typename GetSseSum16x16DualFuncType>
MinTestSseSumDual()1081 void MainTestClass<GetSseSum16x16DualFuncType>::MinTestSseSumDual() {
1082 memset(src_, 0, block_size());
1083 memset(ref_, 255, block_size());
1084 unsigned int sse1[64] = { 0 };
1085 unsigned int sse2[64] = { 0 };
1086 unsigned int var1[64] = { 0 };
1087 unsigned int var2[64] = { 0 };
1088 unsigned int sse_tot_c = 0;
1089 unsigned int sse_tot_simd = 0;
1090 int sum_tot_c = 0;
1091 int sum_tot_simd = 0;
1092 const int stride = width();
1093 int k = 0;
1094
1095 for (int row = 0; row < height(); row += 16) {
1096 for (int col = 0; col < width(); col += 32) {
1097 API_REGISTER_STATE_CHECK(params_.func(
1098 src_ + stride * row + col, stride, ref_ + stride * row + col, stride,
1099 &sse1[k], &sse_tot_simd, &sum_tot_simd, &var1[k]));
1100 aom_get_var_sse_sum_16x16_dual_c(
1101 src_ + stride * row + col, stride, ref_ + stride * row + col, stride,
1102 &sse2[k], &sse_tot_c, &sum_tot_c, &var2[k]);
1103 k += 2;
1104 }
1105 }
1106 EXPECT_EQ(sse_tot_simd, sse_tot_c);
1107 EXPECT_EQ(sum_tot_simd, sum_tot_c);
1108 for (int p = 0; p < 64; p++) {
1109 EXPECT_EQ(sse1[p], sse2[p]);
1110 EXPECT_EQ(var1[p], var2[p]);
1111 }
1112 }
1113
1114 template <typename GetSseSum16x16DualFuncType>
MaxTestSseSumDual()1115 void MainTestClass<GetSseSum16x16DualFuncType>::MaxTestSseSumDual() {
1116 memset(src_, 255, block_size());
1117 memset(ref_, 0, block_size());
1118 unsigned int sse1[64] = { 0 };
1119 unsigned int sse2[64] = { 0 };
1120 unsigned int var1[64] = { 0 };
1121 unsigned int var2[64] = { 0 };
1122 unsigned int sse_tot_c = 0;
1123 unsigned int sse_tot_simd = 0;
1124 int sum_tot_c = 0;
1125 int sum_tot_simd = 0;
1126 const int stride = width();
1127 int k = 0;
1128
1129 for (int row = 0; row < height(); row += 16) {
1130 for (int col = 0; col < width(); col += 32) {
1131 API_REGISTER_STATE_CHECK(params_.func(
1132 src_ + stride * row + col, stride, ref_ + stride * row + col, stride,
1133 &sse1[k], &sse_tot_simd, &sum_tot_simd, &var1[k]));
1134 aom_get_var_sse_sum_16x16_dual_c(
1135 src_ + stride * row + col, stride, ref_ + stride * row + col, stride,
1136 &sse2[k], &sse_tot_c, &sum_tot_c, &var2[k]);
1137 k += 2;
1138 }
1139 }
1140 EXPECT_EQ(sse_tot_c, sse_tot_simd);
1141 EXPECT_EQ(sum_tot_c, sum_tot_simd);
1142
1143 for (int p = 0; p < 64; p++) {
1144 EXPECT_EQ(sse1[p], sse2[p]);
1145 EXPECT_EQ(var1[p], var2[p]);
1146 }
1147 }
1148
1149 template <typename GetSseSum16x16DualFuncType>
SseSum_SpeedTestDual()1150 void MainTestClass<GetSseSum16x16DualFuncType>::SseSum_SpeedTestDual() {
1151 const int loop_count = 1000000000 / block_size();
1152 for (int idx = 0; idx < block_size(); ++idx) {
1153 src_[idx] = rnd_.Rand8();
1154 ref_[idx] = rnd_.Rand8();
1155 }
1156
1157 unsigned int sse1[2] = { 0 };
1158 unsigned int sse2[2] = { 0 };
1159 unsigned int var1[2] = { 0 };
1160 unsigned int var2[2] = { 0 };
1161 unsigned int sse_tot_c = 0;
1162 unsigned int sse_tot_simd = 0;
1163 int sum_tot_c = 0;
1164 int sum_tot_simd = 0;
1165 const int stride = width();
1166
1167 aom_usec_timer timer;
1168 aom_usec_timer_start(&timer);
1169 for (int r = 0; r < loop_count; ++r) {
1170 for (int row = 0; row < height(); row += 16) {
1171 for (int col = 0; col < width(); col += 32) {
1172 aom_get_var_sse_sum_16x16_dual_c(src_ + stride * row + col, stride,
1173 ref_ + stride * row + col, stride,
1174 sse2, &sse_tot_c, &sum_tot_c, var2);
1175 }
1176 }
1177 }
1178 aom_usec_timer_mark(&timer);
1179 const double elapsed_time_ref =
1180 static_cast<double>(aom_usec_timer_elapsed(&timer));
1181
1182 aom_usec_timer_start(&timer);
1183 for (int r = 0; r < loop_count; ++r) {
1184 for (int row = 0; row < height(); row += 16) {
1185 for (int col = 0; col < width(); col += 32) {
1186 params_.func(src_ + stride * row + col, stride,
1187 ref_ + stride * row + col, stride, sse1, &sse_tot_simd,
1188 &sum_tot_simd, var1);
1189 }
1190 }
1191 }
1192 aom_usec_timer_mark(&timer);
1193 const double elapsed_time_simd =
1194 static_cast<double>(aom_usec_timer_elapsed(&timer));
1195
1196 printf(
1197 "aom_getvar_16x16_dual for block=%dx%d : ref_time=%lf \t simd_time=%lf "
1198 "\t "
1199 "gain=%lf \n",
1200 width(), height(), elapsed_time_ref, elapsed_time_simd,
1201 elapsed_time_ref / elapsed_time_simd);
1202 }
1203
1204 ////////////////////////////////////////////////////////////////////////////////
1205 // Tests related to MSE / SSE.
1206
1207 template <typename FunctionType>
RefTestMse()1208 void MainTestClass<FunctionType>::RefTestMse() {
1209 for (int i = 0; i < 10; ++i) {
1210 for (int j = 0; j < block_size(); ++j) {
1211 if (!use_high_bit_depth()) {
1212 src_[j] = rnd_.Rand8();
1213 ref_[j] = rnd_.Rand8();
1214 #if CONFIG_AV1_HIGHBITDEPTH
1215 } else {
1216 CONVERT_TO_SHORTPTR(src_)[j] = rnd_.Rand16() & mask();
1217 CONVERT_TO_SHORTPTR(ref_)[j] = rnd_.Rand16() & mask();
1218 #endif // CONFIG_AV1_HIGHBITDEPTH
1219 }
1220 }
1221 unsigned int sse1, sse2;
1222 const int stride = width();
1223 API_REGISTER_STATE_CHECK(params_.func(src_, stride, ref_, stride, &sse1));
1224 variance_ref(src_, ref_, params_.log2width, params_.log2height, stride,
1225 stride, &sse2, use_high_bit_depth(), params_.bit_depth);
1226 EXPECT_EQ(sse1, sse2);
1227 }
1228 }
1229
1230 template <typename FunctionType>
RefTestSse()1231 void MainTestClass<FunctionType>::RefTestSse() {
1232 for (int i = 0; i < 10; ++i) {
1233 for (int j = 0; j < block_size(); ++j) {
1234 src_[j] = rnd_.Rand8();
1235 ref_[j] = rnd_.Rand8();
1236 }
1237 unsigned int sse2;
1238 unsigned int var1;
1239 const int stride = width();
1240 API_REGISTER_STATE_CHECK(var1 = params_.func(src_, stride, ref_, stride));
1241 variance_ref(src_, ref_, params_.log2width, params_.log2height, stride,
1242 stride, &sse2, false, AOM_BITS_8);
1243 EXPECT_EQ(var1, sse2);
1244 }
1245 }
1246
1247 template <typename FunctionType>
MaxTestMse()1248 void MainTestClass<FunctionType>::MaxTestMse() {
1249 int max_value = (1 << params_.bit_depth) - 1;
1250 if (!use_high_bit_depth()) {
1251 memset(src_, max_value, block_size());
1252 memset(ref_, 0, block_size());
1253 #if CONFIG_AV1_HIGHBITDEPTH
1254 } else {
1255 aom_memset16(CONVERT_TO_SHORTPTR(src_), max_value, block_size());
1256 aom_memset16(CONVERT_TO_SHORTPTR(ref_), 0, block_size());
1257 #endif // CONFIG_AV1_HIGHBITDEPTH
1258 }
1259 unsigned int sse;
1260 API_REGISTER_STATE_CHECK(params_.func(src_, width(), ref_, width(), &sse));
1261 unsigned int expected = (unsigned int)block_size() * max_value * max_value;
1262 switch (params_.bit_depth) {
1263 case AOM_BITS_12: expected = ROUND_POWER_OF_TWO(expected, 8); break;
1264 case AOM_BITS_10: expected = ROUND_POWER_OF_TWO(expected, 4); break;
1265 case AOM_BITS_8:
1266 default: break;
1267 }
1268 EXPECT_EQ(expected, sse);
1269 }
1270
1271 template <typename FunctionType>
MaxTestSse()1272 void MainTestClass<FunctionType>::MaxTestSse() {
1273 memset(src_, 255, block_size());
1274 memset(ref_, 0, block_size());
1275 unsigned int var;
1276 API_REGISTER_STATE_CHECK(var = params_.func(src_, width(), ref_, width()));
1277 const unsigned int expected = block_size() * 255 * 255;
1278 EXPECT_EQ(expected, var);
1279 }
1280
1281 ////////////////////////////////////////////////////////////////////////////////
1282
1283 using std::get;
1284 using std::make_tuple;
1285 using std::tuple;
1286
1287 template <typename FunctionType>
1288 class SubpelVarianceTest
1289 : public ::testing::TestWithParam<TestParams<FunctionType> > {
1290 public:
SetUp()1291 void SetUp() override {
1292 params_ = this->GetParam();
1293
1294 rnd_.Reset(ACMRandom::DeterministicSeed());
1295 if (!use_high_bit_depth()) {
1296 src_ = reinterpret_cast<uint8_t *>(aom_memalign(32, block_size()));
1297 sec_ = reinterpret_cast<uint8_t *>(aom_memalign(32, block_size()));
1298 ref_ = reinterpret_cast<uint8_t *>(
1299 aom_memalign(32, block_size() + width() + height() + 1));
1300 } else {
1301 src_ = CONVERT_TO_BYTEPTR(reinterpret_cast<uint16_t *>(
1302 aom_memalign(32, block_size() * sizeof(uint16_t))));
1303 sec_ = CONVERT_TO_BYTEPTR(reinterpret_cast<uint16_t *>(
1304 aom_memalign(32, block_size() * sizeof(uint16_t))));
1305 ref_ = CONVERT_TO_BYTEPTR(aom_memalign(
1306 32, (block_size() + width() + height() + 1) * sizeof(uint16_t)));
1307 }
1308 ASSERT_NE(src_, nullptr);
1309 ASSERT_NE(sec_, nullptr);
1310 ASSERT_NE(ref_, nullptr);
1311 }
1312
TearDown()1313 void TearDown() override {
1314 if (!use_high_bit_depth()) {
1315 aom_free(src_);
1316 aom_free(ref_);
1317 aom_free(sec_);
1318 } else {
1319 aom_free(CONVERT_TO_SHORTPTR(src_));
1320 aom_free(CONVERT_TO_SHORTPTR(ref_));
1321 aom_free(CONVERT_TO_SHORTPTR(sec_));
1322 }
1323 }
1324
1325 protected:
1326 void RefTest();
1327 void ExtremeRefTest();
1328 void SpeedTest();
1329
1330 ACMRandom rnd_;
1331 uint8_t *src_;
1332 uint8_t *ref_;
1333 uint8_t *sec_;
1334 TestParams<FunctionType> params_;
1335 DIST_WTD_COMP_PARAMS jcp_param_;
1336
1337 // some relay helpers
use_high_bit_depth() const1338 bool use_high_bit_depth() const { return params_.use_high_bit_depth; }
byte_shift() const1339 int byte_shift() const { return params_.bit_depth - 8; }
block_size() const1340 int block_size() const { return params_.block_size; }
width() const1341 int width() const { return params_.width; }
height() const1342 int height() const { return params_.height; }
mask() const1343 uint32_t mask() const { return params_.mask; }
1344 };
1345
1346 template <typename SubpelVarianceFunctionType>
RefTest()1347 void SubpelVarianceTest<SubpelVarianceFunctionType>::RefTest() {
1348 for (int x = 0; x < 8; ++x) {
1349 for (int y = 0; y < 8; ++y) {
1350 if (!use_high_bit_depth()) {
1351 for (int j = 0; j < block_size(); j++) {
1352 src_[j] = rnd_.Rand8();
1353 }
1354 for (int j = 0; j < block_size() + width() + height() + 1; j++) {
1355 ref_[j] = rnd_.Rand8();
1356 }
1357 } else {
1358 for (int j = 0; j < block_size(); j++) {
1359 CONVERT_TO_SHORTPTR(src_)[j] = rnd_.Rand16() & mask();
1360 }
1361 for (int j = 0; j < block_size() + width() + height() + 1; j++) {
1362 CONVERT_TO_SHORTPTR(ref_)[j] = rnd_.Rand16() & mask();
1363 }
1364 }
1365 unsigned int sse1, sse2;
1366 unsigned int var1;
1367 API_REGISTER_STATE_CHECK(
1368 var1 = params_.func(ref_, width() + 1, x, y, src_, width(), &sse1));
1369 const unsigned int var2 = subpel_variance_ref(
1370 ref_, src_, params_.log2width, params_.log2height, x, y, &sse2,
1371 use_high_bit_depth(), params_.bit_depth);
1372 EXPECT_EQ(sse1, sse2) << "at position " << x << ", " << y;
1373 EXPECT_EQ(var1, var2) << "at position " << x << ", " << y;
1374 }
1375 }
1376 }
1377
1378 template <typename SubpelVarianceFunctionType>
ExtremeRefTest()1379 void SubpelVarianceTest<SubpelVarianceFunctionType>::ExtremeRefTest() {
1380 // Compare against reference.
1381 // Src: Set the first half of values to 0, the second half to the maximum.
1382 // Ref: Set the first half of values to the maximum, the second half to 0.
1383 for (int x = 0; x < 8; ++x) {
1384 for (int y = 0; y < 8; ++y) {
1385 const int half = block_size() / 2;
1386 if (!use_high_bit_depth()) {
1387 memset(src_, 0, half);
1388 memset(src_ + half, 255, half);
1389 memset(ref_, 255, half);
1390 memset(ref_ + half, 0, half + width() + height() + 1);
1391 } else {
1392 aom_memset16(CONVERT_TO_SHORTPTR(src_), mask(), half);
1393 aom_memset16(CONVERT_TO_SHORTPTR(src_) + half, 0, half);
1394 aom_memset16(CONVERT_TO_SHORTPTR(ref_), 0, half);
1395 aom_memset16(CONVERT_TO_SHORTPTR(ref_) + half, mask(),
1396 half + width() + height() + 1);
1397 }
1398 unsigned int sse1, sse2;
1399 unsigned int var1;
1400 API_REGISTER_STATE_CHECK(
1401 var1 = params_.func(ref_, width() + 1, x, y, src_, width(), &sse1));
1402 const unsigned int var2 = subpel_variance_ref(
1403 ref_, src_, params_.log2width, params_.log2height, x, y, &sse2,
1404 use_high_bit_depth(), params_.bit_depth);
1405 EXPECT_EQ(sse1, sse2) << "for xoffset " << x << " and yoffset " << y;
1406 EXPECT_EQ(var1, var2) << "for xoffset " << x << " and yoffset " << y;
1407 }
1408 }
1409 }
1410
1411 template <typename SubpelVarianceFunctionType>
SpeedTest()1412 void SubpelVarianceTest<SubpelVarianceFunctionType>::SpeedTest() {
1413 if (!use_high_bit_depth()) {
1414 for (int j = 0; j < block_size(); j++) {
1415 src_[j] = rnd_.Rand8();
1416 }
1417 for (int j = 0; j < block_size() + width() + height() + 1; j++) {
1418 ref_[j] = rnd_.Rand8();
1419 }
1420 } else {
1421 for (int j = 0; j < block_size(); j++) {
1422 CONVERT_TO_SHORTPTR(src_)[j] = rnd_.Rand16() & mask();
1423 }
1424 for (int j = 0; j < block_size() + width() + height() + 1; j++) {
1425 CONVERT_TO_SHORTPTR(ref_)[j] = rnd_.Rand16() & mask();
1426 }
1427 }
1428
1429 unsigned int sse1, sse2;
1430 int run_time = 1000000000 / block_size();
1431 aom_usec_timer timer;
1432
1433 aom_usec_timer_start(&timer);
1434 for (int i = 0; i < run_time; ++i) {
1435 int x = rnd_(8);
1436 int y = rnd_(8);
1437 params_.func(ref_, width() + 1, x, y, src_, width(), &sse1);
1438 }
1439 aom_usec_timer_mark(&timer);
1440
1441 const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
1442
1443 aom_usec_timer timer_c;
1444
1445 aom_usec_timer_start(&timer_c);
1446 for (int i = 0; i < run_time; ++i) {
1447 int x = rnd_(8);
1448 int y = rnd_(8);
1449 subpel_variance_ref(ref_, src_, params_.log2width, params_.log2height, x, y,
1450 &sse2, use_high_bit_depth(), params_.bit_depth);
1451 }
1452 aom_usec_timer_mark(&timer_c);
1453
1454 const int elapsed_time_c = static_cast<int>(aom_usec_timer_elapsed(&timer_c));
1455
1456 printf(
1457 "sub_pixel_variance_%dx%d_%d: ref_time=%d us opt_time=%d us gain=%d \n",
1458 width(), height(), params_.bit_depth, elapsed_time_c, elapsed_time,
1459 elapsed_time_c / elapsed_time);
1460 }
1461
1462 template <>
RefTest()1463 void SubpelVarianceTest<SubpixAvgVarMxNFunc>::RefTest() {
1464 for (int x = 0; x < 8; ++x) {
1465 for (int y = 0; y < 8; ++y) {
1466 if (!use_high_bit_depth()) {
1467 for (int j = 0; j < block_size(); j++) {
1468 src_[j] = rnd_.Rand8();
1469 sec_[j] = rnd_.Rand8();
1470 }
1471 for (int j = 0; j < block_size() + width() + height() + 1; j++) {
1472 ref_[j] = rnd_.Rand8();
1473 }
1474 } else {
1475 for (int j = 0; j < block_size(); j++) {
1476 CONVERT_TO_SHORTPTR(src_)[j] = rnd_.Rand16() & mask();
1477 CONVERT_TO_SHORTPTR(sec_)[j] = rnd_.Rand16() & mask();
1478 }
1479 for (int j = 0; j < block_size() + width() + height() + 1; j++) {
1480 CONVERT_TO_SHORTPTR(ref_)[j] = rnd_.Rand16() & mask();
1481 }
1482 }
1483 uint32_t sse1, sse2;
1484 uint32_t var1, var2;
1485 API_REGISTER_STATE_CHECK(var1 = params_.func(ref_, width() + 1, x, y,
1486 src_, width(), &sse1, sec_));
1487 var2 = subpel_avg_variance_ref(ref_, src_, sec_, params_.log2width,
1488 params_.log2height, x, y, &sse2,
1489 use_high_bit_depth(), params_.bit_depth);
1490 EXPECT_EQ(sse1, sse2) << "at position " << x << ", " << y;
1491 EXPECT_EQ(var1, var2) << "at position " << x << ", " << y;
1492 }
1493 }
1494 }
1495
1496 template <>
RefTest()1497 void SubpelVarianceTest<DistWtdSubpixAvgVarMxNFunc>::RefTest() {
1498 for (int x = 0; x < 8; ++x) {
1499 for (int y = 0; y < 8; ++y) {
1500 if (!use_high_bit_depth()) {
1501 for (int j = 0; j < block_size(); j++) {
1502 src_[j] = rnd_.Rand8();
1503 sec_[j] = rnd_.Rand8();
1504 }
1505 for (int j = 0; j < block_size() + width() + height() + 1; j++) {
1506 ref_[j] = rnd_.Rand8();
1507 }
1508 } else {
1509 for (int j = 0; j < block_size(); j++) {
1510 CONVERT_TO_SHORTPTR(src_)[j] = rnd_.Rand16() & mask();
1511 CONVERT_TO_SHORTPTR(sec_)[j] = rnd_.Rand16() & mask();
1512 }
1513 for (int j = 0; j < block_size() + width() + height() + 1; j++) {
1514 CONVERT_TO_SHORTPTR(ref_)[j] = rnd_.Rand16() & mask();
1515 }
1516 }
1517 for (int x0 = 0; x0 < 2; ++x0) {
1518 for (int y0 = 0; y0 < 4; ++y0) {
1519 uint32_t sse1, sse2;
1520 uint32_t var1, var2;
1521 jcp_param_.fwd_offset = quant_dist_lookup_table[y0][x0];
1522 jcp_param_.bck_offset = quant_dist_lookup_table[y0][1 - x0];
1523 API_REGISTER_STATE_CHECK(var1 = params_.func(ref_, width() + 0, x, y,
1524 src_, width(), &sse1,
1525 sec_, &jcp_param_));
1526 var2 = dist_wtd_subpel_avg_variance_ref(
1527 ref_, src_, sec_, params_.log2width, params_.log2height, x, y,
1528 &sse2, use_high_bit_depth(), params_.bit_depth, &jcp_param_);
1529 EXPECT_EQ(sse1, sse2) << "at position " << x << ", " << y;
1530 EXPECT_EQ(var1, var2) << "at position " << x << ", " << y;
1531 }
1532 }
1533 }
1534 }
1535 }
1536
1537 ////////////////////////////////////////////////////////////////////////////////
1538
1539 #if !CONFIG_REALTIME_ONLY
1540
1541 static const int kMaskMax = 64;
1542
1543 typedef TestParams<ObmcSubpelVarFunc> ObmcSubpelVarianceParams;
1544
1545 template <typename FunctionType>
1546 class ObmcVarianceTest
1547 : public ::testing::TestWithParam<TestParams<FunctionType> > {
1548 public:
SetUp()1549 void SetUp() override {
1550 params_ = this->GetParam();
1551
1552 rnd_.Reset(ACMRandom::DeterministicSeed());
1553 if (!use_high_bit_depth()) {
1554 pre_ = reinterpret_cast<uint8_t *>(
1555 aom_memalign(32, block_size() + width() + height() + 1));
1556 } else {
1557 pre_ = CONVERT_TO_BYTEPTR(reinterpret_cast<uint16_t *>(aom_memalign(
1558 32, (block_size() + width() + height() + 1) * sizeof(uint16_t))));
1559 }
1560 wsrc_ = reinterpret_cast<int32_t *>(
1561 aom_memalign(32, block_size() * sizeof(uint32_t)));
1562 mask_ = reinterpret_cast<int32_t *>(
1563 aom_memalign(32, block_size() * sizeof(uint32_t)));
1564 ASSERT_NE(pre_, nullptr);
1565 ASSERT_NE(wsrc_, nullptr);
1566 ASSERT_NE(mask_, nullptr);
1567 }
1568
TearDown()1569 void TearDown() override {
1570 if (!use_high_bit_depth()) {
1571 aom_free(pre_);
1572 } else {
1573 aom_free(CONVERT_TO_SHORTPTR(pre_));
1574 }
1575 aom_free(wsrc_);
1576 aom_free(mask_);
1577 }
1578
1579 protected:
1580 void RefTest();
1581 void ExtremeRefTest();
1582 void SpeedTest();
1583
1584 ACMRandom rnd_;
1585 uint8_t *pre_;
1586 int32_t *wsrc_;
1587 int32_t *mask_;
1588 TestParams<FunctionType> params_;
1589
1590 // some relay helpers
use_high_bit_depth() const1591 bool use_high_bit_depth() const { return params_.use_high_bit_depth; }
byte_shift() const1592 int byte_shift() const { return params_.bit_depth - 8; }
block_size() const1593 int block_size() const { return params_.block_size; }
width() const1594 int width() const { return params_.width; }
height() const1595 int height() const { return params_.height; }
bd_mask() const1596 uint32_t bd_mask() const { return params_.mask; }
1597 };
1598
1599 template <>
RefTest()1600 void ObmcVarianceTest<ObmcSubpelVarFunc>::RefTest() {
1601 for (int x = 0; x < 8; ++x) {
1602 for (int y = 0; y < 8; ++y) {
1603 if (!use_high_bit_depth())
1604 for (int j = 0; j < block_size() + width() + height() + 1; j++)
1605 pre_[j] = rnd_.Rand8();
1606 else
1607 for (int j = 0; j < block_size() + width() + height() + 1; j++)
1608 CONVERT_TO_SHORTPTR(pre_)[j] = rnd_.Rand16() & bd_mask();
1609 for (int j = 0; j < block_size(); j++) {
1610 wsrc_[j] = (rnd_.Rand16() & bd_mask()) * rnd_(kMaskMax * kMaskMax + 1);
1611 mask_[j] = rnd_(kMaskMax * kMaskMax + 1);
1612 }
1613
1614 uint32_t sse1, sse2;
1615 uint32_t var1, var2;
1616 API_REGISTER_STATE_CHECK(
1617 var1 = params_.func(pre_, width() + 1, x, y, wsrc_, mask_, &sse1));
1618 var2 = obmc_subpel_variance_ref(
1619 pre_, params_.log2width, params_.log2height, x, y, wsrc_, mask_,
1620 &sse2, use_high_bit_depth(), params_.bit_depth);
1621 EXPECT_EQ(sse1, sse2) << "for xoffset " << x << " and yoffset " << y;
1622 EXPECT_EQ(var1, var2) << "for xoffset " << x << " and yoffset " << y;
1623 }
1624 }
1625 }
1626
1627 template <>
ExtremeRefTest()1628 void ObmcVarianceTest<ObmcSubpelVarFunc>::ExtremeRefTest() {
1629 // Pre: Set the first half of values to the maximum, the second half to 0.
1630 // Mask: same as above
1631 // WSrc: Set the first half of values to 0, the second half to the maximum.
1632 for (int x = 0; x < 8; ++x) {
1633 for (int y = 0; y < 8; ++y) {
1634 const int half = block_size() / 2;
1635 if (!use_high_bit_depth()) {
1636 memset(pre_, 255, half);
1637 memset(pre_ + half, 0, half + width() + height() + 1);
1638 } else {
1639 aom_memset16(CONVERT_TO_SHORTPTR(pre_), bd_mask(), half);
1640 aom_memset16(CONVERT_TO_SHORTPTR(pre_) + half, 0,
1641 half + width() + height() + 1);
1642 }
1643 for (int j = 0; j < half; j++) {
1644 wsrc_[j] = bd_mask() * kMaskMax * kMaskMax;
1645 mask_[j] = 0;
1646 }
1647 for (int j = half; j < block_size(); j++) {
1648 wsrc_[j] = 0;
1649 mask_[j] = kMaskMax * kMaskMax;
1650 }
1651
1652 uint32_t sse1, sse2;
1653 uint32_t var1, var2;
1654 API_REGISTER_STATE_CHECK(
1655 var1 = params_.func(pre_, width() + 1, x, y, wsrc_, mask_, &sse1));
1656 var2 = obmc_subpel_variance_ref(
1657 pre_, params_.log2width, params_.log2height, x, y, wsrc_, mask_,
1658 &sse2, use_high_bit_depth(), params_.bit_depth);
1659 EXPECT_EQ(sse1, sse2) << "for xoffset " << x << " and yoffset " << y;
1660 EXPECT_EQ(var1, var2) << "for xoffset " << x << " and yoffset " << y;
1661 }
1662 }
1663 }
1664
1665 template <>
SpeedTest()1666 void ObmcVarianceTest<ObmcSubpelVarFunc>::SpeedTest() {
1667 if (!use_high_bit_depth())
1668 for (int j = 0; j < block_size() + width() + height() + 1; j++)
1669 pre_[j] = rnd_.Rand8();
1670 else
1671 for (int j = 0; j < block_size() + width() + height() + 1; j++)
1672 CONVERT_TO_SHORTPTR(pre_)[j] = rnd_.Rand16() & bd_mask();
1673 for (int j = 0; j < block_size(); j++) {
1674 wsrc_[j] = (rnd_.Rand16() & bd_mask()) * rnd_(kMaskMax * kMaskMax + 1);
1675 mask_[j] = rnd_(kMaskMax * kMaskMax + 1);
1676 }
1677 unsigned int sse1;
1678 const int stride = width() + 1;
1679 int run_time = 1000000000 / block_size();
1680 aom_usec_timer timer;
1681
1682 aom_usec_timer_start(&timer);
1683 for (int i = 0; i < run_time; ++i) {
1684 int x = rnd_(8);
1685 int y = rnd_(8);
1686 API_REGISTER_STATE_CHECK(
1687 params_.func(pre_, stride, x, y, wsrc_, mask_, &sse1));
1688 }
1689 aom_usec_timer_mark(&timer);
1690
1691 const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
1692 printf("obmc_sub_pixel_variance_%dx%d_%d: %d us\n", width(), height(),
1693 params_.bit_depth, elapsed_time);
1694 }
1695
1696 #endif // !CONFIG_REALTIME_ONLY
1697
1698 typedef MseWxHTestClass<MseWxH16bitFunc> MseWxHTest;
1699 typedef Mse16xHTestClass<Mse16xH16bitFunc> Mse16xHTest;
1700 typedef MainTestClass<VarianceMxNFunc> AvxMseTest;
1701 typedef MainTestClass<VarianceMxNFunc> AvxVarianceTest;
1702 typedef MainTestClass<GetSseSum8x8QuadFunc> GetSseSum8x8QuadTest;
1703 typedef MainTestClass<GetSseSum16x16DualFunc> GetSseSum16x16DualTest;
1704 typedef SubpelVarianceTest<SubpixVarMxNFunc> AvxSubpelVarianceTest;
1705 typedef SubpelVarianceTest<SubpixAvgVarMxNFunc> AvxSubpelAvgVarianceTest;
1706 typedef SubpelVarianceTest<DistWtdSubpixAvgVarMxNFunc>
1707 AvxDistWtdSubpelAvgVarianceTest;
1708 #if !CONFIG_REALTIME_ONLY
1709 typedef ObmcVarianceTest<ObmcSubpelVarFunc> AvxObmcSubpelVarianceTest;
1710 #endif
1711 typedef TestParams<MseWxH16bitFunc> MseWxHParams;
1712 typedef TestParams<Mse16xH16bitFunc> Mse16xHParams;
1713
// Test-case registrations. Each case forwards to one fixture method; cases
// prefixed with DISABLED_ are benchmarks that only run when explicitly
// requested.

// --- MseWxHTest ---
TEST_P(MseWxHTest, RefMse) {
  RefMatchTestMse();
}
TEST_P(MseWxHTest, DISABLED_SpeedMse) {
  SpeedTest();
}

// --- Mse16xHTest ---
TEST_P(Mse16xHTest, RefMse) {
  RefMatchTestMse();
}
TEST_P(Mse16xHTest, RefMseExtreme) {
  RefMatchExtremeTestMse();
}
TEST_P(Mse16xHTest, DISABLED_SpeedMse) {
  SpeedTest();
}

// --- AvxMseTest ---
TEST_P(AvxMseTest, RefMse) {
  RefTestMse();
}
TEST_P(AvxMseTest, MaxMse) {
  MaxTestMse();
}

// --- AvxVarianceTest ---
TEST_P(AvxVarianceTest, Zero) {
  ZeroTest();
}
TEST_P(AvxVarianceTest, Ref) {
  RefTest();
}
TEST_P(AvxVarianceTest, RefStride) {
  RefStrideTest();
}
TEST_P(AvxVarianceTest, OneQuarter) {
  OneQuarterTest();
}
TEST_P(AvxVarianceTest, DISABLED_Speed) {
  SpeedTest();
}

// --- GetSseSum8x8QuadTest ---
TEST_P(GetSseSum8x8QuadTest, RefMseSum) {
  RefTestSseSum();
}
TEST_P(GetSseSum8x8QuadTest, MinSseSum) {
  MinTestSseSum();
}
TEST_P(GetSseSum8x8QuadTest, MaxMseSum) {
  MaxTestSseSum();
}
TEST_P(GetSseSum8x8QuadTest, DISABLED_Speed) {
  SseSum_SpeedTest();
}

// --- GetSseSum16x16DualTest ---
TEST_P(GetSseSum16x16DualTest, RefMseSum) {
  RefTestSseSumDual();
}
TEST_P(GetSseSum16x16DualTest, MinSseSum) {
  MinTestSseSumDual();
}
TEST_P(GetSseSum16x16DualTest, MaxMseSum) {
  MaxTestSseSumDual();
}
TEST_P(GetSseSum16x16DualTest, DISABLED_Speed) {
  SseSum_SpeedTestDual();
}

// --- SumOfSquaresTest (not built in realtime-only configurations) ---
#if !CONFIG_REALTIME_ONLY
TEST_P(SumOfSquaresTest, Const) {
  ConstTest();
}
TEST_P(SumOfSquaresTest, Ref) {
  RefTest();
}
#endif  // !CONFIG_REALTIME_ONLY

// --- AvxSubpelVarianceTest ---
TEST_P(AvxSubpelVarianceTest, Ref) {
  RefTest();
}
TEST_P(AvxSubpelVarianceTest, ExtremeRef) {
  ExtremeRefTest();
}
TEST_P(AvxSubpelVarianceTest, DISABLED_Speed) {
  SpeedTest();
}

// --- AvxSubpelAvgVarianceTest / AvxDistWtdSubpelAvgVarianceTest ---
TEST_P(AvxSubpelAvgVarianceTest, Ref) {
  RefTest();
}
TEST_P(AvxDistWtdSubpelAvgVarianceTest, Ref) {
  RefTest();
}

// --- AvxObmcSubpelVarianceTest (not built in realtime-only configurations) ---
#if !CONFIG_REALTIME_ONLY
TEST_P(AvxObmcSubpelVarianceTest, Ref) {
  RefTest();
}
TEST_P(AvxObmcSubpelVarianceTest, ExtremeRef) {
  ExtremeRefTest();
}
TEST_P(AvxObmcSubpelVarianceTest, DISABLED_Speed) {
  SpeedTest();
}
#endif  // !CONFIG_REALTIME_ONLY
1748
1749 INSTANTIATE_TEST_SUITE_P(
1750 C, MseWxHTest,
1751 ::testing::Values(MseWxHParams(3, 3, &aom_mse_wxh_16bit_c, 8),
1752 MseWxHParams(3, 2, &aom_mse_wxh_16bit_c, 8),
1753 MseWxHParams(2, 3, &aom_mse_wxh_16bit_c, 8),
1754 MseWxHParams(2, 2, &aom_mse_wxh_16bit_c, 8)));
1755
1756 INSTANTIATE_TEST_SUITE_P(
1757 C, Mse16xHTest,
1758 ::testing::Values(Mse16xHParams(3, 3, &aom_mse_16xh_16bit_c, 8),
1759 Mse16xHParams(3, 2, &aom_mse_16xh_16bit_c, 8),
1760 Mse16xHParams(2, 3, &aom_mse_16xh_16bit_c, 8),
1761 Mse16xHParams(2, 2, &aom_mse_16xh_16bit_c, 8)));
1762
1763 #if !CONFIG_REALTIME_ONLY
1764 INSTANTIATE_TEST_SUITE_P(C, SumOfSquaresTest,
1765 ::testing::Values(aom_get_mb_ss_c));
1766 #endif // !CONFIG_REALTIME_ONLY
1767
1768 typedef TestParams<VarianceMxNFunc> MseParams;
1769 INSTANTIATE_TEST_SUITE_P(C, AvxMseTest,
1770 ::testing::Values(MseParams(4, 4, &aom_mse16x16_c),
1771 MseParams(4, 3, &aom_mse16x8_c),
1772 MseParams(3, 4, &aom_mse8x16_c),
1773 MseParams(3, 3, &aom_mse8x8_c)));
1774
1775 typedef TestParams<VarianceMxNFunc> VarianceParams;
1776 const VarianceParams kArrayVariance_c[] = {
1777 VarianceParams(7, 7, &aom_variance128x128_c),
1778 VarianceParams(7, 6, &aom_variance128x64_c),
1779 VarianceParams(6, 7, &aom_variance64x128_c),
1780 VarianceParams(6, 6, &aom_variance64x64_c),
1781 VarianceParams(6, 5, &aom_variance64x32_c),
1782 VarianceParams(5, 6, &aom_variance32x64_c),
1783 VarianceParams(5, 5, &aom_variance32x32_c),
1784 VarianceParams(5, 4, &aom_variance32x16_c),
1785 VarianceParams(4, 5, &aom_variance16x32_c),
1786 VarianceParams(4, 4, &aom_variance16x16_c),
1787 VarianceParams(4, 3, &aom_variance16x8_c),
1788 VarianceParams(3, 4, &aom_variance8x16_c),
1789 VarianceParams(3, 3, &aom_variance8x8_c),
1790 VarianceParams(3, 2, &aom_variance8x4_c),
1791 VarianceParams(2, 3, &aom_variance4x8_c),
1792 VarianceParams(2, 2, &aom_variance4x4_c),
1793 #if !CONFIG_REALTIME_ONLY
1794 VarianceParams(6, 4, &aom_variance64x16_c),
1795 VarianceParams(4, 6, &aom_variance16x64_c),
1796 VarianceParams(5, 3, &aom_variance32x8_c),
1797 VarianceParams(3, 5, &aom_variance8x32_c),
1798 VarianceParams(4, 2, &aom_variance16x4_c),
1799 VarianceParams(2, 4, &aom_variance4x16_c),
1800 #endif
1801 };
1802 INSTANTIATE_TEST_SUITE_P(C, AvxVarianceTest,
1803 ::testing::ValuesIn(kArrayVariance_c));
1804
1805 typedef TestParams<GetSseSum8x8QuadFunc> GetSseSumParams;
1806 const GetSseSumParams kArrayGetSseSum8x8Quad_c[] = {
1807 GetSseSumParams(7, 7, &aom_get_var_sse_sum_8x8_quad_c, 0),
1808 GetSseSumParams(6, 6, &aom_get_var_sse_sum_8x8_quad_c, 0),
1809 GetSseSumParams(5, 5, &aom_get_var_sse_sum_8x8_quad_c, 0),
1810 GetSseSumParams(5, 4, &aom_get_var_sse_sum_8x8_quad_c, 0)
1811 };
1812 INSTANTIATE_TEST_SUITE_P(C, GetSseSum8x8QuadTest,
1813 ::testing::ValuesIn(kArrayGetSseSum8x8Quad_c));
1814
1815 typedef TestParams<GetSseSum16x16DualFunc> GetSseSumParamsDual;
1816 const GetSseSumParamsDual kArrayGetSseSum16x16Dual_c[] = {
1817 GetSseSumParamsDual(7, 7, &aom_get_var_sse_sum_16x16_dual_c, 0),
1818 GetSseSumParamsDual(6, 6, &aom_get_var_sse_sum_16x16_dual_c, 0),
1819 GetSseSumParamsDual(5, 5, &aom_get_var_sse_sum_16x16_dual_c, 0),
1820 GetSseSumParamsDual(5, 4, &aom_get_var_sse_sum_16x16_dual_c, 0)
1821 };
1822
1823 INSTANTIATE_TEST_SUITE_P(C, GetSseSum16x16DualTest,
1824 ::testing::ValuesIn(kArrayGetSseSum16x16Dual_c));
1825
1826 typedef TestParams<SubpixVarMxNFunc> SubpelVarianceParams;
1827 const SubpelVarianceParams kArraySubpelVariance_c[] = {
1828 SubpelVarianceParams(7, 7, &aom_sub_pixel_variance128x128_c, 0),
1829 SubpelVarianceParams(7, 6, &aom_sub_pixel_variance128x64_c, 0),
1830 SubpelVarianceParams(6, 7, &aom_sub_pixel_variance64x128_c, 0),
1831 SubpelVarianceParams(6, 6, &aom_sub_pixel_variance64x64_c, 0),
1832 SubpelVarianceParams(6, 5, &aom_sub_pixel_variance64x32_c, 0),
1833 SubpelVarianceParams(5, 6, &aom_sub_pixel_variance32x64_c, 0),
1834 SubpelVarianceParams(5, 5, &aom_sub_pixel_variance32x32_c, 0),
1835 SubpelVarianceParams(5, 4, &aom_sub_pixel_variance32x16_c, 0),
1836 SubpelVarianceParams(4, 5, &aom_sub_pixel_variance16x32_c, 0),
1837 SubpelVarianceParams(4, 4, &aom_sub_pixel_variance16x16_c, 0),
1838 SubpelVarianceParams(4, 3, &aom_sub_pixel_variance16x8_c, 0),
1839 SubpelVarianceParams(3, 4, &aom_sub_pixel_variance8x16_c, 0),
1840 SubpelVarianceParams(3, 3, &aom_sub_pixel_variance8x8_c, 0),
1841 SubpelVarianceParams(3, 2, &aom_sub_pixel_variance8x4_c, 0),
1842 SubpelVarianceParams(2, 3, &aom_sub_pixel_variance4x8_c, 0),
1843 SubpelVarianceParams(2, 2, &aom_sub_pixel_variance4x4_c, 0),
1844 #if !CONFIG_REALTIME_ONLY
1845 SubpelVarianceParams(6, 4, &aom_sub_pixel_variance64x16_c, 0),
1846 SubpelVarianceParams(4, 6, &aom_sub_pixel_variance16x64_c, 0),
1847 SubpelVarianceParams(5, 3, &aom_sub_pixel_variance32x8_c, 0),
1848 SubpelVarianceParams(3, 5, &aom_sub_pixel_variance8x32_c, 0),
1849 SubpelVarianceParams(4, 2, &aom_sub_pixel_variance16x4_c, 0),
1850 SubpelVarianceParams(2, 4, &aom_sub_pixel_variance4x16_c, 0),
1851 #endif
1852 };
1853 INSTANTIATE_TEST_SUITE_P(C, AvxSubpelVarianceTest,
1854 ::testing::ValuesIn(kArraySubpelVariance_c));
1855
1856 typedef TestParams<SubpixAvgVarMxNFunc> SubpelAvgVarianceParams;
1857 const SubpelAvgVarianceParams kArraySubpelAvgVariance_c[] = {
1858 SubpelAvgVarianceParams(7, 7, &aom_sub_pixel_avg_variance128x128_c, 0),
1859 SubpelAvgVarianceParams(7, 6, &aom_sub_pixel_avg_variance128x64_c, 0),
1860 SubpelAvgVarianceParams(6, 7, &aom_sub_pixel_avg_variance64x128_c, 0),
1861 SubpelAvgVarianceParams(6, 6, &aom_sub_pixel_avg_variance64x64_c, 0),
1862 SubpelAvgVarianceParams(6, 5, &aom_sub_pixel_avg_variance64x32_c, 0),
1863 SubpelAvgVarianceParams(5, 6, &aom_sub_pixel_avg_variance32x64_c, 0),
1864 SubpelAvgVarianceParams(5, 5, &aom_sub_pixel_avg_variance32x32_c, 0),
1865 SubpelAvgVarianceParams(5, 4, &aom_sub_pixel_avg_variance32x16_c, 0),
1866 SubpelAvgVarianceParams(4, 5, &aom_sub_pixel_avg_variance16x32_c, 0),
1867 SubpelAvgVarianceParams(4, 4, &aom_sub_pixel_avg_variance16x16_c, 0),
1868 SubpelAvgVarianceParams(4, 3, &aom_sub_pixel_avg_variance16x8_c, 0),
1869 SubpelAvgVarianceParams(3, 4, &aom_sub_pixel_avg_variance8x16_c, 0),
1870 SubpelAvgVarianceParams(3, 3, &aom_sub_pixel_avg_variance8x8_c, 0),
1871 SubpelAvgVarianceParams(3, 2, &aom_sub_pixel_avg_variance8x4_c, 0),
1872 SubpelAvgVarianceParams(2, 3, &aom_sub_pixel_avg_variance4x8_c, 0),
1873 SubpelAvgVarianceParams(2, 2, &aom_sub_pixel_avg_variance4x4_c, 0),
1874 #if !CONFIG_REALTIME_ONLY
1875 SubpelAvgVarianceParams(6, 4, &aom_sub_pixel_avg_variance64x16_c, 0),
1876 SubpelAvgVarianceParams(4, 6, &aom_sub_pixel_avg_variance16x64_c, 0),
1877 SubpelAvgVarianceParams(5, 3, &aom_sub_pixel_avg_variance32x8_c, 0),
1878 SubpelAvgVarianceParams(3, 5, &aom_sub_pixel_avg_variance8x32_c, 0),
1879 SubpelAvgVarianceParams(4, 2, &aom_sub_pixel_avg_variance16x4_c, 0),
1880 SubpelAvgVarianceParams(2, 4, &aom_sub_pixel_avg_variance4x16_c, 0),
1881 #endif
1882 };
1883 INSTANTIATE_TEST_SUITE_P(C, AvxSubpelAvgVarianceTest,
1884 ::testing::ValuesIn(kArraySubpelAvgVariance_c));
1885
1886 typedef TestParams<DistWtdSubpixAvgVarMxNFunc> DistWtdSubpelAvgVarianceParams;
1887 const DistWtdSubpelAvgVarianceParams kArrayDistWtdSubpelAvgVariance_c[] = {
1888 DistWtdSubpelAvgVarianceParams(
1889 6, 6, &aom_dist_wtd_sub_pixel_avg_variance64x64_c, 0),
1890 DistWtdSubpelAvgVarianceParams(
1891 6, 5, &aom_dist_wtd_sub_pixel_avg_variance64x32_c, 0),
1892 DistWtdSubpelAvgVarianceParams(
1893 5, 6, &aom_dist_wtd_sub_pixel_avg_variance32x64_c, 0),
1894 DistWtdSubpelAvgVarianceParams(
1895 5, 5, &aom_dist_wtd_sub_pixel_avg_variance32x32_c, 0),
1896 DistWtdSubpelAvgVarianceParams(
1897 5, 4, &aom_dist_wtd_sub_pixel_avg_variance32x16_c, 0),
1898 DistWtdSubpelAvgVarianceParams(
1899 4, 5, &aom_dist_wtd_sub_pixel_avg_variance16x32_c, 0),
1900 DistWtdSubpelAvgVarianceParams(
1901 4, 4, &aom_dist_wtd_sub_pixel_avg_variance16x16_c, 0),
1902 DistWtdSubpelAvgVarianceParams(4, 3,
1903 &aom_dist_wtd_sub_pixel_avg_variance16x8_c, 0),
1904 DistWtdSubpelAvgVarianceParams(3, 4,
1905 &aom_dist_wtd_sub_pixel_avg_variance8x16_c, 0),
1906 DistWtdSubpelAvgVarianceParams(3, 3,
1907 &aom_dist_wtd_sub_pixel_avg_variance8x8_c, 0),
1908 DistWtdSubpelAvgVarianceParams(3, 2,
1909 &aom_dist_wtd_sub_pixel_avg_variance8x4_c, 0),
1910 DistWtdSubpelAvgVarianceParams(2, 3,
1911 &aom_dist_wtd_sub_pixel_avg_variance4x8_c, 0),
1912 DistWtdSubpelAvgVarianceParams(2, 2,
1913 &aom_dist_wtd_sub_pixel_avg_variance4x4_c, 0),
1914 #if !CONFIG_REALTIME_ONLY
1915
1916 DistWtdSubpelAvgVarianceParams(
1917 6, 4, &aom_dist_wtd_sub_pixel_avg_variance64x16_c, 0),
1918 DistWtdSubpelAvgVarianceParams(
1919 4, 6, &aom_dist_wtd_sub_pixel_avg_variance16x64_c, 0),
1920 DistWtdSubpelAvgVarianceParams(5, 3,
1921 &aom_dist_wtd_sub_pixel_avg_variance32x8_c, 0),
1922 DistWtdSubpelAvgVarianceParams(3, 5,
1923 &aom_dist_wtd_sub_pixel_avg_variance8x32_c, 0),
1924 DistWtdSubpelAvgVarianceParams(4, 2,
1925 &aom_dist_wtd_sub_pixel_avg_variance16x4_c, 0),
1926 DistWtdSubpelAvgVarianceParams(2, 4,
1927 &aom_dist_wtd_sub_pixel_avg_variance4x16_c, 0),
1928 #endif
1929 };
1930 INSTANTIATE_TEST_SUITE_P(C, AvxDistWtdSubpelAvgVarianceTest,
1931 ::testing::ValuesIn(kArrayDistWtdSubpelAvgVariance_c));
1932
1933 #if !CONFIG_REALTIME_ONLY
1934 INSTANTIATE_TEST_SUITE_P(
1935 C, AvxObmcSubpelVarianceTest,
1936 ::testing::Values(
1937 ObmcSubpelVarianceParams(7, 7, &aom_obmc_sub_pixel_variance128x128_c,
1938 0),
1939 ObmcSubpelVarianceParams(7, 6, &aom_obmc_sub_pixel_variance128x64_c, 0),
1940 ObmcSubpelVarianceParams(6, 7, &aom_obmc_sub_pixel_variance64x128_c, 0),
1941 ObmcSubpelVarianceParams(6, 6, &aom_obmc_sub_pixel_variance64x64_c, 0),
1942 ObmcSubpelVarianceParams(6, 5, &aom_obmc_sub_pixel_variance64x32_c, 0),
1943 ObmcSubpelVarianceParams(5, 6, &aom_obmc_sub_pixel_variance32x64_c, 0),
1944 ObmcSubpelVarianceParams(5, 5, &aom_obmc_sub_pixel_variance32x32_c, 0),
1945 ObmcSubpelVarianceParams(5, 4, &aom_obmc_sub_pixel_variance32x16_c, 0),
1946 ObmcSubpelVarianceParams(4, 5, &aom_obmc_sub_pixel_variance16x32_c, 0),
1947 ObmcSubpelVarianceParams(4, 4, &aom_obmc_sub_pixel_variance16x16_c, 0),
1948 ObmcSubpelVarianceParams(4, 3, &aom_obmc_sub_pixel_variance16x8_c, 0),
1949 ObmcSubpelVarianceParams(3, 4, &aom_obmc_sub_pixel_variance8x16_c, 0),
1950 ObmcSubpelVarianceParams(3, 3, &aom_obmc_sub_pixel_variance8x8_c, 0),
1951 ObmcSubpelVarianceParams(3, 2, &aom_obmc_sub_pixel_variance8x4_c, 0),
1952 ObmcSubpelVarianceParams(2, 3, &aom_obmc_sub_pixel_variance4x8_c, 0),
1953 ObmcSubpelVarianceParams(2, 2, &aom_obmc_sub_pixel_variance4x4_c, 0),
1954
1955 ObmcSubpelVarianceParams(6, 4, &aom_obmc_sub_pixel_variance64x16_c, 0),
1956 ObmcSubpelVarianceParams(4, 6, &aom_obmc_sub_pixel_variance16x64_c, 0),
1957 ObmcSubpelVarianceParams(5, 3, &aom_obmc_sub_pixel_variance32x8_c, 0),
1958 ObmcSubpelVarianceParams(3, 5, &aom_obmc_sub_pixel_variance8x32_c, 0),
1959 ObmcSubpelVarianceParams(4, 2, &aom_obmc_sub_pixel_variance16x4_c, 0),
1960 ObmcSubpelVarianceParams(2, 4, &aom_obmc_sub_pixel_variance4x16_c, 0)));
1961 #endif
1962
1963 #if CONFIG_AV1_HIGHBITDEPTH
// Signature shared by the high-bitdepth WxH MSE kernels: both buffers hold
// 16-bit samples and each has its own stride; the result is a 64-bit value.
using MseHBDWxH16bitFunc = uint64_t (*)(uint16_t *dst, int dstride,
                                        uint16_t *src, int sstride, int w,
                                        int h);
1967
1968 template <typename FunctionType>
1969 class MseHBDWxHTestClass
1970 : public ::testing::TestWithParam<TestParams<FunctionType> > {
1971 public:
SetUp()1972 void SetUp() override {
1973 params_ = this->GetParam();
1974
1975 rnd_.Reset(ACMRandom::DeterministicSeed());
1976 src_ = reinterpret_cast<uint16_t *>(
1977 aom_memalign(16, block_size() * sizeof(src_)));
1978 dst_ = reinterpret_cast<uint16_t *>(
1979 aom_memalign(16, block_size() * sizeof(dst_)));
1980 ASSERT_NE(src_, nullptr);
1981 ASSERT_NE(dst_, nullptr);
1982 }
1983
TearDown()1984 void TearDown() override {
1985 aom_free(src_);
1986 aom_free(dst_);
1987 src_ = nullptr;
1988 dst_ = nullptr;
1989 }
1990
1991 protected:
1992 void RefMatchTestMse();
1993 void SpeedTest();
1994
1995 protected:
1996 ACMRandom rnd_;
1997 uint16_t *dst_;
1998 uint16_t *src_;
1999 TestParams<FunctionType> params_;
2000
2001 // some relay helpers
block_size() const2002 int block_size() const { return params_.block_size; }
width() const2003 int width() const { return params_.width; }
d_stride() const2004 int d_stride() const { return params_.width; } // stride is same as width
s_stride() const2005 int s_stride() const { return params_.width; } // stride is same as width
height() const2006 int height() const { return params_.height; }
mask() const2007 int mask() const { return params_.mask; }
2008 };
2009
2010 template <typename MseHBDWxHFunctionType>
SpeedTest()2011 void MseHBDWxHTestClass<MseHBDWxHFunctionType>::SpeedTest() {
2012 aom_usec_timer ref_timer, test_timer;
2013 double elapsed_time_c = 0;
2014 double elapsed_time_simd = 0;
2015 int run_time = 10000000;
2016 int w = width();
2017 int h = height();
2018 int dstride = d_stride();
2019 int sstride = s_stride();
2020 for (int k = 0; k < block_size(); ++k) {
2021 dst_[k] = rnd_.Rand16() & mask();
2022 src_[k] = rnd_.Rand16() & mask();
2023 }
2024 aom_usec_timer_start(&ref_timer);
2025 for (int i = 0; i < run_time; i++) {
2026 aom_mse_wxh_16bit_highbd_c(dst_, dstride, src_, sstride, w, h);
2027 }
2028 aom_usec_timer_mark(&ref_timer);
2029 elapsed_time_c = static_cast<double>(aom_usec_timer_elapsed(&ref_timer));
2030
2031 aom_usec_timer_start(&test_timer);
2032 for (int i = 0; i < run_time; i++) {
2033 params_.func(dst_, dstride, src_, sstride, w, h);
2034 }
2035 aom_usec_timer_mark(&test_timer);
2036 elapsed_time_simd = static_cast<double>(aom_usec_timer_elapsed(&test_timer));
2037
2038 printf("%dx%d\tc_time=%lf \t simd_time=%lf \t gain=%lf\n", width(), height(),
2039 elapsed_time_c, elapsed_time_simd,
2040 (elapsed_time_c / elapsed_time_simd));
2041 }
2042
2043 template <typename MseHBDWxHFunctionType>
RefMatchTestMse()2044 void MseHBDWxHTestClass<MseHBDWxHFunctionType>::RefMatchTestMse() {
2045 uint64_t mse_ref = 0;
2046 uint64_t mse_mod = 0;
2047 int w = width();
2048 int h = height();
2049 int dstride = d_stride();
2050 int sstride = s_stride();
2051 for (int i = 0; i < 10; i++) {
2052 for (int k = 0; k < block_size(); ++k) {
2053 dst_[k] = rnd_.Rand16() & mask();
2054 src_[k] = rnd_.Rand16() & mask();
2055 }
2056 API_REGISTER_STATE_CHECK(mse_ref = aom_mse_wxh_16bit_highbd_c(
2057 dst_, dstride, src_, sstride, w, h));
2058 API_REGISTER_STATE_CHECK(
2059 mse_mod = params_.func(dst_, dstride, src_, sstride, w, h));
2060 EXPECT_EQ(mse_ref, mse_mod)
2061 << "ref mse: " << mse_ref << " mod mse: " << mse_mod;
2062 }
2063 }
2064
2065 typedef TestParams<MseHBDWxH16bitFunc> MseHBDWxHParams;
2066 typedef MseHBDWxHTestClass<MseHBDWxH16bitFunc> MseHBDWxHTest;
2067 typedef MainTestClass<VarianceMxNFunc> AvxHBDMseTest;
2068 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AvxHBDMseTest);
2069 typedef MainTestClass<VarianceMxNFunc> AvxHBDVarianceTest;
2070 typedef SubpelVarianceTest<SubpixVarMxNFunc> AvxHBDSubpelVarianceTest;
2071 typedef SubpelVarianceTest<SubpixAvgVarMxNFunc> AvxHBDSubpelAvgVarianceTest;
2072 typedef SubpelVarianceTest<DistWtdSubpixAvgVarMxNFunc>
2073 AvxHBDDistWtdSubpelAvgVarianceTest;
2074 #if !CONFIG_REALTIME_ONLY
2075 typedef ObmcVarianceTest<ObmcSubpelVarFunc> AvxHBDObmcSubpelVarianceTest;
2076 #endif
2077 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AvxHBDObmcSubpelVarianceTest);
2078
// High-bitdepth MSE suites; the SpeedMse tests are disabled by default.
TEST_P(MseHBDWxHTest, RefMse) {
  RefMatchTestMse();
}
TEST_P(MseHBDWxHTest, DISABLED_SpeedMse) {
  SpeedTest();
}
TEST_P(AvxHBDMseTest, RefMse) {
  RefTestMse();
}
TEST_P(AvxHBDMseTest, MaxMse) {
  MaxTestMse();
}
TEST_P(AvxHBDMseTest, DISABLED_SpeedMse) {
  SpeedTest();
}
// High-bitdepth variance suite; each test forwards to the fixture helper of
// the corresponding name. The Speed test is disabled by default.
TEST_P(AvxHBDVarianceTest, Zero) {
  ZeroTest();
}
TEST_P(AvxHBDVarianceTest, Ref) {
  RefTest();
}
TEST_P(AvxHBDVarianceTest, RefStride) {
  RefStrideTest();
}
TEST_P(AvxHBDVarianceTest, OneQuarter) {
  OneQuarterTest();
}
TEST_P(AvxHBDVarianceTest, DISABLED_Speed) {
  SpeedTest();
}
// High-bitdepth sub-pixel variance suites; the Speed test is disabled by
// default.
TEST_P(AvxHBDSubpelVarianceTest, Ref) {
  RefTest();
}
TEST_P(AvxHBDSubpelVarianceTest, ExtremeRef) {
  ExtremeRefTest();
}
TEST_P(AvxHBDSubpelVarianceTest, DISABLED_Speed) {
  SpeedTest();
}
TEST_P(AvxHBDSubpelAvgVarianceTest, Ref) {
  RefTest();
}
TEST_P(AvxHBDDistWtdSubpelAvgVarianceTest, Ref) {
  RefTest();
}
#if !CONFIG_REALTIME_ONLY
// High-bitdepth OBMC sub-pixel variance tests; excluded from realtime-only
// builds.
TEST_P(AvxHBDObmcSubpelVarianceTest, Ref) {
  RefTest();
}
TEST_P(AvxHBDObmcSubpelVarianceTest, ExtremeRef) {
  ExtremeRefTest();
}
TEST_P(AvxHBDObmcSubpelVarianceTest, DISABLED_Speed) {
  SpeedTest();
}
#endif
2099
2100 INSTANTIATE_TEST_SUITE_P(
2101 C, MseHBDWxHTest,
2102 ::testing::Values(MseHBDWxHParams(3, 3, &aom_mse_wxh_16bit_highbd_c, 10),
2103 MseHBDWxHParams(3, 2, &aom_mse_wxh_16bit_highbd_c, 10),
2104 MseHBDWxHParams(2, 3, &aom_mse_wxh_16bit_highbd_c, 10),
2105 MseHBDWxHParams(2, 2, &aom_mse_wxh_16bit_highbd_c, 10)));
2106
2107 INSTANTIATE_TEST_SUITE_P(
2108 C, AvxHBDMseTest,
2109 ::testing::Values(MseParams(4, 4, &aom_highbd_12_mse16x16_c, 12),
2110 MseParams(4, 3, &aom_highbd_12_mse16x8_c, 12),
2111 MseParams(3, 4, &aom_highbd_12_mse8x16_c, 12),
2112 MseParams(3, 3, &aom_highbd_12_mse8x8_c, 12),
2113 MseParams(4, 4, &aom_highbd_10_mse16x16_c, 10),
2114 MseParams(4, 3, &aom_highbd_10_mse16x8_c, 10),
2115 MseParams(3, 4, &aom_highbd_10_mse8x16_c, 10),
2116 MseParams(3, 3, &aom_highbd_10_mse8x8_c, 10),
2117 MseParams(4, 4, &aom_highbd_8_mse16x16_c, 8),
2118 MseParams(4, 3, &aom_highbd_8_mse16x8_c, 8),
2119 MseParams(3, 4, &aom_highbd_8_mse8x16_c, 8),
2120 MseParams(3, 3, &aom_highbd_8_mse8x8_c, 8)));
2121
#if HAVE_NEON
// NEON counterparts of the two C instantiations (MseHBDWxHTest and
// AvxHBDMseTest), with the same parameter layout.
INSTANTIATE_TEST_SUITE_P(
    NEON, MseHBDWxHTest,
    ::testing::Values(MseHBDWxHParams(3, 3, aom_mse_wxh_16bit_highbd_neon, 10),
                      MseHBDWxHParams(3, 2, aom_mse_wxh_16bit_highbd_neon, 10),
                      MseHBDWxHParams(2, 3, aom_mse_wxh_16bit_highbd_neon, 10),
                      MseHBDWxHParams(2, 2, aom_mse_wxh_16bit_highbd_neon,
                                      10)));

INSTANTIATE_TEST_SUITE_P(
    NEON, AvxHBDMseTest,
    ::testing::Values(MseParams(4, 4, aom_highbd_12_mse16x16_neon, 12),
                      MseParams(4, 3, aom_highbd_12_mse16x8_neon, 12),
                      MseParams(3, 4, aom_highbd_12_mse8x16_neon, 12),
                      MseParams(3, 3, aom_highbd_12_mse8x8_neon, 12),
                      MseParams(4, 4, aom_highbd_10_mse16x16_neon, 10),
                      MseParams(4, 3, aom_highbd_10_mse16x8_neon, 10),
                      MseParams(3, 4, aom_highbd_10_mse8x16_neon, 10),
                      MseParams(3, 3, aom_highbd_10_mse8x8_neon, 10),
                      MseParams(4, 4, aom_highbd_8_mse16x16_neon, 8),
                      MseParams(4, 3, aom_highbd_8_mse16x8_neon, 8),
                      MseParams(3, 4, aom_highbd_8_mse8x16_neon, 8),
                      MseParams(3, 3, aom_highbd_8_mse8x8_neon, 8)));
#endif  // HAVE_NEON
2146
#if HAVE_NEON_DOTPROD
// NEON dot-product instantiation; only the highbd_8 MSE kernels are listed.
INSTANTIATE_TEST_SUITE_P(
    NEON_DOTPROD, AvxHBDMseTest,
    ::testing::Values(MseParams(4, 4, aom_highbd_8_mse16x16_neon_dotprod, 8),
                      MseParams(4, 3, aom_highbd_8_mse16x8_neon_dotprod, 8),
                      MseParams(3, 4, aom_highbd_8_mse8x16_neon_dotprod, 8),
                      MseParams(3, 3, aom_highbd_8_mse8x8_neon_dotprod, 8)));
#endif  // HAVE_NEON_DOTPROD
2155
#if HAVE_SVE
// SVE instantiations. The AvxHBDMseTest list covers the highbd_12 and
// highbd_10 kernels only; no highbd_8 SVE entry is listed here.
INSTANTIATE_TEST_SUITE_P(
    SVE, MseHBDWxHTest,
    ::testing::Values(MseHBDWxHParams(3, 3, aom_mse_wxh_16bit_highbd_sve, 10),
                      MseHBDWxHParams(3, 2, aom_mse_wxh_16bit_highbd_sve, 10),
                      MseHBDWxHParams(2, 3, aom_mse_wxh_16bit_highbd_sve, 10),
                      MseHBDWxHParams(2, 2, aom_mse_wxh_16bit_highbd_sve,
                                      10)));

INSTANTIATE_TEST_SUITE_P(
    SVE, AvxHBDMseTest,
    ::testing::Values(MseParams(4, 4, aom_highbd_12_mse16x16_sve, 12),
                      MseParams(4, 3, aom_highbd_12_mse16x8_sve, 12),
                      MseParams(3, 4, aom_highbd_12_mse8x16_sve, 12),
                      MseParams(3, 3, aom_highbd_12_mse8x8_sve, 12),
                      MseParams(4, 4, aom_highbd_10_mse16x16_sve, 10),
                      MseParams(4, 3, aom_highbd_10_mse16x8_sve, 10),
                      MseParams(3, 4, aom_highbd_10_mse8x16_sve, 10),
                      MseParams(3, 3, aom_highbd_10_mse8x8_sve, 10)));
#endif  // HAVE_SVE
2176
2177 const VarianceParams kArrayHBDVariance_c[] = {
2178 VarianceParams(7, 7, &aom_highbd_12_variance128x128_c, 12),
2179 VarianceParams(7, 6, &aom_highbd_12_variance128x64_c, 12),
2180 VarianceParams(6, 7, &aom_highbd_12_variance64x128_c, 12),
2181 VarianceParams(6, 6, &aom_highbd_12_variance64x64_c, 12),
2182 VarianceParams(6, 5, &aom_highbd_12_variance64x32_c, 12),
2183 VarianceParams(5, 6, &aom_highbd_12_variance32x64_c, 12),
2184 VarianceParams(5, 5, &aom_highbd_12_variance32x32_c, 12),
2185 VarianceParams(5, 4, &aom_highbd_12_variance32x16_c, 12),
2186 VarianceParams(4, 5, &aom_highbd_12_variance16x32_c, 12),
2187 VarianceParams(4, 4, &aom_highbd_12_variance16x16_c, 12),
2188 VarianceParams(4, 3, &aom_highbd_12_variance16x8_c, 12),
2189 VarianceParams(3, 4, &aom_highbd_12_variance8x16_c, 12),
2190 VarianceParams(3, 3, &aom_highbd_12_variance8x8_c, 12),
2191 VarianceParams(3, 2, &aom_highbd_12_variance8x4_c, 12),
2192 VarianceParams(2, 3, &aom_highbd_12_variance4x8_c, 12),
2193 VarianceParams(2, 2, &aom_highbd_12_variance4x4_c, 12),
2194 VarianceParams(7, 7, &aom_highbd_10_variance128x128_c, 10),
2195 VarianceParams(7, 6, &aom_highbd_10_variance128x64_c, 10),
2196 VarianceParams(6, 7, &aom_highbd_10_variance64x128_c, 10),
2197 VarianceParams(6, 6, &aom_highbd_10_variance64x64_c, 10),
2198 VarianceParams(6, 5, &aom_highbd_10_variance64x32_c, 10),
2199 VarianceParams(5, 6, &aom_highbd_10_variance32x64_c, 10),
2200 VarianceParams(5, 5, &aom_highbd_10_variance32x32_c, 10),
2201 VarianceParams(5, 4, &aom_highbd_10_variance32x16_c, 10),
2202 VarianceParams(4, 5, &aom_highbd_10_variance16x32_c, 10),
2203 VarianceParams(4, 4, &aom_highbd_10_variance16x16_c, 10),
2204 VarianceParams(4, 3, &aom_highbd_10_variance16x8_c, 10),
2205 VarianceParams(3, 4, &aom_highbd_10_variance8x16_c, 10),
2206 VarianceParams(3, 3, &aom_highbd_10_variance8x8_c, 10),
2207 VarianceParams(3, 2, &aom_highbd_10_variance8x4_c, 10),
2208 VarianceParams(2, 3, &aom_highbd_10_variance4x8_c, 10),
2209 VarianceParams(2, 2, &aom_highbd_10_variance4x4_c, 10),
2210 VarianceParams(7, 7, &aom_highbd_8_variance128x128_c, 8),
2211 VarianceParams(7, 6, &aom_highbd_8_variance128x64_c, 8),
2212 VarianceParams(6, 7, &aom_highbd_8_variance64x128_c, 8),
2213 VarianceParams(6, 6, &aom_highbd_8_variance64x64_c, 8),
2214 VarianceParams(6, 5, &aom_highbd_8_variance64x32_c, 8),
2215 VarianceParams(5, 6, &aom_highbd_8_variance32x64_c, 8),
2216 VarianceParams(5, 5, &aom_highbd_8_variance32x32_c, 8),
2217 VarianceParams(5, 4, &aom_highbd_8_variance32x16_c, 8),
2218 VarianceParams(4, 5, &aom_highbd_8_variance16x32_c, 8),
2219 VarianceParams(4, 4, &aom_highbd_8_variance16x16_c, 8),
2220 VarianceParams(4, 3, &aom_highbd_8_variance16x8_c, 8),
2221 VarianceParams(3, 4, &aom_highbd_8_variance8x16_c, 8),
2222 VarianceParams(3, 3, &aom_highbd_8_variance8x8_c, 8),
2223 VarianceParams(3, 2, &aom_highbd_8_variance8x4_c, 8),
2224 VarianceParams(2, 3, &aom_highbd_8_variance4x8_c, 8),
2225 VarianceParams(2, 2, &aom_highbd_8_variance4x4_c, 8),
2226 #if !CONFIG_REALTIME_ONLY
2227 VarianceParams(6, 4, &aom_highbd_12_variance64x16_c, 12),
2228 VarianceParams(4, 6, &aom_highbd_12_variance16x64_c, 12),
2229 VarianceParams(5, 3, &aom_highbd_12_variance32x8_c, 12),
2230 VarianceParams(3, 5, &aom_highbd_12_variance8x32_c, 12),
2231 VarianceParams(4, 2, &aom_highbd_12_variance16x4_c, 12),
2232 VarianceParams(2, 4, &aom_highbd_12_variance4x16_c, 12),
2233 VarianceParams(6, 4, &aom_highbd_10_variance64x16_c, 10),
2234 VarianceParams(4, 6, &aom_highbd_10_variance16x64_c, 10),
2235 VarianceParams(5, 3, &aom_highbd_10_variance32x8_c, 10),
2236 VarianceParams(3, 5, &aom_highbd_10_variance8x32_c, 10),
2237 VarianceParams(4, 2, &aom_highbd_10_variance16x4_c, 10),
2238 VarianceParams(2, 4, &aom_highbd_10_variance4x16_c, 10),
2239 VarianceParams(6, 4, &aom_highbd_8_variance64x16_c, 8),
2240 VarianceParams(4, 6, &aom_highbd_8_variance16x64_c, 8),
2241 VarianceParams(5, 3, &aom_highbd_8_variance32x8_c, 8),
2242 VarianceParams(3, 5, &aom_highbd_8_variance8x32_c, 8),
2243 VarianceParams(4, 2, &aom_highbd_8_variance16x4_c, 8),
2244 VarianceParams(2, 4, &aom_highbd_8_variance4x16_c, 8),
2245 #endif
2246 };
2247 INSTANTIATE_TEST_SUITE_P(C, AvxHBDVarianceTest,
2248 ::testing::ValuesIn(kArrayHBDVariance_c));
2249
#if HAVE_SSE4_1
// SSE4.1 instantiation: the 4x4 variance kernel at bit depths 8, 10 and 12.
INSTANTIATE_TEST_SUITE_P(
    SSE4_1, AvxHBDVarianceTest,
    ::testing::Values(
        VarianceParams(2, 2, aom_highbd_8_variance4x4_sse4_1, 8),
        VarianceParams(2, 2, aom_highbd_10_variance4x4_sse4_1, 10),
        VarianceParams(2, 2, aom_highbd_12_variance4x4_sse4_1, 12)));
#endif  // HAVE_SSE4_1
2258
2259 const SubpelVarianceParams kArrayHBDSubpelVariance_c[] = {
2260 SubpelVarianceParams(7, 7, &aom_highbd_8_sub_pixel_variance128x128_c, 8),
2261 SubpelVarianceParams(7, 6, &aom_highbd_8_sub_pixel_variance128x64_c, 8),
2262 SubpelVarianceParams(6, 7, &aom_highbd_8_sub_pixel_variance64x128_c, 8),
2263 SubpelVarianceParams(6, 6, &aom_highbd_8_sub_pixel_variance64x64_c, 8),
2264 SubpelVarianceParams(6, 5, &aom_highbd_8_sub_pixel_variance64x32_c, 8),
2265 SubpelVarianceParams(5, 6, &aom_highbd_8_sub_pixel_variance32x64_c, 8),
2266 SubpelVarianceParams(5, 5, &aom_highbd_8_sub_pixel_variance32x32_c, 8),
2267 SubpelVarianceParams(5, 4, &aom_highbd_8_sub_pixel_variance32x16_c, 8),
2268 SubpelVarianceParams(4, 5, &aom_highbd_8_sub_pixel_variance16x32_c, 8),
2269 SubpelVarianceParams(4, 4, &aom_highbd_8_sub_pixel_variance16x16_c, 8),
2270 SubpelVarianceParams(4, 3, &aom_highbd_8_sub_pixel_variance16x8_c, 8),
2271 SubpelVarianceParams(3, 4, &aom_highbd_8_sub_pixel_variance8x16_c, 8),
2272 SubpelVarianceParams(3, 3, &aom_highbd_8_sub_pixel_variance8x8_c, 8),
2273 SubpelVarianceParams(3, 2, &aom_highbd_8_sub_pixel_variance8x4_c, 8),
2274 SubpelVarianceParams(2, 3, &aom_highbd_8_sub_pixel_variance4x8_c, 8),
2275 SubpelVarianceParams(2, 2, &aom_highbd_8_sub_pixel_variance4x4_c, 8),
2276 SubpelVarianceParams(7, 7, &aom_highbd_10_sub_pixel_variance128x128_c, 10),
2277 SubpelVarianceParams(7, 6, &aom_highbd_10_sub_pixel_variance128x64_c, 10),
2278 SubpelVarianceParams(6, 7, &aom_highbd_10_sub_pixel_variance64x128_c, 10),
2279 SubpelVarianceParams(6, 6, &aom_highbd_10_sub_pixel_variance64x64_c, 10),
2280 SubpelVarianceParams(6, 5, &aom_highbd_10_sub_pixel_variance64x32_c, 10),
2281 SubpelVarianceParams(5, 6, &aom_highbd_10_sub_pixel_variance32x64_c, 10),
2282 SubpelVarianceParams(5, 5, &aom_highbd_10_sub_pixel_variance32x32_c, 10),
2283 SubpelVarianceParams(5, 4, &aom_highbd_10_sub_pixel_variance32x16_c, 10),
2284 SubpelVarianceParams(4, 5, &aom_highbd_10_sub_pixel_variance16x32_c, 10),
2285 SubpelVarianceParams(4, 4, &aom_highbd_10_sub_pixel_variance16x16_c, 10),
2286 SubpelVarianceParams(4, 3, &aom_highbd_10_sub_pixel_variance16x8_c, 10),
2287 SubpelVarianceParams(3, 4, &aom_highbd_10_sub_pixel_variance8x16_c, 10),
2288 SubpelVarianceParams(3, 3, &aom_highbd_10_sub_pixel_variance8x8_c, 10),
2289 SubpelVarianceParams(3, 2, &aom_highbd_10_sub_pixel_variance8x4_c, 10),
2290 SubpelVarianceParams(2, 3, &aom_highbd_10_sub_pixel_variance4x8_c, 10),
2291 SubpelVarianceParams(2, 2, &aom_highbd_10_sub_pixel_variance4x4_c, 10),
2292 SubpelVarianceParams(7, 7, &aom_highbd_12_sub_pixel_variance128x128_c, 12),
2293 SubpelVarianceParams(7, 6, &aom_highbd_12_sub_pixel_variance128x64_c, 12),
2294 SubpelVarianceParams(6, 7, &aom_highbd_12_sub_pixel_variance64x128_c, 12),
2295 SubpelVarianceParams(6, 6, &aom_highbd_12_sub_pixel_variance64x64_c, 12),
2296 SubpelVarianceParams(6, 5, &aom_highbd_12_sub_pixel_variance64x32_c, 12),
2297 SubpelVarianceParams(5, 6, &aom_highbd_12_sub_pixel_variance32x64_c, 12),
2298 SubpelVarianceParams(5, 5, &aom_highbd_12_sub_pixel_variance32x32_c, 12),
2299 SubpelVarianceParams(5, 4, &aom_highbd_12_sub_pixel_variance32x16_c, 12),
2300 SubpelVarianceParams(4, 5, &aom_highbd_12_sub_pixel_variance16x32_c, 12),
2301 SubpelVarianceParams(4, 4, &aom_highbd_12_sub_pixel_variance16x16_c, 12),
2302 SubpelVarianceParams(4, 3, &aom_highbd_12_sub_pixel_variance16x8_c, 12),
2303 SubpelVarianceParams(3, 4, &aom_highbd_12_sub_pixel_variance8x16_c, 12),
2304 SubpelVarianceParams(3, 3, &aom_highbd_12_sub_pixel_variance8x8_c, 12),
2305 SubpelVarianceParams(3, 2, &aom_highbd_12_sub_pixel_variance8x4_c, 12),
2306 SubpelVarianceParams(2, 3, &aom_highbd_12_sub_pixel_variance4x8_c, 12),
2307 SubpelVarianceParams(2, 2, &aom_highbd_12_sub_pixel_variance4x4_c, 12),
2308 #if !CONFIG_REALTIME_ONLY
2309 SubpelVarianceParams(6, 4, &aom_highbd_8_sub_pixel_variance64x16_c, 8),
2310 SubpelVarianceParams(4, 6, &aom_highbd_8_sub_pixel_variance16x64_c, 8),
2311 SubpelVarianceParams(5, 3, &aom_highbd_8_sub_pixel_variance32x8_c, 8),
2312 SubpelVarianceParams(3, 5, &aom_highbd_8_sub_pixel_variance8x32_c, 8),
2313 SubpelVarianceParams(4, 2, &aom_highbd_8_sub_pixel_variance16x4_c, 8),
2314 SubpelVarianceParams(2, 4, &aom_highbd_8_sub_pixel_variance4x16_c, 8),
2315 SubpelVarianceParams(6, 4, &aom_highbd_10_sub_pixel_variance64x16_c, 10),
2316 SubpelVarianceParams(4, 6, &aom_highbd_10_sub_pixel_variance16x64_c, 10),
2317 SubpelVarianceParams(5, 3, &aom_highbd_10_sub_pixel_variance32x8_c, 10),
2318 SubpelVarianceParams(3, 5, &aom_highbd_10_sub_pixel_variance8x32_c, 10),
2319 SubpelVarianceParams(4, 2, &aom_highbd_10_sub_pixel_variance16x4_c, 10),
2320 SubpelVarianceParams(2, 4, &aom_highbd_10_sub_pixel_variance4x16_c, 10),
2321 SubpelVarianceParams(6, 4, &aom_highbd_12_sub_pixel_variance64x16_c, 12),
2322 SubpelVarianceParams(4, 6, &aom_highbd_12_sub_pixel_variance16x64_c, 12),
2323 SubpelVarianceParams(5, 3, &aom_highbd_12_sub_pixel_variance32x8_c, 12),
2324 SubpelVarianceParams(3, 5, &aom_highbd_12_sub_pixel_variance8x32_c, 12),
2325 SubpelVarianceParams(4, 2, &aom_highbd_12_sub_pixel_variance16x4_c, 12),
2326 SubpelVarianceParams(2, 4, &aom_highbd_12_sub_pixel_variance4x16_c, 12),
2327 #endif
2328 };
2329 INSTANTIATE_TEST_SUITE_P(C, AvxHBDSubpelVarianceTest,
2330 ::testing::ValuesIn(kArrayHBDSubpelVariance_c));
2331
// Parameter table for the C (`_c`) versions of the high-bitdepth sub-pixel
// averaging variance kernels, listed for 8-, 10- and 12-bit in that order.
// In every row the first two values are log2 of the block width and height
// that appear in the kernel name (e.g. 7, 6 for 128x64), and the last value
// equals the 8/10/12 in the kernel name.
// Entry order matters: googletest numbers the generated tests by position in
// the array handed to ValuesIn().
2332 const SubpelAvgVarianceParams kArrayHBDSubpelAvgVariance_c[] = {
2333 SubpelAvgVarianceParams(7, 7, &aom_highbd_8_sub_pixel_avg_variance128x128_c,
2334 8),
2335 SubpelAvgVarianceParams(7, 6, &aom_highbd_8_sub_pixel_avg_variance128x64_c,
2336 8),
2337 SubpelAvgVarianceParams(6, 7, &aom_highbd_8_sub_pixel_avg_variance64x128_c,
2338 8),
2339 SubpelAvgVarianceParams(6, 6, &aom_highbd_8_sub_pixel_avg_variance64x64_c, 8),
2340 SubpelAvgVarianceParams(6, 5, &aom_highbd_8_sub_pixel_avg_variance64x32_c, 8),
2341 SubpelAvgVarianceParams(5, 6, &aom_highbd_8_sub_pixel_avg_variance32x64_c, 8),
2342 SubpelAvgVarianceParams(5, 5, &aom_highbd_8_sub_pixel_avg_variance32x32_c, 8),
2343 SubpelAvgVarianceParams(5, 4, &aom_highbd_8_sub_pixel_avg_variance32x16_c, 8),
2344 SubpelAvgVarianceParams(4, 5, &aom_highbd_8_sub_pixel_avg_variance16x32_c, 8),
2345 SubpelAvgVarianceParams(4, 4, &aom_highbd_8_sub_pixel_avg_variance16x16_c, 8),
2346 SubpelAvgVarianceParams(4, 3, &aom_highbd_8_sub_pixel_avg_variance16x8_c, 8),
2347 SubpelAvgVarianceParams(3, 4, &aom_highbd_8_sub_pixel_avg_variance8x16_c, 8),
2348 SubpelAvgVarianceParams(3, 3, &aom_highbd_8_sub_pixel_avg_variance8x8_c, 8),
2349 SubpelAvgVarianceParams(3, 2, &aom_highbd_8_sub_pixel_avg_variance8x4_c, 8),
2350 SubpelAvgVarianceParams(2, 3, &aom_highbd_8_sub_pixel_avg_variance4x8_c, 8),
2351 SubpelAvgVarianceParams(2, 2, &aom_highbd_8_sub_pixel_avg_variance4x4_c, 8),
2352 SubpelAvgVarianceParams(7, 7, &aom_highbd_10_sub_pixel_avg_variance128x128_c,
2353 10),
2354 SubpelAvgVarianceParams(7, 6, &aom_highbd_10_sub_pixel_avg_variance128x64_c,
2355 10),
2356 SubpelAvgVarianceParams(6, 7, &aom_highbd_10_sub_pixel_avg_variance64x128_c,
2357 10),
2358 SubpelAvgVarianceParams(6, 6, &aom_highbd_10_sub_pixel_avg_variance64x64_c,
2359 10),
2360 SubpelAvgVarianceParams(6, 5, &aom_highbd_10_sub_pixel_avg_variance64x32_c,
2361 10),
2362 SubpelAvgVarianceParams(5, 6, &aom_highbd_10_sub_pixel_avg_variance32x64_c,
2363 10),
2364 SubpelAvgVarianceParams(5, 5, &aom_highbd_10_sub_pixel_avg_variance32x32_c,
2365 10),
2366 SubpelAvgVarianceParams(5, 4, &aom_highbd_10_sub_pixel_avg_variance32x16_c,
2367 10),
2368 SubpelAvgVarianceParams(4, 5, &aom_highbd_10_sub_pixel_avg_variance16x32_c,
2369 10),
2370 SubpelAvgVarianceParams(4, 4, &aom_highbd_10_sub_pixel_avg_variance16x16_c,
2371 10),
2372 SubpelAvgVarianceParams(4, 3, &aom_highbd_10_sub_pixel_avg_variance16x8_c,
2373 10),
2374 SubpelAvgVarianceParams(3, 4, &aom_highbd_10_sub_pixel_avg_variance8x16_c,
2375 10),
2376 SubpelAvgVarianceParams(3, 3, &aom_highbd_10_sub_pixel_avg_variance8x8_c, 10),
2377 SubpelAvgVarianceParams(3, 2, &aom_highbd_10_sub_pixel_avg_variance8x4_c, 10),
2378 SubpelAvgVarianceParams(2, 3, &aom_highbd_10_sub_pixel_avg_variance4x8_c, 10),
2379 SubpelAvgVarianceParams(2, 2, &aom_highbd_10_sub_pixel_avg_variance4x4_c, 10),
2380 SubpelAvgVarianceParams(7, 7, &aom_highbd_12_sub_pixel_avg_variance128x128_c,
2381 12),
2382 SubpelAvgVarianceParams(7, 6, &aom_highbd_12_sub_pixel_avg_variance128x64_c,
2383 12),
2384 SubpelAvgVarianceParams(6, 7, &aom_highbd_12_sub_pixel_avg_variance64x128_c,
2385 12),
2386 SubpelAvgVarianceParams(6, 6, &aom_highbd_12_sub_pixel_avg_variance64x64_c,
2387 12),
2388 SubpelAvgVarianceParams(6, 5, &aom_highbd_12_sub_pixel_avg_variance64x32_c,
2389 12),
2390 SubpelAvgVarianceParams(5, 6, &aom_highbd_12_sub_pixel_avg_variance32x64_c,
2391 12),
2392 SubpelAvgVarianceParams(5, 5, &aom_highbd_12_sub_pixel_avg_variance32x32_c,
2393 12),
2394 SubpelAvgVarianceParams(5, 4, &aom_highbd_12_sub_pixel_avg_variance32x16_c,
2395 12),
2396 SubpelAvgVarianceParams(4, 5, &aom_highbd_12_sub_pixel_avg_variance16x32_c,
2397 12),
2398 SubpelAvgVarianceParams(4, 4, &aom_highbd_12_sub_pixel_avg_variance16x16_c,
2399 12),
2400 SubpelAvgVarianceParams(4, 3, &aom_highbd_12_sub_pixel_avg_variance16x8_c,
2401 12),
2402 SubpelAvgVarianceParams(3, 4, &aom_highbd_12_sub_pixel_avg_variance8x16_c,
2403 12),
2404 SubpelAvgVarianceParams(3, 3, &aom_highbd_12_sub_pixel_avg_variance8x8_c, 12),
2405 SubpelAvgVarianceParams(3, 2, &aom_highbd_12_sub_pixel_avg_variance8x4_c, 12),
2406 SubpelAvgVarianceParams(2, 3, &aom_highbd_12_sub_pixel_avg_variance4x8_c, 12),
2407 SubpelAvgVarianceParams(2, 2, &aom_highbd_12_sub_pixel_avg_variance4x4_c, 12),
2408
// The 4:1 and 1:4 block sizes (64x16, 16x64, 32x8, 8x32, 16x4, 4x16) are only
// listed when CONFIG_REALTIME_ONLY is 0.
2409 #if !CONFIG_REALTIME_ONLY
2410 SubpelAvgVarianceParams(6, 4, &aom_highbd_8_sub_pixel_avg_variance64x16_c, 8),
2411 SubpelAvgVarianceParams(4, 6, &aom_highbd_8_sub_pixel_avg_variance16x64_c, 8),
2412 SubpelAvgVarianceParams(5, 3, &aom_highbd_8_sub_pixel_avg_variance32x8_c, 8),
2413 SubpelAvgVarianceParams(3, 5, &aom_highbd_8_sub_pixel_avg_variance8x32_c, 8),
2414 SubpelAvgVarianceParams(4, 2, &aom_highbd_8_sub_pixel_avg_variance16x4_c, 8),
2415 SubpelAvgVarianceParams(2, 4, &aom_highbd_8_sub_pixel_avg_variance4x16_c, 8),
2416 SubpelAvgVarianceParams(6, 4, &aom_highbd_10_sub_pixel_avg_variance64x16_c,
2417 10),
2418 SubpelAvgVarianceParams(4, 6, &aom_highbd_10_sub_pixel_avg_variance16x64_c,
2419 10),
2420 SubpelAvgVarianceParams(5, 3, &aom_highbd_10_sub_pixel_avg_variance32x8_c,
2421 10),
2422 SubpelAvgVarianceParams(3, 5, &aom_highbd_10_sub_pixel_avg_variance8x32_c,
2423 10),
2424 SubpelAvgVarianceParams(4, 2, &aom_highbd_10_sub_pixel_avg_variance16x4_c,
2425 10),
2426 SubpelAvgVarianceParams(2, 4, &aom_highbd_10_sub_pixel_avg_variance4x16_c,
2427 10),
2428 SubpelAvgVarianceParams(6, 4, &aom_highbd_12_sub_pixel_avg_variance64x16_c,
2429 12),
2430 SubpelAvgVarianceParams(4, 6, &aom_highbd_12_sub_pixel_avg_variance16x64_c,
2431 12),
2432 SubpelAvgVarianceParams(5, 3, &aom_highbd_12_sub_pixel_avg_variance32x8_c,
2433 12),
2434 SubpelAvgVarianceParams(3, 5, &aom_highbd_12_sub_pixel_avg_variance8x32_c,
2435 12),
2436 SubpelAvgVarianceParams(4, 2, &aom_highbd_12_sub_pixel_avg_variance16x4_c,
2437 12),
2438 SubpelAvgVarianceParams(2, 4, &aom_highbd_12_sub_pixel_avg_variance4x16_c,
2439 12),
2440 #endif
2441 };
// Runs AvxHBDSubpelAvgVarianceTest once per table entry, under the "C" prefix.
2442 INSTANTIATE_TEST_SUITE_P(C, AvxHBDSubpelAvgVarianceTest,
2443 ::testing::ValuesIn(kArrayHBDSubpelAvgVariance_c));
2444
// Parameter table for the C (`_c`) versions of the high-bitdepth
// distance-weighted sub-pixel averaging variance kernels, listed for 8-, 10-
// and 12-bit in that order.
// In every row the first two values are log2 of the block width and height
// that appear in the kernel name (e.g. 6, 5 for 64x32), and the last value
// equals the 8/10/12 in the kernel name.
// Entry order matters: googletest numbers the generated tests by position in
// the array handed to ValuesIn().
2445 const DistWtdSubpelAvgVarianceParams kArrayHBDDistWtdSubpelAvgVariance_c[] = {
2446 DistWtdSubpelAvgVarianceParams(
2447 7, 7, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance128x128_c, 8),
2448 DistWtdSubpelAvgVarianceParams(
2449 7, 6, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance128x64_c, 8),
2450 DistWtdSubpelAvgVarianceParams(
2451 6, 7, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance64x128_c, 8),
2452 DistWtdSubpelAvgVarianceParams(
2453 6, 6, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance64x64_c, 8),
2454 DistWtdSubpelAvgVarianceParams(
2455 6, 5, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance64x32_c, 8),
2456 DistWtdSubpelAvgVarianceParams(
2457 5, 6, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance32x64_c, 8),
2458 DistWtdSubpelAvgVarianceParams(
2459 5, 5, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance32x32_c, 8),
2460 DistWtdSubpelAvgVarianceParams(
2461 5, 4, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance32x16_c, 8),
2462 DistWtdSubpelAvgVarianceParams(
2463 4, 5, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance16x32_c, 8),
2464 DistWtdSubpelAvgVarianceParams(
2465 4, 4, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance16x16_c, 8),
2466 DistWtdSubpelAvgVarianceParams(
2467 4, 3, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance16x8_c, 8),
2468 DistWtdSubpelAvgVarianceParams(
2469 3, 4, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance8x16_c, 8),
2470 DistWtdSubpelAvgVarianceParams(
2471 3, 3, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance8x8_c, 8),
2472 DistWtdSubpelAvgVarianceParams(
2473 3, 2, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance8x4_c, 8),
2474 DistWtdSubpelAvgVarianceParams(
2475 2, 3, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance4x8_c, 8),
2476 DistWtdSubpelAvgVarianceParams(
2477 2, 2, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance4x4_c, 8),
2478 DistWtdSubpelAvgVarianceParams(
2479 7, 7, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance128x128_c, 10),
2480 DistWtdSubpelAvgVarianceParams(
2481 7, 6, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance128x64_c, 10),
2482 DistWtdSubpelAvgVarianceParams(
2483 6, 7, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance64x128_c, 10),
2484 DistWtdSubpelAvgVarianceParams(
2485 6, 6, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance64x64_c, 10),
2486 DistWtdSubpelAvgVarianceParams(
2487 6, 5, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance64x32_c, 10),
2488 DistWtdSubpelAvgVarianceParams(
2489 5, 6, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance32x64_c, 10),
2490 DistWtdSubpelAvgVarianceParams(
2491 5, 5, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance32x32_c, 10),
2492 DistWtdSubpelAvgVarianceParams(
2493 5, 4, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance32x16_c, 10),
2494 DistWtdSubpelAvgVarianceParams(
2495 4, 5, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance16x32_c, 10),
2496 DistWtdSubpelAvgVarianceParams(
2497 4, 4, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance16x16_c, 10),
2498 DistWtdSubpelAvgVarianceParams(
2499 4, 3, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance16x8_c, 10),
2500 DistWtdSubpelAvgVarianceParams(
2501 3, 4, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance8x16_c, 10),
2502 DistWtdSubpelAvgVarianceParams(
2503 3, 3, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance8x8_c, 10),
2504 DistWtdSubpelAvgVarianceParams(
2505 3, 2, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance8x4_c, 10),
2506 DistWtdSubpelAvgVarianceParams(
2507 2, 3, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance4x8_c, 10),
2508 DistWtdSubpelAvgVarianceParams(
2509 2, 2, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance4x4_c, 10),
2510 DistWtdSubpelAvgVarianceParams(
2511 7, 7, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance128x128_c, 12),
2512 DistWtdSubpelAvgVarianceParams(
2513 7, 6, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance128x64_c, 12),
2514 DistWtdSubpelAvgVarianceParams(
2515 6, 7, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance64x128_c, 12),
2516 DistWtdSubpelAvgVarianceParams(
2517 6, 6, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance64x64_c, 12),
2518 DistWtdSubpelAvgVarianceParams(
2519 6, 5, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance64x32_c, 12),
2520 DistWtdSubpelAvgVarianceParams(
2521 5, 6, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance32x64_c, 12),
2522 DistWtdSubpelAvgVarianceParams(
2523 5, 5, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance32x32_c, 12),
2524 DistWtdSubpelAvgVarianceParams(
2525 5, 4, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance32x16_c, 12),
2526 DistWtdSubpelAvgVarianceParams(
2527 4, 5, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance16x32_c, 12),
2528 DistWtdSubpelAvgVarianceParams(
2529 4, 4, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance16x16_c, 12),
2530 DistWtdSubpelAvgVarianceParams(
2531 4, 3, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance16x8_c, 12),
2532 DistWtdSubpelAvgVarianceParams(
2533 3, 4, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance8x16_c, 12),
2534 DistWtdSubpelAvgVarianceParams(
2535 3, 3, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance8x8_c, 12),
2536 DistWtdSubpelAvgVarianceParams(
2537 3, 2, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance8x4_c, 12),
2538 DistWtdSubpelAvgVarianceParams(
2539 2, 3, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance4x8_c, 12),
2540 DistWtdSubpelAvgVarianceParams(
2541 2, 2, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance4x4_c, 12),
2542
// The 4:1 and 1:4 block sizes (64x16, 16x64, 32x8, 8x32, 16x4, 4x16) are only
// listed when CONFIG_REALTIME_ONLY is 0.
2543 #if !CONFIG_REALTIME_ONLY
2544 DistWtdSubpelAvgVarianceParams(
2545 6, 4, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance64x16_c, 8),
2546 DistWtdSubpelAvgVarianceParams(
2547 4, 6, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance16x64_c, 8),
2548 DistWtdSubpelAvgVarianceParams(
2549 5, 3, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance32x8_c, 8),
2550 DistWtdSubpelAvgVarianceParams(
2551 3, 5, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance8x32_c, 8),
2552 DistWtdSubpelAvgVarianceParams(
2553 4, 2, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance16x4_c, 8),
2554 DistWtdSubpelAvgVarianceParams(
2555 2, 4, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance4x16_c, 8),
2556 DistWtdSubpelAvgVarianceParams(
2557 6, 4, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance64x16_c, 10),
2558 DistWtdSubpelAvgVarianceParams(
2559 4, 6, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance16x64_c, 10),
2560 DistWtdSubpelAvgVarianceParams(
2561 5, 3, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance32x8_c, 10),
2562 DistWtdSubpelAvgVarianceParams(
2563 3, 5, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance8x32_c, 10),
2564 DistWtdSubpelAvgVarianceParams(
2565 4, 2, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance16x4_c, 10),
2566 DistWtdSubpelAvgVarianceParams(
2567 2, 4, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance4x16_c, 10),
2568 DistWtdSubpelAvgVarianceParams(
2569 6, 4, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance64x16_c, 12),
2570 DistWtdSubpelAvgVarianceParams(
2571 4, 6, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance16x64_c, 12),
2572 DistWtdSubpelAvgVarianceParams(
2573 5, 3, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance32x8_c, 12),
2574 DistWtdSubpelAvgVarianceParams(
2575 3, 5, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance8x32_c, 12),
2576 DistWtdSubpelAvgVarianceParams(
2577 4, 2, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance16x4_c, 12),
2578 DistWtdSubpelAvgVarianceParams(
2579 2, 4, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance4x16_c, 12),
2580 #endif
2581 };
// Runs AvxHBDDistWtdSubpelAvgVarianceTest once per table entry, under the "C"
// prefix.
2582 INSTANTIATE_TEST_SUITE_P(
2583 C, AvxHBDDistWtdSubpelAvgVarianceTest,
2584 ::testing::ValuesIn(kArrayHBDDistWtdSubpelAvgVariance_c));
2585
2586 #if !CONFIG_REALTIME_ONLY
// Parameter table for the C (`_c`) versions of the high-bitdepth OBMC
// sub-pixel variance kernels, listed for 8-, 10- and 12-bit in that order.
// In every row the first two values are log2 of the block width and height
// that appear in the kernel name (e.g. 5, 4 for 32x16), and the last value
// equals the 8/10/12 in the kernel name.
// The 4:1 / 1:4 sizes at the end of the table carry no inner
// CONFIG_REALTIME_ONLY guard of their own; the table and its instantiation
// sit inside an enclosing `#if !CONFIG_REALTIME_ONLY` region.
// Entry order matters: googletest numbers the generated tests by position in
// the array handed to ValuesIn().
2587 const ObmcSubpelVarianceParams kArrayHBDObmcSubpelVariance_c[] = {
2588 ObmcSubpelVarianceParams(7, 7, &aom_highbd_8_obmc_sub_pixel_variance128x128_c,
2589 8),
2590 ObmcSubpelVarianceParams(7, 6, &aom_highbd_8_obmc_sub_pixel_variance128x64_c,
2591 8),
2592 ObmcSubpelVarianceParams(6, 7, &aom_highbd_8_obmc_sub_pixel_variance64x128_c,
2593 8),
2594 ObmcSubpelVarianceParams(6, 6, &aom_highbd_8_obmc_sub_pixel_variance64x64_c,
2595 8),
2596 ObmcSubpelVarianceParams(6, 5, &aom_highbd_8_obmc_sub_pixel_variance64x32_c,
2597 8),
2598 ObmcSubpelVarianceParams(5, 6, &aom_highbd_8_obmc_sub_pixel_variance32x64_c,
2599 8),
2600 ObmcSubpelVarianceParams(5, 5, &aom_highbd_8_obmc_sub_pixel_variance32x32_c,
2601 8),
2602 ObmcSubpelVarianceParams(5, 4, &aom_highbd_8_obmc_sub_pixel_variance32x16_c,
2603 8),
2604 ObmcSubpelVarianceParams(4, 5, &aom_highbd_8_obmc_sub_pixel_variance16x32_c,
2605 8),
2606 ObmcSubpelVarianceParams(4, 4, &aom_highbd_8_obmc_sub_pixel_variance16x16_c,
2607 8),
2608 ObmcSubpelVarianceParams(4, 3, &aom_highbd_8_obmc_sub_pixel_variance16x8_c,
2609 8),
2610 ObmcSubpelVarianceParams(3, 4, &aom_highbd_8_obmc_sub_pixel_variance8x16_c,
2611 8),
2612 ObmcSubpelVarianceParams(3, 3, &aom_highbd_8_obmc_sub_pixel_variance8x8_c, 8),
2613 ObmcSubpelVarianceParams(3, 2, &aom_highbd_8_obmc_sub_pixel_variance8x4_c, 8),
2614 ObmcSubpelVarianceParams(2, 3, &aom_highbd_8_obmc_sub_pixel_variance4x8_c, 8),
2615 ObmcSubpelVarianceParams(2, 2, &aom_highbd_8_obmc_sub_pixel_variance4x4_c, 8),
2616 ObmcSubpelVarianceParams(7, 7,
2617 &aom_highbd_10_obmc_sub_pixel_variance128x128_c, 10),
2618 ObmcSubpelVarianceParams(7, 6, &aom_highbd_10_obmc_sub_pixel_variance128x64_c,
2619 10),
2620 ObmcSubpelVarianceParams(6, 7, &aom_highbd_10_obmc_sub_pixel_variance64x128_c,
2621 10),
2622 ObmcSubpelVarianceParams(6, 6, &aom_highbd_10_obmc_sub_pixel_variance64x64_c,
2623 10),
2624 ObmcSubpelVarianceParams(6, 5, &aom_highbd_10_obmc_sub_pixel_variance64x32_c,
2625 10),
2626 ObmcSubpelVarianceParams(5, 6, &aom_highbd_10_obmc_sub_pixel_variance32x64_c,
2627 10),
2628 ObmcSubpelVarianceParams(5, 5, &aom_highbd_10_obmc_sub_pixel_variance32x32_c,
2629 10),
2630 ObmcSubpelVarianceParams(5, 4, &aom_highbd_10_obmc_sub_pixel_variance32x16_c,
2631 10),
2632 ObmcSubpelVarianceParams(4, 5, &aom_highbd_10_obmc_sub_pixel_variance16x32_c,
2633 10),
2634 ObmcSubpelVarianceParams(4, 4, &aom_highbd_10_obmc_sub_pixel_variance16x16_c,
2635 10),
2636 ObmcSubpelVarianceParams(4, 3, &aom_highbd_10_obmc_sub_pixel_variance16x8_c,
2637 10),
2638 ObmcSubpelVarianceParams(3, 4, &aom_highbd_10_obmc_sub_pixel_variance8x16_c,
2639 10),
2640 ObmcSubpelVarianceParams(3, 3, &aom_highbd_10_obmc_sub_pixel_variance8x8_c,
2641 10),
2642 ObmcSubpelVarianceParams(3, 2, &aom_highbd_10_obmc_sub_pixel_variance8x4_c,
2643 10),
2644 ObmcSubpelVarianceParams(2, 3, &aom_highbd_10_obmc_sub_pixel_variance4x8_c,
2645 10),
2646 ObmcSubpelVarianceParams(2, 2, &aom_highbd_10_obmc_sub_pixel_variance4x4_c,
2647 10),
2648 ObmcSubpelVarianceParams(7, 7,
2649 &aom_highbd_12_obmc_sub_pixel_variance128x128_c, 12),
2650 ObmcSubpelVarianceParams(7, 6, &aom_highbd_12_obmc_sub_pixel_variance128x64_c,
2651 12),
2652 ObmcSubpelVarianceParams(6, 7, &aom_highbd_12_obmc_sub_pixel_variance64x128_c,
2653 12),
2654 ObmcSubpelVarianceParams(6, 6, &aom_highbd_12_obmc_sub_pixel_variance64x64_c,
2655 12),
2656 ObmcSubpelVarianceParams(6, 5, &aom_highbd_12_obmc_sub_pixel_variance64x32_c,
2657 12),
2658 ObmcSubpelVarianceParams(5, 6, &aom_highbd_12_obmc_sub_pixel_variance32x64_c,
2659 12),
2660 ObmcSubpelVarianceParams(5, 5, &aom_highbd_12_obmc_sub_pixel_variance32x32_c,
2661 12),
2662 ObmcSubpelVarianceParams(5, 4, &aom_highbd_12_obmc_sub_pixel_variance32x16_c,
2663 12),
2664 ObmcSubpelVarianceParams(4, 5, &aom_highbd_12_obmc_sub_pixel_variance16x32_c,
2665 12),
2666 ObmcSubpelVarianceParams(4, 4, &aom_highbd_12_obmc_sub_pixel_variance16x16_c,
2667 12),
2668 ObmcSubpelVarianceParams(4, 3, &aom_highbd_12_obmc_sub_pixel_variance16x8_c,
2669 12),
2670 ObmcSubpelVarianceParams(3, 4, &aom_highbd_12_obmc_sub_pixel_variance8x16_c,
2671 12),
2672 ObmcSubpelVarianceParams(3, 3, &aom_highbd_12_obmc_sub_pixel_variance8x8_c,
2673 12),
2674 ObmcSubpelVarianceParams(3, 2, &aom_highbd_12_obmc_sub_pixel_variance8x4_c,
2675 12),
2676 ObmcSubpelVarianceParams(2, 3, &aom_highbd_12_obmc_sub_pixel_variance4x8_c,
2677 12),
2678 ObmcSubpelVarianceParams(2, 2, &aom_highbd_12_obmc_sub_pixel_variance4x4_c,
2679 12),
2680
// 4:1 and 1:4 block sizes (64x16, 16x64, 32x8, 8x32, 16x4, 4x16).
2681 ObmcSubpelVarianceParams(6, 4, &aom_highbd_8_obmc_sub_pixel_variance64x16_c,
2682 8),
2683 ObmcSubpelVarianceParams(4, 6, &aom_highbd_8_obmc_sub_pixel_variance16x64_c,
2684 8),
2685 ObmcSubpelVarianceParams(5, 3, &aom_highbd_8_obmc_sub_pixel_variance32x8_c,
2686 8),
2687 ObmcSubpelVarianceParams(3, 5, &aom_highbd_8_obmc_sub_pixel_variance8x32_c,
2688 8),
2689 ObmcSubpelVarianceParams(4, 2, &aom_highbd_8_obmc_sub_pixel_variance16x4_c,
2690 8),
2691 ObmcSubpelVarianceParams(2, 4, &aom_highbd_8_obmc_sub_pixel_variance4x16_c,
2692 8),
2693 ObmcSubpelVarianceParams(6, 4, &aom_highbd_10_obmc_sub_pixel_variance64x16_c,
2694 10),
2695 ObmcSubpelVarianceParams(4, 6, &aom_highbd_10_obmc_sub_pixel_variance16x64_c,
2696 10),
2697 ObmcSubpelVarianceParams(5, 3, &aom_highbd_10_obmc_sub_pixel_variance32x8_c,
2698 10),
2699 ObmcSubpelVarianceParams(3, 5, &aom_highbd_10_obmc_sub_pixel_variance8x32_c,
2700 10),
2701 ObmcSubpelVarianceParams(4, 2, &aom_highbd_10_obmc_sub_pixel_variance16x4_c,
2702 10),
2703 ObmcSubpelVarianceParams(2, 4, &aom_highbd_10_obmc_sub_pixel_variance4x16_c,
2704 10),
2705 ObmcSubpelVarianceParams(6, 4, &aom_highbd_12_obmc_sub_pixel_variance64x16_c,
2706 12),
2707 ObmcSubpelVarianceParams(4, 6, &aom_highbd_12_obmc_sub_pixel_variance16x64_c,
2708 12),
2709 ObmcSubpelVarianceParams(5, 3, &aom_highbd_12_obmc_sub_pixel_variance32x8_c,
2710 12),
2711 ObmcSubpelVarianceParams(3, 5, &aom_highbd_12_obmc_sub_pixel_variance8x32_c,
2712 12),
2713 ObmcSubpelVarianceParams(4, 2, &aom_highbd_12_obmc_sub_pixel_variance16x4_c,
2714 12),
2715 ObmcSubpelVarianceParams(2, 4, &aom_highbd_12_obmc_sub_pixel_variance4x16_c,
2716 12),
2717 };
// Runs AvxHBDObmcSubpelVarianceTest once per table entry, under the "C" prefix.
2718 INSTANTIATE_TEST_SUITE_P(C, AvxHBDObmcSubpelVarianceTest,
2719 ::testing::ValuesIn(kArrayHBDObmcSubpelVariance_c));
2720 #endif // !CONFIG_REALTIME_ONLY
2721 #endif // CONFIG_AV1_HIGHBITDEPTH
2722
2723 #if HAVE_SSE2
// Registers aom_mse_wxh_16bit_sse2 with MseWxHTest under the "SSE2" prefix,
// once for each combination of 3 or 2 in the first two parameter slots.
// NOTE(review): the first two values are presumably log2 width/height (8x8,
// 8x4, 4x8, 4x4) and the trailing 8 presumably the bit depth — confirm against
// the MseWxHParams definition.
2724 INSTANTIATE_TEST_SUITE_P(
2725 SSE2, MseWxHTest,
2726 ::testing::Values(MseWxHParams(3, 3, &aom_mse_wxh_16bit_sse2, 8),
2727 MseWxHParams(3, 2, &aom_mse_wxh_16bit_sse2, 8),
2728 MseWxHParams(2, 3, &aom_mse_wxh_16bit_sse2, 8),
2729 MseWxHParams(2, 2, &aom_mse_wxh_16bit_sse2, 8)));
2730
// Registers aom_mse_16xh_16bit_sse2 with Mse16xHTest under the "SSE2" prefix,
// once for each combination of 3 or 2 in the first two parameter slots.
// NOTE(review): the meaning of the first two values and of the trailing 8 is
// defined by Mse16xHParams, which is not visible here — presumably log2 block
// dimensions and bit depth; confirm.
2731 INSTANTIATE_TEST_SUITE_P(
2732 SSE2, Mse16xHTest,
2733 ::testing::Values(Mse16xHParams(3, 3, &aom_mse_16xh_16bit_sse2, 8),
2734 Mse16xHParams(3, 2, &aom_mse_16xh_16bit_sse2, 8),
2735 Mse16xHParams(2, 3, &aom_mse_16xh_16bit_sse2, 8),
2736 Mse16xHParams(2, 2, &aom_mse_16xh_16bit_sse2, 8)));
2737
2738 #if !CONFIG_REALTIME_ONLY
// Registers aom_get_mb_ss_sse2 as the single parameter of SumOfSquaresTest
// under the "SSE2" prefix.
2739 INSTANTIATE_TEST_SUITE_P(SSE2, SumOfSquaresTest,
2740 ::testing::Values(aom_get_mb_ss_sse2));
2741 #endif // !CONFIG_REALTIME_ONLY
2742
// Registers the SSE2 MSE kernels for 16x16, 16x8, 8x16 and 8x8 with AvxMseTest.
// The first two values of each entry are log2 of the width and height in the
// kernel name; no bit-depth argument is passed for these entries.
2743 INSTANTIATE_TEST_SUITE_P(SSE2, AvxMseTest,
2744 ::testing::Values(MseParams(4, 4, &aom_mse16x16_sse2),
2745 MseParams(4, 3, &aom_mse16x8_sse2),
2746 MseParams(3, 4, &aom_mse8x16_sse2),
2747 MseParams(3, 3, &aom_mse8x8_sse2)));
2748
// Parameter table for the SSE2 variance kernels, 128x128 down to 4x4.
// The first two values of each entry are log2 of the block width and height
// in the kernel name (e.g. 5, 4 for 32x16); no bit-depth argument is passed.
// Entry order matters: googletest numbers the generated tests by position in
// the array handed to ValuesIn().
2749 const VarianceParams kArrayVariance_sse2[] = {
2750 VarianceParams(7, 7, &aom_variance128x128_sse2),
2751 VarianceParams(7, 6, &aom_variance128x64_sse2),
2752 VarianceParams(6, 7, &aom_variance64x128_sse2),
2753 VarianceParams(6, 6, &aom_variance64x64_sse2),
2754 VarianceParams(6, 5, &aom_variance64x32_sse2),
2755 VarianceParams(5, 6, &aom_variance32x64_sse2),
2756 VarianceParams(5, 5, &aom_variance32x32_sse2),
2757 VarianceParams(5, 4, &aom_variance32x16_sse2),
2758 VarianceParams(4, 5, &aom_variance16x32_sse2),
2759 VarianceParams(4, 4, &aom_variance16x16_sse2),
2760 VarianceParams(4, 3, &aom_variance16x8_sse2),
2761 VarianceParams(3, 4, &aom_variance8x16_sse2),
2762 VarianceParams(3, 3, &aom_variance8x8_sse2),
2763 VarianceParams(3, 2, &aom_variance8x4_sse2),
2764 VarianceParams(2, 3, &aom_variance4x8_sse2),
2765 VarianceParams(2, 2, &aom_variance4x4_sse2),
// The 4:1 and 1:4 block sizes are only listed when CONFIG_REALTIME_ONLY is 0.
2766 #if !CONFIG_REALTIME_ONLY
2767 VarianceParams(6, 4, &aom_variance64x16_sse2),
2768 VarianceParams(5, 3, &aom_variance32x8_sse2),
2769 VarianceParams(4, 6, &aom_variance16x64_sse2),
2770 VarianceParams(4, 2, &aom_variance16x4_sse2),
2771 VarianceParams(3, 5, &aom_variance8x32_sse2),
2772 VarianceParams(2, 4, &aom_variance4x16_sse2),
2773 #endif
2774 };
// Runs AvxVarianceTest once per table entry, under the "SSE2" prefix.
2775 INSTANTIATE_TEST_SUITE_P(SSE2, AvxVarianceTest,
2776 ::testing::ValuesIn(kArrayVariance_sse2));
2777
// Parameter table for GetSseSum8x8QuadTest: the same kernel,
// aom_get_var_sse_sum_8x8_quad_sse2, is listed with four different leading
// value pairs (7,7), (6,6), (5,5) and (5,4).
// NOTE(review): the leading pair is presumably log2 width/height of the tested
// region and the trailing 0 presumably a bit-depth slot — confirm against the
// GetSseSumParams definition.
2778 const GetSseSumParams kArrayGetSseSum8x8Quad_sse2[] = {
2779 GetSseSumParams(7, 7, &aom_get_var_sse_sum_8x8_quad_sse2, 0),
2780 GetSseSumParams(6, 6, &aom_get_var_sse_sum_8x8_quad_sse2, 0),
2781 GetSseSumParams(5, 5, &aom_get_var_sse_sum_8x8_quad_sse2, 0),
2782 GetSseSumParams(5, 4, &aom_get_var_sse_sum_8x8_quad_sse2, 0)
2783 };
// Runs GetSseSum8x8QuadTest once per table entry, under the "SSE2" prefix.
2784 INSTANTIATE_TEST_SUITE_P(SSE2, GetSseSum8x8QuadTest,
2785 ::testing::ValuesIn(kArrayGetSseSum8x8Quad_sse2));
2786
// Parameter table for GetSseSum16x16DualTest: the same kernel,
// aom_get_var_sse_sum_16x16_dual_sse2, is listed with four different leading
// value pairs (7,7), (6,6), (5,5) and (5,4).
// NOTE(review): the leading pair is presumably log2 width/height of the tested
// region and the trailing 0 presumably a bit-depth slot — confirm against the
// GetSseSumParamsDual definition.
2787 const GetSseSumParamsDual kArrayGetSseSum16x16Dual_sse2[] = {
2788 GetSseSumParamsDual(7, 7, &aom_get_var_sse_sum_16x16_dual_sse2, 0),
2789 GetSseSumParamsDual(6, 6, &aom_get_var_sse_sum_16x16_dual_sse2, 0),
2790 GetSseSumParamsDual(5, 5, &aom_get_var_sse_sum_16x16_dual_sse2, 0),
2791 GetSseSumParamsDual(5, 4, &aom_get_var_sse_sum_16x16_dual_sse2, 0)
2792 };
// Runs GetSseSum16x16DualTest once per table entry, under the "SSE2" prefix.
2793 INSTANTIATE_TEST_SUITE_P(SSE2, GetSseSum16x16DualTest,
2794 ::testing::ValuesIn(kArrayGetSseSum16x16Dual_sse2));
2795
2796 #if CONFIG_AV1_HIGHBITDEPTH
2797 #if HAVE_SSE2
// Registers aom_mse_wxh_16bit_highbd_sse2 with MseHBDWxHTest under the "SSE2"
// prefix, once for each combination of 3 or 2 in the first two parameter
// slots; every entry passes 10 as the last argument.
// NOTE(review): presumably log2 width/height followed by bit depth — confirm
// against the MseHBDWxHParams definition.
2798 INSTANTIATE_TEST_SUITE_P(
2799 SSE2, MseHBDWxHTest,
2800 ::testing::Values(MseHBDWxHParams(3, 3, &aom_mse_wxh_16bit_highbd_sse2, 10),
2801 MseHBDWxHParams(3, 2, &aom_mse_wxh_16bit_highbd_sse2, 10),
2802 MseHBDWxHParams(2, 3, &aom_mse_wxh_16bit_highbd_sse2, 10),
2803 MseHBDWxHParams(2, 2, &aom_mse_wxh_16bit_highbd_sse2,
2804 10)));
2805
// Registers the SSE2 high-bitdepth MSE kernels with AvxHBDMseTest: 16x16 and
// 8x8 for each of 12-, 10- and 8-bit.
// In every entry the first two values are log2 of the block width and height
// in the kernel name, and the last value equals the bit depth in the name.
2806 INSTANTIATE_TEST_SUITE_P(
2807 SSE2, AvxHBDMseTest,
2808 ::testing::Values(MseParams(4, 4, &aom_highbd_12_mse16x16_sse2, 12),
2809 MseParams(3, 3, &aom_highbd_12_mse8x8_sse2, 12),
2810 MseParams(4, 4, &aom_highbd_10_mse16x16_sse2, 10),
2811 MseParams(3, 3, &aom_highbd_10_mse8x8_sse2, 10),
2812 MseParams(4, 4, &aom_highbd_8_mse16x16_sse2, 8),
2813 MseParams(3, 3, &aom_highbd_8_mse8x8_sse2, 8)));
2814 #endif // HAVE_SSE2
2815 #if HAVE_SSE4_1
// Registers the SSE4.1 high-bitdepth 4x4 sub-pixel variance kernels for 8-,
// 10- and 12-bit. Each entry passes (2, 2), i.e. log2 of the 4x4 size in the
// kernel name, followed by the bit depth from the kernel name.
// NOTE(review): these aom_highbd_* kernels are registered under
// AvxSubpelVarianceTest, whereas the other high-bitdepth sub-pixel variance
// tables in this file use AvxHBDSubpelVarianceTest — confirm the suite name is
// intentional.
2816 INSTANTIATE_TEST_SUITE_P(
2817 SSE4_1, AvxSubpelVarianceTest,
2818 ::testing::Values(
2819 SubpelVarianceParams(2, 2, &aom_highbd_8_sub_pixel_variance4x4_sse4_1,
2820 8),
2821 SubpelVarianceParams(2, 2, &aom_highbd_10_sub_pixel_variance4x4_sse4_1,
2822 10),
2823 SubpelVarianceParams(2, 2, &aom_highbd_12_sub_pixel_variance4x4_sse4_1,
2824 12)));
2825
// Registers the SSE4.1 high-bitdepth 4x4 sub-pixel averaging variance kernels
// for 8-, 10- and 12-bit. Each entry passes (2, 2), i.e. log2 of the 4x4 size
// in the kernel name, followed by the bit depth from the kernel name.
// NOTE(review): these aom_highbd_* kernels are registered under
// AvxSubpelAvgVarianceTest, whereas the C high-bitdepth table in this file
// uses AvxHBDSubpelAvgVarianceTest — confirm the suite name is intentional.
2826 INSTANTIATE_TEST_SUITE_P(
2827 SSE4_1, AvxSubpelAvgVarianceTest,
2828 ::testing::Values(
2829 SubpelAvgVarianceParams(2, 2,
2830 &aom_highbd_8_sub_pixel_avg_variance4x4_sse4_1,
2831 8),
2832 SubpelAvgVarianceParams(2, 2,
2833 &aom_highbd_10_sub_pixel_avg_variance4x4_sse4_1,
2834 10),
2835 SubpelAvgVarianceParams(2, 2,
2836 &aom_highbd_12_sub_pixel_avg_variance4x4_sse4_1,
2837 12)));
2838 #endif // HAVE_SSE4_1
2839
2840 #if HAVE_AVX2
// Registers the single AVX2 high-bitdepth MSE kernel listed here, the 10-bit
// 16x16 variant, with AvxHBDMseTest under the "AVX2" prefix.
2841 INSTANTIATE_TEST_SUITE_P(
2842 AVX2, AvxHBDMseTest,
2843 ::testing::Values(MseParams(4, 4, &aom_highbd_10_mse16x16_avx2, 10)));
2844 #endif // HAVE_AVX2
2845
// Parameter table for the SSE2 high-bitdepth variance kernels, listed for
// 12-, 10- and 8-bit in that order; the smallest size in the unconditional
// part is 8x8.
// In every row the first two values are log2 of the block width and height
// that appear in the kernel name, and the last value equals the bit depth in
// the kernel name.
// Entry order matters: googletest numbers the generated tests by position in
// the array handed to ValuesIn().
2846 const VarianceParams kArrayHBDVariance_sse2[] = {
2847 VarianceParams(7, 7, &aom_highbd_12_variance128x128_sse2, 12),
2848 VarianceParams(7, 6, &aom_highbd_12_variance128x64_sse2, 12),
2849 VarianceParams(6, 7, &aom_highbd_12_variance64x128_sse2, 12),
2850 VarianceParams(6, 6, &aom_highbd_12_variance64x64_sse2, 12),
2851 VarianceParams(6, 5, &aom_highbd_12_variance64x32_sse2, 12),
2852 VarianceParams(5, 6, &aom_highbd_12_variance32x64_sse2, 12),
2853 VarianceParams(5, 5, &aom_highbd_12_variance32x32_sse2, 12),
2854 VarianceParams(5, 4, &aom_highbd_12_variance32x16_sse2, 12),
2855 VarianceParams(4, 5, &aom_highbd_12_variance16x32_sse2, 12),
2856 VarianceParams(4, 4, &aom_highbd_12_variance16x16_sse2, 12),
2857 VarianceParams(4, 3, &aom_highbd_12_variance16x8_sse2, 12),
2858 VarianceParams(3, 4, &aom_highbd_12_variance8x16_sse2, 12),
2859 VarianceParams(3, 3, &aom_highbd_12_variance8x8_sse2, 12),
2860 VarianceParams(7, 7, &aom_highbd_10_variance128x128_sse2, 10),
2861 VarianceParams(7, 6, &aom_highbd_10_variance128x64_sse2, 10),
2862 VarianceParams(6, 7, &aom_highbd_10_variance64x128_sse2, 10),
2863 VarianceParams(6, 6, &aom_highbd_10_variance64x64_sse2, 10),
2864 VarianceParams(6, 5, &aom_highbd_10_variance64x32_sse2, 10),
2865 VarianceParams(5, 6, &aom_highbd_10_variance32x64_sse2, 10),
2866 VarianceParams(5, 5, &aom_highbd_10_variance32x32_sse2, 10),
2867 VarianceParams(5, 4, &aom_highbd_10_variance32x16_sse2, 10),
2868 VarianceParams(4, 5, &aom_highbd_10_variance16x32_sse2, 10),
2869 VarianceParams(4, 4, &aom_highbd_10_variance16x16_sse2, 10),
2870 VarianceParams(4, 3, &aom_highbd_10_variance16x8_sse2, 10),
2871 VarianceParams(3, 4, &aom_highbd_10_variance8x16_sse2, 10),
2872 VarianceParams(3, 3, &aom_highbd_10_variance8x8_sse2, 10),
2873 VarianceParams(7, 7, &aom_highbd_8_variance128x128_sse2, 8),
2874 VarianceParams(7, 6, &aom_highbd_8_variance128x64_sse2, 8),
2875 VarianceParams(6, 7, &aom_highbd_8_variance64x128_sse2, 8),
2876 VarianceParams(6, 6, &aom_highbd_8_variance64x64_sse2, 8),
2877 VarianceParams(6, 5, &aom_highbd_8_variance64x32_sse2, 8),
2878 VarianceParams(5, 6, &aom_highbd_8_variance32x64_sse2, 8),
2879 VarianceParams(5, 5, &aom_highbd_8_variance32x32_sse2, 8),
2880 VarianceParams(5, 4, &aom_highbd_8_variance32x16_sse2, 8),
2881 VarianceParams(4, 5, &aom_highbd_8_variance16x32_sse2, 8),
2882 VarianceParams(4, 4, &aom_highbd_8_variance16x16_sse2, 8),
2883 VarianceParams(4, 3, &aom_highbd_8_variance16x8_sse2, 8),
2884 VarianceParams(3, 4, &aom_highbd_8_variance8x16_sse2, 8),
2885 VarianceParams(3, 3, &aom_highbd_8_variance8x8_sse2, 8),
// The 4:1 and 1:4 block sizes are only listed when CONFIG_REALTIME_ONLY is 0.
// The 16x4 and 4x16 rows are commented out at every bit depth; the reason is
// not recorded here.
2886 #if !CONFIG_REALTIME_ONLY
2887 VarianceParams(6, 4, &aom_highbd_12_variance64x16_sse2, 12),
2888 VarianceParams(4, 6, &aom_highbd_12_variance16x64_sse2, 12),
2889 VarianceParams(5, 3, &aom_highbd_12_variance32x8_sse2, 12),
2890 VarianceParams(3, 5, &aom_highbd_12_variance8x32_sse2, 12),
2891 // VarianceParams(4, 2, &aom_highbd_12_variance16x4_sse2, 12),
2892 // VarianceParams(2, 4, &aom_highbd_12_variance4x16_sse2, 12),
2893 VarianceParams(6, 4, &aom_highbd_10_variance64x16_sse2, 10),
2894 VarianceParams(4, 6, &aom_highbd_10_variance16x64_sse2, 10),
2895 VarianceParams(5, 3, &aom_highbd_10_variance32x8_sse2, 10),
2896 VarianceParams(3, 5, &aom_highbd_10_variance8x32_sse2, 10),
2897 // VarianceParams(4, 2, &aom_highbd_10_variance16x4_sse2, 10),
2898 // VarianceParams(2, 4, &aom_highbd_10_variance4x16_sse2, 10),
2899 VarianceParams(6, 4, &aom_highbd_8_variance64x16_sse2, 8),
2900 VarianceParams(4, 6, &aom_highbd_8_variance16x64_sse2, 8),
2901 VarianceParams(5, 3, &aom_highbd_8_variance32x8_sse2, 8),
2902 VarianceParams(3, 5, &aom_highbd_8_variance8x32_sse2, 8),
2903 // VarianceParams(4, 2, &aom_highbd_8_variance16x4_sse2, 8),
2904 // VarianceParams(2, 4, &aom_highbd_8_variance4x16_sse2, 8),
2905 #endif
2906 };
// Runs AvxHBDVarianceTest once per table entry, under the "SSE2" prefix.
2907 INSTANTIATE_TEST_SUITE_P(SSE2, AvxHBDVarianceTest,
2908 ::testing::ValuesIn(kArrayHBDVariance_sse2));
2909
2910 #if HAVE_AVX2
2911
// Registers aom_mse_wxh_16bit_highbd_avx2 with MseHBDWxHTest under the "AVX2"
// prefix, once for each combination of 3 or 2 in the first two parameter
// slots; every entry passes 10 as the last argument.
// NOTE(review): presumably log2 width/height followed by bit depth — confirm
// against the MseHBDWxHParams definition.
2912 INSTANTIATE_TEST_SUITE_P(
2913 AVX2, MseHBDWxHTest,
2914 ::testing::Values(MseHBDWxHParams(3, 3, &aom_mse_wxh_16bit_highbd_avx2, 10),
2915 MseHBDWxHParams(3, 2, &aom_mse_wxh_16bit_highbd_avx2, 10),
2916 MseHBDWxHParams(2, 3, &aom_mse_wxh_16bit_highbd_avx2, 10),
2917 MseHBDWxHParams(2, 2, &aom_mse_wxh_16bit_highbd_avx2,
2918 10)));
2919
// Parameter table for the AVX2 high-bitdepth variance kernels. Only 10-bit
// kernels are listed, from 128x128 down to 8x8.
// In every row the first two values are log2 of the block width and height
// that appear in the kernel name, and the last value is 10, matching the bit
// depth in the kernel name.
// Entry order matters: googletest numbers the generated tests by position in
// the array handed to ValuesIn().
2920 const VarianceParams kArrayHBDVariance_avx2[] = {
2921 VarianceParams(7, 7, &aom_highbd_10_variance128x128_avx2, 10),
2922 VarianceParams(7, 6, &aom_highbd_10_variance128x64_avx2, 10),
2923 VarianceParams(6, 7, &aom_highbd_10_variance64x128_avx2, 10),
2924 VarianceParams(6, 6, &aom_highbd_10_variance64x64_avx2, 10),
2925 VarianceParams(6, 5, &aom_highbd_10_variance64x32_avx2, 10),
2926 VarianceParams(5, 6, &aom_highbd_10_variance32x64_avx2, 10),
2927 VarianceParams(5, 5, &aom_highbd_10_variance32x32_avx2, 10),
2928 VarianceParams(5, 4, &aom_highbd_10_variance32x16_avx2, 10),
2929 VarianceParams(4, 5, &aom_highbd_10_variance16x32_avx2, 10),
2930 VarianceParams(4, 4, &aom_highbd_10_variance16x16_avx2, 10),
2931 VarianceParams(4, 3, &aom_highbd_10_variance16x8_avx2, 10),
2932 VarianceParams(3, 4, &aom_highbd_10_variance8x16_avx2, 10),
2933 VarianceParams(3, 3, &aom_highbd_10_variance8x8_avx2, 10),
// The 64x16, 32x8, 16x64 and 8x32 sizes are only listed when
// CONFIG_REALTIME_ONLY is 0.
2934 #if !CONFIG_REALTIME_ONLY
2935 VarianceParams(6, 4, &aom_highbd_10_variance64x16_avx2, 10),
2936 VarianceParams(5, 3, &aom_highbd_10_variance32x8_avx2, 10),
2937 VarianceParams(4, 6, &aom_highbd_10_variance16x64_avx2, 10),
2938 VarianceParams(3, 5, &aom_highbd_10_variance8x32_avx2, 10),
2939 #endif
2940 };
2941
// Runs AvxHBDVarianceTest once per table entry, under the "AVX2" prefix.
2942 INSTANTIATE_TEST_SUITE_P(AVX2, AvxHBDVarianceTest,
2943 ::testing::ValuesIn(kArrayHBDVariance_avx2));
2944
// Parameter table for the AVX2 high-bitdepth sub-pixel variance kernels. Only
// 10-bit kernels are listed, from 128x128 down to 8x8, and there is no
// CONFIG_REALTIME_ONLY-conditional section in this table.
// In every row the first two values are log2 of the block width and height
// that appear in the kernel name, and the last value is 10, matching the bit
// depth in the kernel name.
// Entry order matters: googletest numbers the generated tests by position in
// the array handed to ValuesIn().
2945 const SubpelVarianceParams kArrayHBDSubpelVariance_avx2[] = {
2946 SubpelVarianceParams(7, 7, &aom_highbd_10_sub_pixel_variance128x128_avx2, 10),
2947 SubpelVarianceParams(7, 6, &aom_highbd_10_sub_pixel_variance128x64_avx2, 10),
2948 SubpelVarianceParams(6, 7, &aom_highbd_10_sub_pixel_variance64x128_avx2, 10),
2949 SubpelVarianceParams(6, 6, &aom_highbd_10_sub_pixel_variance64x64_avx2, 10),
2950 SubpelVarianceParams(6, 5, &aom_highbd_10_sub_pixel_variance64x32_avx2, 10),
2951 SubpelVarianceParams(5, 6, &aom_highbd_10_sub_pixel_variance32x64_avx2, 10),
2952 SubpelVarianceParams(5, 5, &aom_highbd_10_sub_pixel_variance32x32_avx2, 10),
2953 SubpelVarianceParams(5, 4, &aom_highbd_10_sub_pixel_variance32x16_avx2, 10),
2954 SubpelVarianceParams(4, 5, &aom_highbd_10_sub_pixel_variance16x32_avx2, 10),
2955 SubpelVarianceParams(4, 4, &aom_highbd_10_sub_pixel_variance16x16_avx2, 10),
2956 SubpelVarianceParams(4, 3, &aom_highbd_10_sub_pixel_variance16x8_avx2, 10),
2957 SubpelVarianceParams(3, 4, &aom_highbd_10_sub_pixel_variance8x16_avx2, 10),
2958 SubpelVarianceParams(3, 3, &aom_highbd_10_sub_pixel_variance8x8_avx2, 10),
2959 };
2960
// Runs AvxHBDSubpelVarianceTest once per table entry, under the "AVX2" prefix.
2961 INSTANTIATE_TEST_SUITE_P(AVX2, AvxHBDSubpelVarianceTest,
2962 ::testing::ValuesIn(kArrayHBDSubpelVariance_avx2));
2963 #endif // HAVE_AVX2
2964
2965 const SubpelVarianceParams kArrayHBDSubpelVariance_sse2[] = {
2966 SubpelVarianceParams(7, 7, &aom_highbd_12_sub_pixel_variance128x128_sse2, 12),
2967 SubpelVarianceParams(7, 6, &aom_highbd_12_sub_pixel_variance128x64_sse2, 12),
2968 SubpelVarianceParams(6, 7, &aom_highbd_12_sub_pixel_variance64x128_sse2, 12),
2969 SubpelVarianceParams(6, 6, &aom_highbd_12_sub_pixel_variance64x64_sse2, 12),
2970 SubpelVarianceParams(6, 5, &aom_highbd_12_sub_pixel_variance64x32_sse2, 12),
2971 SubpelVarianceParams(5, 6, &aom_highbd_12_sub_pixel_variance32x64_sse2, 12),
2972 SubpelVarianceParams(5, 5, &aom_highbd_12_sub_pixel_variance32x32_sse2, 12),
2973 SubpelVarianceParams(5, 4, &aom_highbd_12_sub_pixel_variance32x16_sse2, 12),
2974 SubpelVarianceParams(4, 5, &aom_highbd_12_sub_pixel_variance16x32_sse2, 12),
2975 SubpelVarianceParams(4, 4, &aom_highbd_12_sub_pixel_variance16x16_sse2, 12),
2976 SubpelVarianceParams(4, 3, &aom_highbd_12_sub_pixel_variance16x8_sse2, 12),
2977 SubpelVarianceParams(3, 4, &aom_highbd_12_sub_pixel_variance8x16_sse2, 12),
2978 SubpelVarianceParams(3, 3, &aom_highbd_12_sub_pixel_variance8x8_sse2, 12),
2979 SubpelVarianceParams(3, 2, &aom_highbd_12_sub_pixel_variance8x4_sse2, 12),
2980 SubpelVarianceParams(7, 7, &aom_highbd_10_sub_pixel_variance128x128_sse2, 10),
2981 SubpelVarianceParams(7, 6, &aom_highbd_10_sub_pixel_variance128x64_sse2, 10),
2982 SubpelVarianceParams(6, 7, &aom_highbd_10_sub_pixel_variance64x128_sse2, 10),
2983 SubpelVarianceParams(6, 6, &aom_highbd_10_sub_pixel_variance64x64_sse2, 10),
2984 SubpelVarianceParams(6, 5, &aom_highbd_10_sub_pixel_variance64x32_sse2, 10),
2985 SubpelVarianceParams(5, 6, &aom_highbd_10_sub_pixel_variance32x64_sse2, 10),
2986 SubpelVarianceParams(5, 5, &aom_highbd_10_sub_pixel_variance32x32_sse2, 10),
2987 SubpelVarianceParams(5, 4, &aom_highbd_10_sub_pixel_variance32x16_sse2, 10),
2988 SubpelVarianceParams(4, 5, &aom_highbd_10_sub_pixel_variance16x32_sse2, 10),
2989 SubpelVarianceParams(4, 4, &aom_highbd_10_sub_pixel_variance16x16_sse2, 10),
2990 SubpelVarianceParams(4, 3, &aom_highbd_10_sub_pixel_variance16x8_sse2, 10),
2991 SubpelVarianceParams(3, 4, &aom_highbd_10_sub_pixel_variance8x16_sse2, 10),
2992 SubpelVarianceParams(3, 3, &aom_highbd_10_sub_pixel_variance8x8_sse2, 10),
2993 SubpelVarianceParams(3, 2, &aom_highbd_10_sub_pixel_variance8x4_sse2, 10),
2994 SubpelVarianceParams(7, 7, &aom_highbd_8_sub_pixel_variance128x128_sse2, 8),
2995 SubpelVarianceParams(7, 6, &aom_highbd_8_sub_pixel_variance128x64_sse2, 8),
2996 SubpelVarianceParams(6, 7, &aom_highbd_8_sub_pixel_variance64x128_sse2, 8),
2997 SubpelVarianceParams(6, 6, &aom_highbd_8_sub_pixel_variance64x64_sse2, 8),
2998 SubpelVarianceParams(6, 5, &aom_highbd_8_sub_pixel_variance64x32_sse2, 8),
2999 SubpelVarianceParams(5, 6, &aom_highbd_8_sub_pixel_variance32x64_sse2, 8),
3000 SubpelVarianceParams(5, 5, &aom_highbd_8_sub_pixel_variance32x32_sse2, 8),
3001 SubpelVarianceParams(5, 4, &aom_highbd_8_sub_pixel_variance32x16_sse2, 8),
3002 SubpelVarianceParams(4, 5, &aom_highbd_8_sub_pixel_variance16x32_sse2, 8),
3003 SubpelVarianceParams(4, 4, &aom_highbd_8_sub_pixel_variance16x16_sse2, 8),
3004 SubpelVarianceParams(4, 3, &aom_highbd_8_sub_pixel_variance16x8_sse2, 8),
3005 SubpelVarianceParams(3, 4, &aom_highbd_8_sub_pixel_variance8x16_sse2, 8),
3006 SubpelVarianceParams(3, 3, &aom_highbd_8_sub_pixel_variance8x8_sse2, 8),
3007 SubpelVarianceParams(3, 2, &aom_highbd_8_sub_pixel_variance8x4_sse2, 8),
3008 #if !CONFIG_REALTIME_ONLY
3009 SubpelVarianceParams(6, 4, &aom_highbd_12_sub_pixel_variance64x16_sse2, 12),
3010 SubpelVarianceParams(4, 6, &aom_highbd_12_sub_pixel_variance16x64_sse2, 12),
3011 SubpelVarianceParams(5, 3, &aom_highbd_12_sub_pixel_variance32x8_sse2, 12),
3012 SubpelVarianceParams(3, 5, &aom_highbd_12_sub_pixel_variance8x32_sse2, 12),
3013 SubpelVarianceParams(4, 2, &aom_highbd_12_sub_pixel_variance16x4_sse2, 12),
3014 // SubpelVarianceParams(2, 4, &aom_highbd_12_sub_pixel_variance4x16_sse2, 12),
3015 SubpelVarianceParams(6, 4, &aom_highbd_10_sub_pixel_variance64x16_sse2, 10),
3016 SubpelVarianceParams(4, 6, &aom_highbd_10_sub_pixel_variance16x64_sse2, 10),
3017 SubpelVarianceParams(5, 3, &aom_highbd_10_sub_pixel_variance32x8_sse2, 10),
3018 SubpelVarianceParams(3, 5, &aom_highbd_10_sub_pixel_variance8x32_sse2, 10),
3019 SubpelVarianceParams(4, 2, &aom_highbd_10_sub_pixel_variance16x4_sse2, 10),
3020 // SubpelVarianceParams(2, 4, &aom_highbd_10_sub_pixel_variance4x16_sse2, 10),
3021 SubpelVarianceParams(6, 4, &aom_highbd_8_sub_pixel_variance64x16_sse2, 8),
3022 SubpelVarianceParams(4, 6, &aom_highbd_8_sub_pixel_variance16x64_sse2, 8),
3023 SubpelVarianceParams(5, 3, &aom_highbd_8_sub_pixel_variance32x8_sse2, 8),
3024 SubpelVarianceParams(3, 5, &aom_highbd_8_sub_pixel_variance8x32_sse2, 8),
3025 SubpelVarianceParams(4, 2, &aom_highbd_8_sub_pixel_variance16x4_sse2, 8),
3026 // SubpelVarianceParams(2, 4, &aom_highbd_8_sub_pixel_variance4x16_sse2, 8),
3027 #endif
3028 };
3029 INSTANTIATE_TEST_SUITE_P(SSE2, AvxHBDSubpelVarianceTest,
3030 ::testing::ValuesIn(kArrayHBDSubpelVariance_sse2));
3031
3032 const SubpelAvgVarianceParams kArrayHBDSubpelAvgVariance_sse2[] = {
3033 SubpelAvgVarianceParams(6, 6, &aom_highbd_12_sub_pixel_avg_variance64x64_sse2,
3034 12),
3035 SubpelAvgVarianceParams(6, 5, &aom_highbd_12_sub_pixel_avg_variance64x32_sse2,
3036 12),
3037 SubpelAvgVarianceParams(5, 6, &aom_highbd_12_sub_pixel_avg_variance32x64_sse2,
3038 12),
3039 SubpelAvgVarianceParams(5, 5, &aom_highbd_12_sub_pixel_avg_variance32x32_sse2,
3040 12),
3041 SubpelAvgVarianceParams(5, 4, &aom_highbd_12_sub_pixel_avg_variance32x16_sse2,
3042 12),
3043 SubpelAvgVarianceParams(4, 5, &aom_highbd_12_sub_pixel_avg_variance16x32_sse2,
3044 12),
3045 SubpelAvgVarianceParams(4, 4, &aom_highbd_12_sub_pixel_avg_variance16x16_sse2,
3046 12),
3047 SubpelAvgVarianceParams(4, 3, &aom_highbd_12_sub_pixel_avg_variance16x8_sse2,
3048 12),
3049 SubpelAvgVarianceParams(3, 4, &aom_highbd_12_sub_pixel_avg_variance8x16_sse2,
3050 12),
3051 SubpelAvgVarianceParams(3, 3, &aom_highbd_12_sub_pixel_avg_variance8x8_sse2,
3052 12),
3053 SubpelAvgVarianceParams(3, 2, &aom_highbd_12_sub_pixel_avg_variance8x4_sse2,
3054 12),
3055 SubpelAvgVarianceParams(6, 6, &aom_highbd_10_sub_pixel_avg_variance64x64_sse2,
3056 10),
3057 SubpelAvgVarianceParams(6, 5, &aom_highbd_10_sub_pixel_avg_variance64x32_sse2,
3058 10),
3059 SubpelAvgVarianceParams(5, 6, &aom_highbd_10_sub_pixel_avg_variance32x64_sse2,
3060 10),
3061 SubpelAvgVarianceParams(5, 5, &aom_highbd_10_sub_pixel_avg_variance32x32_sse2,
3062 10),
3063 SubpelAvgVarianceParams(5, 4, &aom_highbd_10_sub_pixel_avg_variance32x16_sse2,
3064 10),
3065 SubpelAvgVarianceParams(4, 5, &aom_highbd_10_sub_pixel_avg_variance16x32_sse2,
3066 10),
3067 SubpelAvgVarianceParams(4, 4, &aom_highbd_10_sub_pixel_avg_variance16x16_sse2,
3068 10),
3069 SubpelAvgVarianceParams(4, 3, &aom_highbd_10_sub_pixel_avg_variance16x8_sse2,
3070 10),
3071 SubpelAvgVarianceParams(3, 4, &aom_highbd_10_sub_pixel_avg_variance8x16_sse2,
3072 10),
3073 SubpelAvgVarianceParams(3, 3, &aom_highbd_10_sub_pixel_avg_variance8x8_sse2,
3074 10),
3075 SubpelAvgVarianceParams(3, 2, &aom_highbd_10_sub_pixel_avg_variance8x4_sse2,
3076 10),
3077 SubpelAvgVarianceParams(6, 6, &aom_highbd_8_sub_pixel_avg_variance64x64_sse2,
3078 8),
3079 SubpelAvgVarianceParams(6, 5, &aom_highbd_8_sub_pixel_avg_variance64x32_sse2,
3080 8),
3081 SubpelAvgVarianceParams(5, 6, &aom_highbd_8_sub_pixel_avg_variance32x64_sse2,
3082 8),
3083 SubpelAvgVarianceParams(5, 5, &aom_highbd_8_sub_pixel_avg_variance32x32_sse2,
3084 8),
3085 SubpelAvgVarianceParams(5, 4, &aom_highbd_8_sub_pixel_avg_variance32x16_sse2,
3086 8),
3087 SubpelAvgVarianceParams(4, 5, &aom_highbd_8_sub_pixel_avg_variance16x32_sse2,
3088 8),
3089 SubpelAvgVarianceParams(4, 4, &aom_highbd_8_sub_pixel_avg_variance16x16_sse2,
3090 8),
3091 SubpelAvgVarianceParams(4, 3, &aom_highbd_8_sub_pixel_avg_variance16x8_sse2,
3092 8),
3093 SubpelAvgVarianceParams(3, 4, &aom_highbd_8_sub_pixel_avg_variance8x16_sse2,
3094 8),
3095 SubpelAvgVarianceParams(3, 3, &aom_highbd_8_sub_pixel_avg_variance8x8_sse2,
3096 8),
3097 SubpelAvgVarianceParams(3, 2, &aom_highbd_8_sub_pixel_avg_variance8x4_sse2,
3098 8),
3099
3100 #if !CONFIG_REALTIME_ONLY
3101 SubpelAvgVarianceParams(6, 4, &aom_highbd_12_sub_pixel_avg_variance64x16_sse2,
3102 12),
3103 SubpelAvgVarianceParams(4, 6, &aom_highbd_12_sub_pixel_avg_variance16x64_sse2,
3104 12),
3105 SubpelAvgVarianceParams(5, 3, &aom_highbd_12_sub_pixel_avg_variance32x8_sse2,
3106 12),
3107 SubpelAvgVarianceParams(3, 5, &aom_highbd_12_sub_pixel_avg_variance8x32_sse2,
3108 12),
3109 SubpelAvgVarianceParams(4, 2, &aom_highbd_12_sub_pixel_avg_variance16x4_sse2,
3110 12),
3111 // SubpelAvgVarianceParams(2, 4,
3112 // &aom_highbd_12_sub_pixel_avg_variance4x16_sse2, 12),
3113 SubpelAvgVarianceParams(6, 4, &aom_highbd_10_sub_pixel_avg_variance64x16_sse2,
3114 10),
3115 SubpelAvgVarianceParams(4, 6, &aom_highbd_10_sub_pixel_avg_variance16x64_sse2,
3116 10),
3117 SubpelAvgVarianceParams(5, 3, &aom_highbd_10_sub_pixel_avg_variance32x8_sse2,
3118 10),
3119 SubpelAvgVarianceParams(3, 5, &aom_highbd_10_sub_pixel_avg_variance8x32_sse2,
3120 10),
3121 SubpelAvgVarianceParams(4, 2, &aom_highbd_10_sub_pixel_avg_variance16x4_sse2,
3122 10),
3123 // SubpelAvgVarianceParams(2, 4,
3124 // &aom_highbd_10_sub_pixel_avg_variance4x16_sse2, 10),
3125 SubpelAvgVarianceParams(6, 4, &aom_highbd_8_sub_pixel_avg_variance64x16_sse2,
3126 8),
3127 SubpelAvgVarianceParams(4, 6, &aom_highbd_8_sub_pixel_avg_variance16x64_sse2,
3128 8),
3129 SubpelAvgVarianceParams(5, 3, &aom_highbd_8_sub_pixel_avg_variance32x8_sse2,
3130 8),
3131 SubpelAvgVarianceParams(3, 5, &aom_highbd_8_sub_pixel_avg_variance8x32_sse2,
3132 8),
3133 SubpelAvgVarianceParams(4, 2, &aom_highbd_8_sub_pixel_avg_variance16x4_sse2,
3134 8),
3135 // SubpelAvgVarianceParams(2, 4,
3136 // &aom_highbd_8_sub_pixel_avg_variance4x16_sse2, 8),
3137 #endif
3138 };
3139
3140 INSTANTIATE_TEST_SUITE_P(SSE2, AvxHBDSubpelAvgVarianceTest,
3141 ::testing::ValuesIn(kArrayHBDSubpelAvgVariance_sse2));
3142 #endif // HAVE_SSE2
3143 #endif // CONFIG_AV1_HIGHBITDEPTH
3144
3145 #if HAVE_SSSE3
3146 const SubpelVarianceParams kArraySubpelVariance_ssse3[] = {
3147 SubpelVarianceParams(7, 7, &aom_sub_pixel_variance128x128_ssse3, 0),
3148 SubpelVarianceParams(7, 6, &aom_sub_pixel_variance128x64_ssse3, 0),
3149 SubpelVarianceParams(6, 7, &aom_sub_pixel_variance64x128_ssse3, 0),
3150 SubpelVarianceParams(6, 6, &aom_sub_pixel_variance64x64_ssse3, 0),
3151 SubpelVarianceParams(6, 5, &aom_sub_pixel_variance64x32_ssse3, 0),
3152 SubpelVarianceParams(5, 6, &aom_sub_pixel_variance32x64_ssse3, 0),
3153 SubpelVarianceParams(5, 5, &aom_sub_pixel_variance32x32_ssse3, 0),
3154 SubpelVarianceParams(5, 4, &aom_sub_pixel_variance32x16_ssse3, 0),
3155 SubpelVarianceParams(4, 5, &aom_sub_pixel_variance16x32_ssse3, 0),
3156 SubpelVarianceParams(4, 4, &aom_sub_pixel_variance16x16_ssse3, 0),
3157 SubpelVarianceParams(4, 3, &aom_sub_pixel_variance16x8_ssse3, 0),
3158 SubpelVarianceParams(3, 4, &aom_sub_pixel_variance8x16_ssse3, 0),
3159 SubpelVarianceParams(3, 3, &aom_sub_pixel_variance8x8_ssse3, 0),
3160 SubpelVarianceParams(3, 2, &aom_sub_pixel_variance8x4_ssse3, 0),
3161 SubpelVarianceParams(2, 3, &aom_sub_pixel_variance4x8_ssse3, 0),
3162 SubpelVarianceParams(2, 2, &aom_sub_pixel_variance4x4_ssse3, 0),
3163 #if !CONFIG_REALTIME_ONLY
3164 SubpelVarianceParams(6, 4, &aom_sub_pixel_variance64x16_ssse3, 0),
3165 SubpelVarianceParams(4, 6, &aom_sub_pixel_variance16x64_ssse3, 0),
3166 SubpelVarianceParams(5, 3, &aom_sub_pixel_variance32x8_ssse3, 0),
3167 SubpelVarianceParams(3, 5, &aom_sub_pixel_variance8x32_ssse3, 0),
3168 SubpelVarianceParams(4, 2, &aom_sub_pixel_variance16x4_ssse3, 0),
3169 SubpelVarianceParams(2, 4, &aom_sub_pixel_variance4x16_ssse3, 0),
3170 #endif
3171 };
3172 INSTANTIATE_TEST_SUITE_P(SSSE3, AvxSubpelVarianceTest,
3173 ::testing::ValuesIn(kArraySubpelVariance_ssse3));
3174
3175 const SubpelAvgVarianceParams kArraySubpelAvgVariance_ssse3[] = {
3176 SubpelAvgVarianceParams(7, 7, &aom_sub_pixel_avg_variance128x128_ssse3, 0),
3177 SubpelAvgVarianceParams(7, 6, &aom_sub_pixel_avg_variance128x64_ssse3, 0),
3178 SubpelAvgVarianceParams(6, 7, &aom_sub_pixel_avg_variance64x128_ssse3, 0),
3179 SubpelAvgVarianceParams(6, 6, &aom_sub_pixel_avg_variance64x64_ssse3, 0),
3180 SubpelAvgVarianceParams(6, 5, &aom_sub_pixel_avg_variance64x32_ssse3, 0),
3181 SubpelAvgVarianceParams(5, 6, &aom_sub_pixel_avg_variance32x64_ssse3, 0),
3182 SubpelAvgVarianceParams(5, 5, &aom_sub_pixel_avg_variance32x32_ssse3, 0),
3183 SubpelAvgVarianceParams(5, 4, &aom_sub_pixel_avg_variance32x16_ssse3, 0),
3184 SubpelAvgVarianceParams(4, 5, &aom_sub_pixel_avg_variance16x32_ssse3, 0),
3185 SubpelAvgVarianceParams(4, 4, &aom_sub_pixel_avg_variance16x16_ssse3, 0),
3186 SubpelAvgVarianceParams(4, 3, &aom_sub_pixel_avg_variance16x8_ssse3, 0),
3187 SubpelAvgVarianceParams(3, 4, &aom_sub_pixel_avg_variance8x16_ssse3, 0),
3188 SubpelAvgVarianceParams(3, 3, &aom_sub_pixel_avg_variance8x8_ssse3, 0),
3189 SubpelAvgVarianceParams(3, 2, &aom_sub_pixel_avg_variance8x4_ssse3, 0),
3190 SubpelAvgVarianceParams(2, 3, &aom_sub_pixel_avg_variance4x8_ssse3, 0),
3191 SubpelAvgVarianceParams(2, 2, &aom_sub_pixel_avg_variance4x4_ssse3, 0),
3192 #if !CONFIG_REALTIME_ONLY
3193 SubpelAvgVarianceParams(6, 4, &aom_sub_pixel_avg_variance64x16_ssse3, 0),
3194 SubpelAvgVarianceParams(4, 6, &aom_sub_pixel_avg_variance16x64_ssse3, 0),
3195 SubpelAvgVarianceParams(5, 3, &aom_sub_pixel_avg_variance32x8_ssse3, 0),
3196 SubpelAvgVarianceParams(3, 5, &aom_sub_pixel_avg_variance8x32_ssse3, 0),
3197 SubpelAvgVarianceParams(4, 2, &aom_sub_pixel_avg_variance16x4_ssse3, 0),
3198 SubpelAvgVarianceParams(2, 4, &aom_sub_pixel_avg_variance4x16_ssse3, 0),
3199 #endif
3200 };
3201 INSTANTIATE_TEST_SUITE_P(SSSE3, AvxSubpelAvgVarianceTest,
3202 ::testing::ValuesIn(kArraySubpelAvgVariance_ssse3));
3203
3204 const DistWtdSubpelAvgVarianceParams kArrayDistWtdSubpelAvgVariance_ssse3[] = {
3205 DistWtdSubpelAvgVarianceParams(
3206 7, 7, &aom_dist_wtd_sub_pixel_avg_variance128x128_ssse3, 0),
3207 DistWtdSubpelAvgVarianceParams(
3208 7, 6, &aom_dist_wtd_sub_pixel_avg_variance128x64_ssse3, 0),
3209 DistWtdSubpelAvgVarianceParams(
3210 6, 7, &aom_dist_wtd_sub_pixel_avg_variance64x128_ssse3, 0),
3211 DistWtdSubpelAvgVarianceParams(
3212 6, 6, &aom_dist_wtd_sub_pixel_avg_variance64x64_ssse3, 0),
3213 DistWtdSubpelAvgVarianceParams(
3214 6, 5, &aom_dist_wtd_sub_pixel_avg_variance64x32_ssse3, 0),
3215 DistWtdSubpelAvgVarianceParams(
3216 5, 6, &aom_dist_wtd_sub_pixel_avg_variance32x64_ssse3, 0),
3217 DistWtdSubpelAvgVarianceParams(
3218 5, 5, &aom_dist_wtd_sub_pixel_avg_variance32x32_ssse3, 0),
3219 DistWtdSubpelAvgVarianceParams(
3220 5, 4, &aom_dist_wtd_sub_pixel_avg_variance32x16_ssse3, 0),
3221 DistWtdSubpelAvgVarianceParams(
3222 4, 5, &aom_dist_wtd_sub_pixel_avg_variance16x32_ssse3, 0),
3223 DistWtdSubpelAvgVarianceParams(
3224 4, 4, &aom_dist_wtd_sub_pixel_avg_variance16x16_ssse3, 0),
3225 DistWtdSubpelAvgVarianceParams(
3226 4, 3, &aom_dist_wtd_sub_pixel_avg_variance16x8_ssse3, 0),
3227 DistWtdSubpelAvgVarianceParams(
3228 3, 4, &aom_dist_wtd_sub_pixel_avg_variance8x16_ssse3, 0),
3229 DistWtdSubpelAvgVarianceParams(
3230 3, 3, &aom_dist_wtd_sub_pixel_avg_variance8x8_ssse3, 0),
3231 DistWtdSubpelAvgVarianceParams(
3232 3, 2, &aom_dist_wtd_sub_pixel_avg_variance8x4_ssse3, 0),
3233 DistWtdSubpelAvgVarianceParams(
3234 2, 3, &aom_dist_wtd_sub_pixel_avg_variance4x8_ssse3, 0),
3235 DistWtdSubpelAvgVarianceParams(
3236 2, 2, &aom_dist_wtd_sub_pixel_avg_variance4x4_ssse3, 0),
3237 #if !CONFIG_REALTIME_ONLY
3238 DistWtdSubpelAvgVarianceParams(
3239 6, 4, &aom_dist_wtd_sub_pixel_avg_variance64x16_ssse3, 0),
3240 DistWtdSubpelAvgVarianceParams(
3241 4, 6, &aom_dist_wtd_sub_pixel_avg_variance16x64_ssse3, 0),
3242 DistWtdSubpelAvgVarianceParams(
3243 5, 3, &aom_dist_wtd_sub_pixel_avg_variance32x8_ssse3, 0),
3244 DistWtdSubpelAvgVarianceParams(
3245 3, 5, &aom_dist_wtd_sub_pixel_avg_variance8x32_ssse3, 0),
3246 DistWtdSubpelAvgVarianceParams(
3247 4, 2, &aom_dist_wtd_sub_pixel_avg_variance16x4_ssse3, 0),
3248 DistWtdSubpelAvgVarianceParams(
3249 2, 4, &aom_dist_wtd_sub_pixel_avg_variance4x16_ssse3, 0),
3250 #endif
3251 };
3252 INSTANTIATE_TEST_SUITE_P(
3253 SSSE3, AvxDistWtdSubpelAvgVarianceTest,
3254 ::testing::ValuesIn(kArrayDistWtdSubpelAvgVariance_ssse3));
3255 #endif // HAVE_SSSE3
3256
3257 #if HAVE_SSE4_1
3258 #if !CONFIG_REALTIME_ONLY
3259 INSTANTIATE_TEST_SUITE_P(
3260 SSE4_1, AvxObmcSubpelVarianceTest,
3261 ::testing::Values(
3262 ObmcSubpelVarianceParams(7, 7,
3263 &aom_obmc_sub_pixel_variance128x128_sse4_1, 0),
3264 ObmcSubpelVarianceParams(7, 6,
3265 &aom_obmc_sub_pixel_variance128x64_sse4_1, 0),
3266 ObmcSubpelVarianceParams(6, 7,
3267 &aom_obmc_sub_pixel_variance64x128_sse4_1, 0),
3268 ObmcSubpelVarianceParams(6, 6, &aom_obmc_sub_pixel_variance64x64_sse4_1,
3269 0),
3270 ObmcSubpelVarianceParams(6, 5, &aom_obmc_sub_pixel_variance64x32_sse4_1,
3271 0),
3272 ObmcSubpelVarianceParams(5, 6, &aom_obmc_sub_pixel_variance32x64_sse4_1,
3273 0),
3274 ObmcSubpelVarianceParams(5, 5, &aom_obmc_sub_pixel_variance32x32_sse4_1,
3275 0),
3276 ObmcSubpelVarianceParams(5, 4, &aom_obmc_sub_pixel_variance32x16_sse4_1,
3277 0),
3278 ObmcSubpelVarianceParams(4, 5, &aom_obmc_sub_pixel_variance16x32_sse4_1,
3279 0),
3280 ObmcSubpelVarianceParams(4, 4, &aom_obmc_sub_pixel_variance16x16_sse4_1,
3281 0),
3282 ObmcSubpelVarianceParams(4, 3, &aom_obmc_sub_pixel_variance16x8_sse4_1,
3283 0),
3284 ObmcSubpelVarianceParams(3, 4, &aom_obmc_sub_pixel_variance8x16_sse4_1,
3285 0),
3286 ObmcSubpelVarianceParams(3, 3, &aom_obmc_sub_pixel_variance8x8_sse4_1,
3287 0),
3288 ObmcSubpelVarianceParams(3, 2, &aom_obmc_sub_pixel_variance8x4_sse4_1,
3289 0),
3290 ObmcSubpelVarianceParams(2, 3, &aom_obmc_sub_pixel_variance4x8_sse4_1,
3291 0),
3292 ObmcSubpelVarianceParams(2, 2, &aom_obmc_sub_pixel_variance4x4_sse4_1,
3293 0),
3294 ObmcSubpelVarianceParams(6, 4, &aom_obmc_sub_pixel_variance64x16_sse4_1,
3295 0),
3296 ObmcSubpelVarianceParams(4, 6, &aom_obmc_sub_pixel_variance16x64_sse4_1,
3297 0),
3298 ObmcSubpelVarianceParams(5, 3, &aom_obmc_sub_pixel_variance32x8_sse4_1,
3299 0),
3300 ObmcSubpelVarianceParams(3, 5, &aom_obmc_sub_pixel_variance8x32_sse4_1,
3301 0),
3302 ObmcSubpelVarianceParams(4, 2, &aom_obmc_sub_pixel_variance16x4_sse4_1,
3303 0),
3304 ObmcSubpelVarianceParams(2, 4, &aom_obmc_sub_pixel_variance4x16_sse4_1,
3305 0)));
3306 #endif
3307 #endif // HAVE_SSE4_1
3308
3309 #if HAVE_AVX2
3310
3311 INSTANTIATE_TEST_SUITE_P(
3312 AVX2, MseWxHTest,
3313 ::testing::Values(MseWxHParams(3, 3, &aom_mse_wxh_16bit_avx2, 8),
3314 MseWxHParams(3, 2, &aom_mse_wxh_16bit_avx2, 8),
3315 MseWxHParams(2, 3, &aom_mse_wxh_16bit_avx2, 8),
3316 MseWxHParams(2, 2, &aom_mse_wxh_16bit_avx2, 8)));
3317
3318 INSTANTIATE_TEST_SUITE_P(
3319 AVX2, Mse16xHTest,
3320 ::testing::Values(Mse16xHParams(3, 3, &aom_mse_16xh_16bit_avx2, 8),
3321 Mse16xHParams(3, 2, &aom_mse_16xh_16bit_avx2, 8),
3322 Mse16xHParams(2, 3, &aom_mse_16xh_16bit_avx2, 8),
3323 Mse16xHParams(2, 2, &aom_mse_16xh_16bit_avx2, 8)));
3324
3325 INSTANTIATE_TEST_SUITE_P(AVX2, AvxMseTest,
3326 ::testing::Values(MseParams(4, 4,
3327 &aom_mse16x16_avx2)));
3328
3329 const VarianceParams kArrayVariance_avx2[] = {
3330 VarianceParams(7, 7, &aom_variance128x128_avx2),
3331 VarianceParams(7, 6, &aom_variance128x64_avx2),
3332 VarianceParams(6, 7, &aom_variance64x128_avx2),
3333 VarianceParams(6, 6, &aom_variance64x64_avx2),
3334 VarianceParams(6, 5, &aom_variance64x32_avx2),
3335 VarianceParams(5, 6, &aom_variance32x64_avx2),
3336 VarianceParams(5, 5, &aom_variance32x32_avx2),
3337 VarianceParams(5, 4, &aom_variance32x16_avx2),
3338 VarianceParams(4, 5, &aom_variance16x32_avx2),
3339 VarianceParams(4, 4, &aom_variance16x16_avx2),
3340 VarianceParams(4, 3, &aom_variance16x8_avx2),
3341 #if !CONFIG_REALTIME_ONLY
3342 VarianceParams(6, 4, &aom_variance64x16_avx2),
3343 VarianceParams(4, 6, &aom_variance16x64_avx2),
3344 VarianceParams(5, 3, &aom_variance32x8_avx2),
3345 VarianceParams(4, 2, &aom_variance16x4_avx2),
3346 #endif
3347 };
3348 INSTANTIATE_TEST_SUITE_P(AVX2, AvxVarianceTest,
3349 ::testing::ValuesIn(kArrayVariance_avx2));
3350
3351 const GetSseSumParams kArrayGetSseSum8x8Quad_avx2[] = {
3352 GetSseSumParams(7, 7, &aom_get_var_sse_sum_8x8_quad_avx2, 0),
3353 GetSseSumParams(6, 6, &aom_get_var_sse_sum_8x8_quad_avx2, 0),
3354 GetSseSumParams(5, 5, &aom_get_var_sse_sum_8x8_quad_avx2, 0),
3355 GetSseSumParams(5, 4, &aom_get_var_sse_sum_8x8_quad_avx2, 0)
3356 };
3357 INSTANTIATE_TEST_SUITE_P(AVX2, GetSseSum8x8QuadTest,
3358 ::testing::ValuesIn(kArrayGetSseSum8x8Quad_avx2));
3359
3360 const GetSseSumParamsDual kArrayGetSseSum16x16Dual_avx2[] = {
3361 GetSseSumParamsDual(7, 7, &aom_get_var_sse_sum_16x16_dual_avx2, 0),
3362 GetSseSumParamsDual(6, 6, &aom_get_var_sse_sum_16x16_dual_avx2, 0),
3363 GetSseSumParamsDual(5, 5, &aom_get_var_sse_sum_16x16_dual_avx2, 0),
3364 GetSseSumParamsDual(5, 4, &aom_get_var_sse_sum_16x16_dual_avx2, 0)
3365 };
3366 INSTANTIATE_TEST_SUITE_P(AVX2, GetSseSum16x16DualTest,
3367 ::testing::ValuesIn(kArrayGetSseSum16x16Dual_avx2));
3368
3369 const SubpelVarianceParams kArraySubpelVariance_avx2[] = {
3370 SubpelVarianceParams(7, 7, &aom_sub_pixel_variance128x128_avx2, 0),
3371 SubpelVarianceParams(7, 6, &aom_sub_pixel_variance128x64_avx2, 0),
3372 SubpelVarianceParams(6, 7, &aom_sub_pixel_variance64x128_avx2, 0),
3373 SubpelVarianceParams(6, 6, &aom_sub_pixel_variance64x64_avx2, 0),
3374 SubpelVarianceParams(6, 5, &aom_sub_pixel_variance64x32_avx2, 0),
3375 SubpelVarianceParams(5, 6, &aom_sub_pixel_variance32x64_avx2, 0),
3376 SubpelVarianceParams(5, 5, &aom_sub_pixel_variance32x32_avx2, 0),
3377 SubpelVarianceParams(5, 4, &aom_sub_pixel_variance32x16_avx2, 0),
3378
3379 SubpelVarianceParams(4, 5, &aom_sub_pixel_variance16x32_avx2, 0),
3380 SubpelVarianceParams(4, 4, &aom_sub_pixel_variance16x16_avx2, 0),
3381 SubpelVarianceParams(4, 3, &aom_sub_pixel_variance16x8_avx2, 0),
3382 #if !CONFIG_REALTIME_ONLY
3383 SubpelVarianceParams(4, 6, &aom_sub_pixel_variance16x64_avx2, 0),
3384 SubpelVarianceParams(4, 2, &aom_sub_pixel_variance16x4_avx2, 0),
3385 #endif
3386 };
3387 INSTANTIATE_TEST_SUITE_P(AVX2, AvxSubpelVarianceTest,
3388 ::testing::ValuesIn(kArraySubpelVariance_avx2));
3389
3390 INSTANTIATE_TEST_SUITE_P(
3391 AVX2, AvxSubpelAvgVarianceTest,
3392 ::testing::Values(
3393 SubpelAvgVarianceParams(7, 7, &aom_sub_pixel_avg_variance128x128_avx2,
3394 0),
3395 SubpelAvgVarianceParams(7, 6, &aom_sub_pixel_avg_variance128x64_avx2,
3396 0),
3397 SubpelAvgVarianceParams(6, 7, &aom_sub_pixel_avg_variance64x128_avx2,
3398 0),
3399 SubpelAvgVarianceParams(6, 6, &aom_sub_pixel_avg_variance64x64_avx2, 0),
3400 SubpelAvgVarianceParams(6, 5, &aom_sub_pixel_avg_variance64x32_avx2, 0),
3401 SubpelAvgVarianceParams(5, 6, &aom_sub_pixel_avg_variance32x64_avx2, 0),
3402 SubpelAvgVarianceParams(5, 5, &aom_sub_pixel_avg_variance32x32_avx2, 0),
3403 SubpelAvgVarianceParams(5, 4, &aom_sub_pixel_avg_variance32x16_avx2,
3404 0)));
3405 #endif // HAVE_AVX2
3406
3407 #if HAVE_NEON
3408 INSTANTIATE_TEST_SUITE_P(
3409 NEON, MseWxHTest,
3410 ::testing::Values(MseWxHParams(3, 3, &aom_mse_wxh_16bit_neon, 8),
3411 MseWxHParams(3, 2, &aom_mse_wxh_16bit_neon, 8),
3412 MseWxHParams(2, 3, &aom_mse_wxh_16bit_neon, 8),
3413 MseWxHParams(2, 2, &aom_mse_wxh_16bit_neon, 8)));
3414
3415 INSTANTIATE_TEST_SUITE_P(
3416 NEON, Mse16xHTest,
3417 ::testing::Values(Mse16xHParams(3, 3, &aom_mse_16xh_16bit_neon, 8),
3418 Mse16xHParams(3, 2, &aom_mse_16xh_16bit_neon, 8),
3419 Mse16xHParams(2, 3, &aom_mse_16xh_16bit_neon, 8),
3420 Mse16xHParams(2, 2, &aom_mse_16xh_16bit_neon, 8)));
3421
3422 #if !CONFIG_REALTIME_ONLY
3423 INSTANTIATE_TEST_SUITE_P(NEON, SumOfSquaresTest,
3424 ::testing::Values(aom_get_mb_ss_neon));
3425 #endif // !CONFIG_REALTIME_ONLY
3426
3427 INSTANTIATE_TEST_SUITE_P(NEON, AvxMseTest,
3428 ::testing::Values(MseParams(3, 3, &aom_mse8x8_neon),
3429 MseParams(3, 4, &aom_mse8x16_neon),
3430 MseParams(4, 4, &aom_mse16x16_neon),
3431 MseParams(4, 3, &aom_mse16x8_neon)));
3432
3433 const VarianceParams kArrayVariance_neon[] = {
3434 VarianceParams(7, 7, &aom_variance128x128_neon),
3435 VarianceParams(6, 6, &aom_variance64x64_neon),
3436 VarianceParams(7, 6, &aom_variance128x64_neon),
3437 VarianceParams(6, 7, &aom_variance64x128_neon),
3438 VarianceParams(6, 6, &aom_variance64x64_neon),
3439 VarianceParams(6, 5, &aom_variance64x32_neon),
3440 VarianceParams(5, 6, &aom_variance32x64_neon),
3441 VarianceParams(5, 5, &aom_variance32x32_neon),
3442 VarianceParams(5, 4, &aom_variance32x16_neon),
3443 VarianceParams(4, 5, &aom_variance16x32_neon),
3444 VarianceParams(4, 4, &aom_variance16x16_neon),
3445 VarianceParams(4, 3, &aom_variance16x8_neon),
3446 VarianceParams(3, 4, &aom_variance8x16_neon),
3447 VarianceParams(3, 3, &aom_variance8x8_neon),
3448 VarianceParams(3, 2, &aom_variance8x4_neon),
3449 VarianceParams(2, 3, &aom_variance4x8_neon),
3450 VarianceParams(2, 2, &aom_variance4x4_neon),
3451 #if !CONFIG_REALTIME_ONLY
3452 VarianceParams(2, 4, &aom_variance4x16_neon),
3453 VarianceParams(4, 2, &aom_variance16x4_neon),
3454 VarianceParams(3, 5, &aom_variance8x32_neon),
3455 VarianceParams(5, 3, &aom_variance32x8_neon),
3456 VarianceParams(4, 6, &aom_variance16x64_neon),
3457 VarianceParams(6, 4, &aom_variance64x16_neon),
3458 #endif
3459 };
3460
3461 INSTANTIATE_TEST_SUITE_P(NEON, AvxVarianceTest,
3462 ::testing::ValuesIn(kArrayVariance_neon));
3463
3464 const SubpelVarianceParams kArraySubpelVariance_neon[] = {
3465 SubpelVarianceParams(7, 7, &aom_sub_pixel_variance128x128_neon, 0),
3466 SubpelVarianceParams(7, 6, &aom_sub_pixel_variance128x64_neon, 0),
3467 SubpelVarianceParams(6, 7, &aom_sub_pixel_variance64x128_neon, 0),
3468 SubpelVarianceParams(6, 6, &aom_sub_pixel_variance64x64_neon, 0),
3469 SubpelVarianceParams(6, 5, &aom_sub_pixel_variance64x32_neon, 0),
3470 SubpelVarianceParams(5, 6, &aom_sub_pixel_variance32x64_neon, 0),
3471 SubpelVarianceParams(5, 5, &aom_sub_pixel_variance32x32_neon, 0),
3472 SubpelVarianceParams(5, 4, &aom_sub_pixel_variance32x16_neon, 0),
3473 SubpelVarianceParams(4, 5, &aom_sub_pixel_variance16x32_neon, 0),
3474 SubpelVarianceParams(4, 4, &aom_sub_pixel_variance16x16_neon, 0),
3475 SubpelVarianceParams(4, 3, &aom_sub_pixel_variance16x8_neon, 0),
3476 SubpelVarianceParams(3, 4, &aom_sub_pixel_variance8x16_neon, 0),
3477 SubpelVarianceParams(3, 3, &aom_sub_pixel_variance8x8_neon, 0),
3478 SubpelVarianceParams(3, 2, &aom_sub_pixel_variance8x4_neon, 0),
3479 SubpelVarianceParams(2, 3, &aom_sub_pixel_variance4x8_neon, 0),
3480 SubpelVarianceParams(2, 2, &aom_sub_pixel_variance4x4_neon, 0),
3481 #if !CONFIG_REALTIME_ONLY
3482 SubpelVarianceParams(6, 4, &aom_sub_pixel_variance64x16_neon, 0),
3483 SubpelVarianceParams(4, 6, &aom_sub_pixel_variance16x64_neon, 0),
3484 SubpelVarianceParams(5, 3, &aom_sub_pixel_variance32x8_neon, 0),
3485 SubpelVarianceParams(3, 5, &aom_sub_pixel_variance8x32_neon, 0),
3486 SubpelVarianceParams(4, 2, &aom_sub_pixel_variance16x4_neon, 0),
3487 SubpelVarianceParams(2, 4, &aom_sub_pixel_variance4x16_neon, 0),
3488 #endif
3489 };
3490 INSTANTIATE_TEST_SUITE_P(NEON, AvxSubpelVarianceTest,
3491 ::testing::ValuesIn(kArraySubpelVariance_neon));
3492
3493 const SubpelAvgVarianceParams kArraySubpelAvgVariance_neon[] = {
3494 SubpelAvgVarianceParams(7, 7, &aom_sub_pixel_avg_variance128x128_neon, 0),
3495 SubpelAvgVarianceParams(7, 6, &aom_sub_pixel_avg_variance128x64_neon, 0),
3496 SubpelAvgVarianceParams(6, 7, &aom_sub_pixel_avg_variance64x128_neon, 0),
3497 SubpelAvgVarianceParams(6, 6, &aom_sub_pixel_avg_variance64x64_neon, 0),
3498 SubpelAvgVarianceParams(6, 5, &aom_sub_pixel_avg_variance64x32_neon, 0),
3499 SubpelAvgVarianceParams(5, 6, &aom_sub_pixel_avg_variance32x64_neon, 0),
3500 SubpelAvgVarianceParams(5, 5, &aom_sub_pixel_avg_variance32x32_neon, 0),
3501 SubpelAvgVarianceParams(5, 4, &aom_sub_pixel_avg_variance32x16_neon, 0),
3502 SubpelAvgVarianceParams(4, 5, &aom_sub_pixel_avg_variance16x32_neon, 0),
3503 SubpelAvgVarianceParams(4, 4, &aom_sub_pixel_avg_variance16x16_neon, 0),
3504 SubpelAvgVarianceParams(4, 3, &aom_sub_pixel_avg_variance16x8_neon, 0),
3505 SubpelAvgVarianceParams(3, 4, &aom_sub_pixel_avg_variance8x16_neon, 0),
3506 SubpelAvgVarianceParams(3, 3, &aom_sub_pixel_avg_variance8x8_neon, 0),
3507 SubpelAvgVarianceParams(3, 2, &aom_sub_pixel_avg_variance8x4_neon, 0),
3508 SubpelAvgVarianceParams(2, 3, &aom_sub_pixel_avg_variance4x8_neon, 0),
3509 SubpelAvgVarianceParams(2, 2, &aom_sub_pixel_avg_variance4x4_neon, 0),
3510 #if !CONFIG_REALTIME_ONLY
3511 SubpelAvgVarianceParams(6, 4, &aom_sub_pixel_avg_variance64x16_neon, 0),
3512 SubpelAvgVarianceParams(4, 6, &aom_sub_pixel_avg_variance16x64_neon, 0),
3513 SubpelAvgVarianceParams(5, 3, &aom_sub_pixel_avg_variance32x8_neon, 0),
3514 SubpelAvgVarianceParams(3, 5, &aom_sub_pixel_avg_variance8x32_neon, 0),
3515 SubpelAvgVarianceParams(4, 2, &aom_sub_pixel_avg_variance16x4_neon, 0),
3516 SubpelAvgVarianceParams(2, 4, &aom_sub_pixel_avg_variance4x16_neon, 0),
3517 #endif
3518 };
3519 INSTANTIATE_TEST_SUITE_P(NEON, AvxSubpelAvgVarianceTest,
3520 ::testing::ValuesIn(kArraySubpelAvgVariance_neon));
3521
3522 const DistWtdSubpelAvgVarianceParams kArrayDistWtdSubpelAvgVariance_neon[] = {
3523 DistWtdSubpelAvgVarianceParams(
3524 6, 6, &aom_dist_wtd_sub_pixel_avg_variance64x64_neon, 0),
3525 DistWtdSubpelAvgVarianceParams(
3526 6, 5, &aom_dist_wtd_sub_pixel_avg_variance64x32_neon, 0),
3527 DistWtdSubpelAvgVarianceParams(
3528 5, 6, &aom_dist_wtd_sub_pixel_avg_variance32x64_neon, 0),
3529 DistWtdSubpelAvgVarianceParams(
3530 5, 5, &aom_dist_wtd_sub_pixel_avg_variance32x32_neon, 0),
3531 DistWtdSubpelAvgVarianceParams(
3532 5, 4, &aom_dist_wtd_sub_pixel_avg_variance32x16_neon, 0),
3533 DistWtdSubpelAvgVarianceParams(
3534 4, 5, &aom_dist_wtd_sub_pixel_avg_variance16x32_neon, 0),
3535 DistWtdSubpelAvgVarianceParams(
3536 4, 4, &aom_dist_wtd_sub_pixel_avg_variance16x16_neon, 0),
3537 DistWtdSubpelAvgVarianceParams(
3538 4, 3, &aom_dist_wtd_sub_pixel_avg_variance16x8_neon, 0),
3539 DistWtdSubpelAvgVarianceParams(
3540 3, 4, &aom_dist_wtd_sub_pixel_avg_variance8x16_neon, 0),
3541 DistWtdSubpelAvgVarianceParams(
3542 3, 3, &aom_dist_wtd_sub_pixel_avg_variance8x8_neon, 0),
3543 DistWtdSubpelAvgVarianceParams(
3544 3, 2, &aom_dist_wtd_sub_pixel_avg_variance8x4_neon, 0),
3545 DistWtdSubpelAvgVarianceParams(
3546 2, 3, &aom_dist_wtd_sub_pixel_avg_variance4x8_neon, 0),
3547 DistWtdSubpelAvgVarianceParams(
3548 2, 2, &aom_dist_wtd_sub_pixel_avg_variance4x4_neon, 0),
3549 #if !CONFIG_REALTIME_ONLY
3550 DistWtdSubpelAvgVarianceParams(
3551 6, 4, &aom_dist_wtd_sub_pixel_avg_variance64x16_neon, 0),
3552 DistWtdSubpelAvgVarianceParams(
3553 4, 6, &aom_dist_wtd_sub_pixel_avg_variance16x64_neon, 0),
3554 DistWtdSubpelAvgVarianceParams(
3555 5, 3, &aom_dist_wtd_sub_pixel_avg_variance32x8_neon, 0),
3556 DistWtdSubpelAvgVarianceParams(
3557 3, 5, &aom_dist_wtd_sub_pixel_avg_variance8x32_neon, 0),
3558 DistWtdSubpelAvgVarianceParams(
3559 4, 2, &aom_dist_wtd_sub_pixel_avg_variance16x4_neon, 0),
3560 DistWtdSubpelAvgVarianceParams(
3561 2, 4, &aom_dist_wtd_sub_pixel_avg_variance4x16_neon, 0),
3562 #endif // !CONFIG_REALTIME_ONLY
3563 };
3564 INSTANTIATE_TEST_SUITE_P(
3565 NEON, AvxDistWtdSubpelAvgVarianceTest,
3566 ::testing::ValuesIn(kArrayDistWtdSubpelAvgVariance_neon));
3567
3568 #if !CONFIG_REALTIME_ONLY
3569 const ObmcSubpelVarianceParams kArrayObmcSubpelVariance_neon[] = {
3570 ObmcSubpelVarianceParams(7, 7, &aom_obmc_sub_pixel_variance128x128_neon, 0),
3571 ObmcSubpelVarianceParams(7, 6, &aom_obmc_sub_pixel_variance128x64_neon, 0),
3572 ObmcSubpelVarianceParams(6, 7, &aom_obmc_sub_pixel_variance64x128_neon, 0),
3573 ObmcSubpelVarianceParams(6, 6, &aom_obmc_sub_pixel_variance64x64_neon, 0),
3574 ObmcSubpelVarianceParams(6, 5, &aom_obmc_sub_pixel_variance64x32_neon, 0),
3575 ObmcSubpelVarianceParams(5, 6, &aom_obmc_sub_pixel_variance32x64_neon, 0),
3576 ObmcSubpelVarianceParams(5, 5, &aom_obmc_sub_pixel_variance32x32_neon, 0),
3577 ObmcSubpelVarianceParams(5, 4, &aom_obmc_sub_pixel_variance32x16_neon, 0),
3578 ObmcSubpelVarianceParams(4, 5, &aom_obmc_sub_pixel_variance16x32_neon, 0),
3579 ObmcSubpelVarianceParams(4, 4, &aom_obmc_sub_pixel_variance16x16_neon, 0),
3580 ObmcSubpelVarianceParams(4, 3, &aom_obmc_sub_pixel_variance16x8_neon, 0),
3581 ObmcSubpelVarianceParams(3, 4, &aom_obmc_sub_pixel_variance8x16_neon, 0),
3582 ObmcSubpelVarianceParams(3, 3, &aom_obmc_sub_pixel_variance8x8_neon, 0),
3583 ObmcSubpelVarianceParams(3, 2, &aom_obmc_sub_pixel_variance8x4_neon, 0),
3584 ObmcSubpelVarianceParams(2, 3, &aom_obmc_sub_pixel_variance4x8_neon, 0),
3585 ObmcSubpelVarianceParams(2, 2, &aom_obmc_sub_pixel_variance4x4_neon, 0),
3586 ObmcSubpelVarianceParams(6, 4, &aom_obmc_sub_pixel_variance64x16_neon, 0),
3587 ObmcSubpelVarianceParams(4, 6, &aom_obmc_sub_pixel_variance16x64_neon, 0),
3588 ObmcSubpelVarianceParams(5, 3, &aom_obmc_sub_pixel_variance32x8_neon, 0),
3589 ObmcSubpelVarianceParams(3, 5, &aom_obmc_sub_pixel_variance8x32_neon, 0),
3590 ObmcSubpelVarianceParams(4, 2, &aom_obmc_sub_pixel_variance16x4_neon, 0),
3591 ObmcSubpelVarianceParams(2, 4, &aom_obmc_sub_pixel_variance4x16_neon, 0),
3592 };
3593 INSTANTIATE_TEST_SUITE_P(NEON, AvxObmcSubpelVarianceTest,
3594 ::testing::ValuesIn(kArrayObmcSubpelVariance_neon));
3595 #endif
3596
3597 const GetSseSumParams kArrayGetSseSum8x8Quad_neon[] = {
3598 GetSseSumParams(7, 7, &aom_get_var_sse_sum_8x8_quad_neon, 0),
3599 GetSseSumParams(6, 6, &aom_get_var_sse_sum_8x8_quad_neon, 0),
3600 GetSseSumParams(5, 5, &aom_get_var_sse_sum_8x8_quad_neon, 0),
3601 GetSseSumParams(5, 4, &aom_get_var_sse_sum_8x8_quad_neon, 0)
3602 };
3603 INSTANTIATE_TEST_SUITE_P(NEON, GetSseSum8x8QuadTest,
3604 ::testing::ValuesIn(kArrayGetSseSum8x8Quad_neon));
3605
// Parameter sets for GetSseSum16x16DualTest using the NEON kernel
// aom_get_var_sse_sum_16x16_dual_neon. The same kernel is listed four times;
// only the two leading size arguments differ between entries.
// NOTE(review): the leading pair presumably is log2(width), log2(height) of
// the tested region and the trailing 0 the bit-depth slot -- confirm against
// the GetSseSumParamsDual definition.
3606 const GetSseSumParamsDual kArrayGetSseSum16x16Dual_neon[] = {
3607 GetSseSumParamsDual(7, 7, &aom_get_var_sse_sum_16x16_dual_neon, 0),
3608 GetSseSumParamsDual(6, 6, &aom_get_var_sse_sum_16x16_dual_neon, 0),
3609 GetSseSumParamsDual(5, 5, &aom_get_var_sse_sum_16x16_dual_neon, 0),
3610 GetSseSumParamsDual(5, 4, &aom_get_var_sse_sum_16x16_dual_neon, 0)
3611 };
// One "NEON" test instance per table entry.
3612 INSTANTIATE_TEST_SUITE_P(NEON, GetSseSum16x16DualTest,
3613 ::testing::ValuesIn(kArrayGetSseSum16x16Dual_neon));
3614
3615 #if CONFIG_AV1_HIGHBITDEPTH
// High-bitdepth variance kernels (NEON) for AvxHBDVarianceTest, grouped by bit
// depth (12, 10, 8). In every entry the two leading integers are log2 of the
// width and height in the function name (7, 6 <-> 128x64) and the trailing
// integer repeats the bit depth in the function name.
3616 const VarianceParams kArrayHBDVariance_neon[] = {
// 12-bit kernels.
3617 VarianceParams(7, 7, &aom_highbd_12_variance128x128_neon, 12),
3618 VarianceParams(7, 6, &aom_highbd_12_variance128x64_neon, 12),
3619 VarianceParams(6, 7, &aom_highbd_12_variance64x128_neon, 12),
3620 VarianceParams(6, 6, &aom_highbd_12_variance64x64_neon, 12),
3621 VarianceParams(6, 5, &aom_highbd_12_variance64x32_neon, 12),
3622 VarianceParams(5, 6, &aom_highbd_12_variance32x64_neon, 12),
3623 VarianceParams(5, 5, &aom_highbd_12_variance32x32_neon, 12),
3624 VarianceParams(5, 4, &aom_highbd_12_variance32x16_neon, 12),
3625 VarianceParams(4, 5, &aom_highbd_12_variance16x32_neon, 12),
3626 VarianceParams(4, 4, &aom_highbd_12_variance16x16_neon, 12),
3627 VarianceParams(4, 3, &aom_highbd_12_variance16x8_neon, 12),
3628 VarianceParams(3, 4, &aom_highbd_12_variance8x16_neon, 12),
3629 VarianceParams(3, 3, &aom_highbd_12_variance8x8_neon, 12),
3630 VarianceParams(3, 2, &aom_highbd_12_variance8x4_neon, 12),
3631 VarianceParams(2, 3, &aom_highbd_12_variance4x8_neon, 12),
3632 VarianceParams(2, 2, &aom_highbd_12_variance4x4_neon, 12),
// 10-bit kernels.
3633 VarianceParams(7, 7, &aom_highbd_10_variance128x128_neon, 10),
3634 VarianceParams(7, 6, &aom_highbd_10_variance128x64_neon, 10),
3635 VarianceParams(6, 7, &aom_highbd_10_variance64x128_neon, 10),
3636 VarianceParams(6, 6, &aom_highbd_10_variance64x64_neon, 10),
3637 VarianceParams(6, 5, &aom_highbd_10_variance64x32_neon, 10),
3638 VarianceParams(5, 6, &aom_highbd_10_variance32x64_neon, 10),
3639 VarianceParams(5, 5, &aom_highbd_10_variance32x32_neon, 10),
3640 VarianceParams(5, 4, &aom_highbd_10_variance32x16_neon, 10),
3641 VarianceParams(4, 5, &aom_highbd_10_variance16x32_neon, 10),
3642 VarianceParams(4, 4, &aom_highbd_10_variance16x16_neon, 10),
3643 VarianceParams(4, 3, &aom_highbd_10_variance16x8_neon, 10),
3644 VarianceParams(3, 4, &aom_highbd_10_variance8x16_neon, 10),
3645 VarianceParams(3, 3, &aom_highbd_10_variance8x8_neon, 10),
3646 VarianceParams(3, 2, &aom_highbd_10_variance8x4_neon, 10),
3647 VarianceParams(2, 3, &aom_highbd_10_variance4x8_neon, 10),
3648 VarianceParams(2, 2, &aom_highbd_10_variance4x4_neon, 10),
// 8-bit kernels.
3649 VarianceParams(7, 7, &aom_highbd_8_variance128x128_neon, 8),
3650 VarianceParams(7, 6, &aom_highbd_8_variance128x64_neon, 8),
3651 VarianceParams(6, 7, &aom_highbd_8_variance64x128_neon, 8),
3652 VarianceParams(6, 6, &aom_highbd_8_variance64x64_neon, 8),
3653 VarianceParams(6, 5, &aom_highbd_8_variance64x32_neon, 8),
3654 VarianceParams(5, 6, &aom_highbd_8_variance32x64_neon, 8),
3655 VarianceParams(5, 5, &aom_highbd_8_variance32x32_neon, 8),
3656 VarianceParams(5, 4, &aom_highbd_8_variance32x16_neon, 8),
3657 VarianceParams(4, 5, &aom_highbd_8_variance16x32_neon, 8),
3658 VarianceParams(4, 4, &aom_highbd_8_variance16x16_neon, 8),
3659 VarianceParams(4, 3, &aom_highbd_8_variance16x8_neon, 8),
3660 VarianceParams(3, 4, &aom_highbd_8_variance8x16_neon, 8),
3661 VarianceParams(3, 3, &aom_highbd_8_variance8x8_neon, 8),
3662 VarianceParams(3, 2, &aom_highbd_8_variance8x4_neon, 8),
3663 VarianceParams(2, 3, &aom_highbd_8_variance4x8_neon, 8),
3664 VarianceParams(2, 2, &aom_highbd_8_variance4x4_neon, 8),
// The 4:1 / 1:4 shapes below are only listed when CONFIG_REALTIME_ONLY is 0.
3665 #if !CONFIG_REALTIME_ONLY
3666 VarianceParams(6, 4, &aom_highbd_12_variance64x16_neon, 12),
3667 VarianceParams(4, 6, &aom_highbd_12_variance16x64_neon, 12),
3668 VarianceParams(5, 3, &aom_highbd_12_variance32x8_neon, 12),
3669 VarianceParams(3, 5, &aom_highbd_12_variance8x32_neon, 12),
3670 VarianceParams(4, 2, &aom_highbd_12_variance16x4_neon, 12),
3671 VarianceParams(2, 4, &aom_highbd_12_variance4x16_neon, 12),
3672 VarianceParams(6, 4, &aom_highbd_10_variance64x16_neon, 10),
3673 VarianceParams(4, 6, &aom_highbd_10_variance16x64_neon, 10),
3674 VarianceParams(5, 3, &aom_highbd_10_variance32x8_neon, 10),
3675 VarianceParams(3, 5, &aom_highbd_10_variance8x32_neon, 10),
3676 VarianceParams(4, 2, &aom_highbd_10_variance16x4_neon, 10),
3677 VarianceParams(2, 4, &aom_highbd_10_variance4x16_neon, 10),
3678 VarianceParams(6, 4, &aom_highbd_8_variance64x16_neon, 8),
3679 VarianceParams(4, 6, &aom_highbd_8_variance16x64_neon, 8),
3680 VarianceParams(5, 3, &aom_highbd_8_variance32x8_neon, 8),
3681 VarianceParams(3, 5, &aom_highbd_8_variance8x32_neon, 8),
3682 VarianceParams(4, 2, &aom_highbd_8_variance16x4_neon, 8),
3683 VarianceParams(2, 4, &aom_highbd_8_variance4x16_neon, 8),
3684 #endif  // !CONFIG_REALTIME_ONLY
3685 };
3686
// One "NEON" test instance per table entry.
3687 INSTANTIATE_TEST_SUITE_P(NEON, AvxHBDVarianceTest,
3688 ::testing::ValuesIn(kArrayHBDVariance_neon));
3689
// High-bitdepth sub-pixel variance kernels (NEON) for AvxHBDSubpelVarianceTest.
// In every entry the two leading integers are log2 of the width and height in
// the function name (6, 5 <-> 64x32) and the trailing integer repeats the bit
// depth in the function name.
// NOTE(review): unlike kArrayHBDVariance_neon and
// kArrayHBDSubpelAvgVariance_neon in this file, this table has no 128x128,
// 128x64 or 64x128 entries -- confirm whether those NEON kernels exist and
// were left out on purpose.
3690 const SubpelVarianceParams kArrayHBDSubpelVariance_neon[] = {
// 12-bit kernels.
3691 SubpelVarianceParams(6, 6, &aom_highbd_12_sub_pixel_variance64x64_neon, 12),
3692 SubpelVarianceParams(6, 5, &aom_highbd_12_sub_pixel_variance64x32_neon, 12),
3693 SubpelVarianceParams(5, 6, &aom_highbd_12_sub_pixel_variance32x64_neon, 12),
3694 SubpelVarianceParams(5, 5, &aom_highbd_12_sub_pixel_variance32x32_neon, 12),
3695 SubpelVarianceParams(5, 4, &aom_highbd_12_sub_pixel_variance32x16_neon, 12),
3696 SubpelVarianceParams(4, 5, &aom_highbd_12_sub_pixel_variance16x32_neon, 12),
3697 SubpelVarianceParams(4, 4, &aom_highbd_12_sub_pixel_variance16x16_neon, 12),
3698 SubpelVarianceParams(4, 3, &aom_highbd_12_sub_pixel_variance16x8_neon, 12),
3699 SubpelVarianceParams(3, 4, &aom_highbd_12_sub_pixel_variance8x16_neon, 12),
3700 SubpelVarianceParams(3, 3, &aom_highbd_12_sub_pixel_variance8x8_neon, 12),
3701 SubpelVarianceParams(3, 2, &aom_highbd_12_sub_pixel_variance8x4_neon, 12),
3702 SubpelVarianceParams(2, 3, &aom_highbd_12_sub_pixel_variance4x8_neon, 12),
3703 SubpelVarianceParams(2, 2, &aom_highbd_12_sub_pixel_variance4x4_neon, 12),
// 10-bit kernels.
3704 SubpelVarianceParams(6, 6, &aom_highbd_10_sub_pixel_variance64x64_neon, 10),
3705 SubpelVarianceParams(6, 5, &aom_highbd_10_sub_pixel_variance64x32_neon, 10),
3706 SubpelVarianceParams(5, 6, &aom_highbd_10_sub_pixel_variance32x64_neon, 10),
3707 SubpelVarianceParams(5, 5, &aom_highbd_10_sub_pixel_variance32x32_neon, 10),
3708 SubpelVarianceParams(5, 4, &aom_highbd_10_sub_pixel_variance32x16_neon, 10),
3709 SubpelVarianceParams(4, 5, &aom_highbd_10_sub_pixel_variance16x32_neon, 10),
3710 SubpelVarianceParams(4, 4, &aom_highbd_10_sub_pixel_variance16x16_neon, 10),
3711 SubpelVarianceParams(4, 3, &aom_highbd_10_sub_pixel_variance16x8_neon, 10),
3712 SubpelVarianceParams(3, 4, &aom_highbd_10_sub_pixel_variance8x16_neon, 10),
3713 SubpelVarianceParams(3, 3, &aom_highbd_10_sub_pixel_variance8x8_neon, 10),
3714 SubpelVarianceParams(3, 2, &aom_highbd_10_sub_pixel_variance8x4_neon, 10),
3715 SubpelVarianceParams(2, 3, &aom_highbd_10_sub_pixel_variance4x8_neon, 10),
3716 SubpelVarianceParams(2, 2, &aom_highbd_10_sub_pixel_variance4x4_neon, 10),
// 8-bit kernels.
3717 SubpelVarianceParams(6, 6, &aom_highbd_8_sub_pixel_variance64x64_neon, 8),
3718 SubpelVarianceParams(6, 5, &aom_highbd_8_sub_pixel_variance64x32_neon, 8),
3719 SubpelVarianceParams(5, 6, &aom_highbd_8_sub_pixel_variance32x64_neon, 8),
3720 SubpelVarianceParams(5, 5, &aom_highbd_8_sub_pixel_variance32x32_neon, 8),
3721 SubpelVarianceParams(5, 4, &aom_highbd_8_sub_pixel_variance32x16_neon, 8),
3722 SubpelVarianceParams(4, 5, &aom_highbd_8_sub_pixel_variance16x32_neon, 8),
3723 SubpelVarianceParams(4, 4, &aom_highbd_8_sub_pixel_variance16x16_neon, 8),
3724 SubpelVarianceParams(4, 3, &aom_highbd_8_sub_pixel_variance16x8_neon, 8),
3725 SubpelVarianceParams(3, 4, &aom_highbd_8_sub_pixel_variance8x16_neon, 8),
3726 SubpelVarianceParams(3, 3, &aom_highbd_8_sub_pixel_variance8x8_neon, 8),
3727 SubpelVarianceParams(3, 2, &aom_highbd_8_sub_pixel_variance8x4_neon, 8),
3728 SubpelVarianceParams(2, 3, &aom_highbd_8_sub_pixel_variance4x8_neon, 8),
3729 SubpelVarianceParams(2, 2, &aom_highbd_8_sub_pixel_variance4x4_neon, 8),
// The 4:1 / 1:4 shapes below are only listed when CONFIG_REALTIME_ONLY is 0;
// here the groups run 8-bit, 10-bit, 12-bit.
3730 #if !CONFIG_REALTIME_ONLY
3731 SubpelVarianceParams(6, 4, &aom_highbd_8_sub_pixel_variance64x16_neon, 8),
3732 SubpelVarianceParams(4, 6, &aom_highbd_8_sub_pixel_variance16x64_neon, 8),
3733 SubpelVarianceParams(5, 3, &aom_highbd_8_sub_pixel_variance32x8_neon, 8),
3734 SubpelVarianceParams(3, 5, &aom_highbd_8_sub_pixel_variance8x32_neon, 8),
3735 SubpelVarianceParams(4, 2, &aom_highbd_8_sub_pixel_variance16x4_neon, 8),
3736 SubpelVarianceParams(2, 4, &aom_highbd_8_sub_pixel_variance4x16_neon, 8),
3737 SubpelVarianceParams(6, 4, &aom_highbd_10_sub_pixel_variance64x16_neon, 10),
3738 SubpelVarianceParams(4, 6, &aom_highbd_10_sub_pixel_variance16x64_neon, 10),
3739 SubpelVarianceParams(5, 3, &aom_highbd_10_sub_pixel_variance32x8_neon, 10),
3740 SubpelVarianceParams(3, 5, &aom_highbd_10_sub_pixel_variance8x32_neon, 10),
3741 SubpelVarianceParams(4, 2, &aom_highbd_10_sub_pixel_variance16x4_neon, 10),
3742 SubpelVarianceParams(2, 4, &aom_highbd_10_sub_pixel_variance4x16_neon, 10),
3743 SubpelVarianceParams(6, 4, &aom_highbd_12_sub_pixel_variance64x16_neon, 12),
3744 SubpelVarianceParams(4, 6, &aom_highbd_12_sub_pixel_variance16x64_neon, 12),
3745 SubpelVarianceParams(5, 3, &aom_highbd_12_sub_pixel_variance32x8_neon, 12),
3746 SubpelVarianceParams(3, 5, &aom_highbd_12_sub_pixel_variance8x32_neon, 12),
3747 SubpelVarianceParams(4, 2, &aom_highbd_12_sub_pixel_variance16x4_neon, 12),
3748 SubpelVarianceParams(2, 4, &aom_highbd_12_sub_pixel_variance4x16_neon, 12),
3749 #endif  // !CONFIG_REALTIME_ONLY
3750 };
3751
// One "NEON" test instance per table entry.
3752 INSTANTIATE_TEST_SUITE_P(NEON, AvxHBDSubpelVarianceTest,
3753 ::testing::ValuesIn(kArrayHBDSubpelVariance_neon));
3754
// High-bitdepth sub-pixel average-variance kernels (NEON) for
// AvxHBDSubpelAvgVarianceTest, grouped by bit depth (8, 10, 12). In every
// entry the two leading integers are log2 of the width and height in the
// function name (7, 6 <-> 128x64) and the trailing integer repeats the bit
// depth in the function name.
3755 const SubpelAvgVarianceParams kArrayHBDSubpelAvgVariance_neon[] = {
// 8-bit kernels.
3756 SubpelAvgVarianceParams(7, 7,
3757 &aom_highbd_8_sub_pixel_avg_variance128x128_neon, 8),
3758 SubpelAvgVarianceParams(7, 6, &aom_highbd_8_sub_pixel_avg_variance128x64_neon,
3759 8),
3760 SubpelAvgVarianceParams(6, 7, &aom_highbd_8_sub_pixel_avg_variance64x128_neon,
3761 8),
3762 SubpelAvgVarianceParams(6, 6, &aom_highbd_8_sub_pixel_avg_variance64x64_neon,
3763 8),
3764 SubpelAvgVarianceParams(6, 5, &aom_highbd_8_sub_pixel_avg_variance64x32_neon,
3765 8),
3766 SubpelAvgVarianceParams(5, 6, &aom_highbd_8_sub_pixel_avg_variance32x64_neon,
3767 8),
3768 SubpelAvgVarianceParams(5, 5, &aom_highbd_8_sub_pixel_avg_variance32x32_neon,
3769 8),
3770 SubpelAvgVarianceParams(5, 4, &aom_highbd_8_sub_pixel_avg_variance32x16_neon,
3771 8),
3772 SubpelAvgVarianceParams(4, 5, &aom_highbd_8_sub_pixel_avg_variance16x32_neon,
3773 8),
3774 SubpelAvgVarianceParams(4, 4, &aom_highbd_8_sub_pixel_avg_variance16x16_neon,
3775 8),
3776 SubpelAvgVarianceParams(4, 3, &aom_highbd_8_sub_pixel_avg_variance16x8_neon,
3777 8),
3778 SubpelAvgVarianceParams(3, 4, &aom_highbd_8_sub_pixel_avg_variance8x16_neon,
3779 8),
3780 SubpelAvgVarianceParams(3, 3, &aom_highbd_8_sub_pixel_avg_variance8x8_neon,
3781 8),
3782 SubpelAvgVarianceParams(3, 2, &aom_highbd_8_sub_pixel_avg_variance8x4_neon,
3783 8),
3784 SubpelAvgVarianceParams(2, 3, &aom_highbd_8_sub_pixel_avg_variance4x8_neon,
3785 8),
3786 SubpelAvgVarianceParams(2, 2, &aom_highbd_8_sub_pixel_avg_variance4x4_neon,
3787 8),
// 10-bit kernels.
3788 SubpelAvgVarianceParams(
3789 7, 7, &aom_highbd_10_sub_pixel_avg_variance128x128_neon, 10),
3790 SubpelAvgVarianceParams(7, 6,
3791 &aom_highbd_10_sub_pixel_avg_variance128x64_neon, 10),
3792 SubpelAvgVarianceParams(6, 7,
3793 &aom_highbd_10_sub_pixel_avg_variance64x128_neon, 10),
3794 SubpelAvgVarianceParams(6, 6, &aom_highbd_10_sub_pixel_avg_variance64x64_neon,
3795 10),
3796 SubpelAvgVarianceParams(6, 5, &aom_highbd_10_sub_pixel_avg_variance64x32_neon,
3797 10),
3798 SubpelAvgVarianceParams(5, 6, &aom_highbd_10_sub_pixel_avg_variance32x64_neon,
3799 10),
3800 SubpelAvgVarianceParams(5, 5, &aom_highbd_10_sub_pixel_avg_variance32x32_neon,
3801 10),
3802 SubpelAvgVarianceParams(5, 4, &aom_highbd_10_sub_pixel_avg_variance32x16_neon,
3803 10),
3804 SubpelAvgVarianceParams(4, 5, &aom_highbd_10_sub_pixel_avg_variance16x32_neon,
3805 10),
3806 SubpelAvgVarianceParams(4, 4, &aom_highbd_10_sub_pixel_avg_variance16x16_neon,
3807 10),
3808 SubpelAvgVarianceParams(4, 3, &aom_highbd_10_sub_pixel_avg_variance16x8_neon,
3809 10),
3810 SubpelAvgVarianceParams(3, 4, &aom_highbd_10_sub_pixel_avg_variance8x16_neon,
3811 10),
3812 SubpelAvgVarianceParams(3, 3, &aom_highbd_10_sub_pixel_avg_variance8x8_neon,
3813 10),
3814 SubpelAvgVarianceParams(3, 2, &aom_highbd_10_sub_pixel_avg_variance8x4_neon,
3815 10),
3816 SubpelAvgVarianceParams(2, 3, &aom_highbd_10_sub_pixel_avg_variance4x8_neon,
3817 10),
3818 SubpelAvgVarianceParams(2, 2, &aom_highbd_10_sub_pixel_avg_variance4x4_neon,
3819 10),
// 12-bit kernels.
3820 SubpelAvgVarianceParams(
3821 7, 7, &aom_highbd_12_sub_pixel_avg_variance128x128_neon, 12),
3822 SubpelAvgVarianceParams(7, 6,
3823 &aom_highbd_12_sub_pixel_avg_variance128x64_neon, 12),
3824 SubpelAvgVarianceParams(6, 7,
3825 &aom_highbd_12_sub_pixel_avg_variance64x128_neon, 12),
3826 SubpelAvgVarianceParams(6, 6, &aom_highbd_12_sub_pixel_avg_variance64x64_neon,
3827 12),
3828 SubpelAvgVarianceParams(6, 5, &aom_highbd_12_sub_pixel_avg_variance64x32_neon,
3829 12),
3830 SubpelAvgVarianceParams(5, 6, &aom_highbd_12_sub_pixel_avg_variance32x64_neon,
3831 12),
3832 SubpelAvgVarianceParams(5, 5, &aom_highbd_12_sub_pixel_avg_variance32x32_neon,
3833 12),
3834 SubpelAvgVarianceParams(5, 4, &aom_highbd_12_sub_pixel_avg_variance32x16_neon,
3835 12),
3836 SubpelAvgVarianceParams(4, 5, &aom_highbd_12_sub_pixel_avg_variance16x32_neon,
3837 12),
3838 SubpelAvgVarianceParams(4, 4, &aom_highbd_12_sub_pixel_avg_variance16x16_neon,
3839 12),
3840 SubpelAvgVarianceParams(4, 3, &aom_highbd_12_sub_pixel_avg_variance16x8_neon,
3841 12),
3842 SubpelAvgVarianceParams(3, 4, &aom_highbd_12_sub_pixel_avg_variance8x16_neon,
3843 12),
3844 SubpelAvgVarianceParams(3, 3, &aom_highbd_12_sub_pixel_avg_variance8x8_neon,
3845 12),
3846 SubpelAvgVarianceParams(3, 2, &aom_highbd_12_sub_pixel_avg_variance8x4_neon,
3847 12),
3848 SubpelAvgVarianceParams(2, 3, &aom_highbd_12_sub_pixel_avg_variance4x8_neon,
3849 12),
3850 SubpelAvgVarianceParams(2, 2, &aom_highbd_12_sub_pixel_avg_variance4x4_neon,
3851 12),
3852
// The 4:1 / 1:4 shapes below are only listed when CONFIG_REALTIME_ONLY is 0.
3853 #if !CONFIG_REALTIME_ONLY
3854 SubpelAvgVarianceParams(6, 4, &aom_highbd_8_sub_pixel_avg_variance64x16_neon,
3855 8),
3856 SubpelAvgVarianceParams(4, 6, &aom_highbd_8_sub_pixel_avg_variance16x64_neon,
3857 8),
3858 SubpelAvgVarianceParams(5, 3, &aom_highbd_8_sub_pixel_avg_variance32x8_neon,
3859 8),
3860 SubpelAvgVarianceParams(3, 5, &aom_highbd_8_sub_pixel_avg_variance8x32_neon,
3861 8),
3862 SubpelAvgVarianceParams(4, 2, &aom_highbd_8_sub_pixel_avg_variance16x4_neon,
3863 8),
3864 SubpelAvgVarianceParams(2, 4, &aom_highbd_8_sub_pixel_avg_variance4x16_neon,
3865 8),
3866 SubpelAvgVarianceParams(6, 4, &aom_highbd_10_sub_pixel_avg_variance64x16_neon,
3867 10),
3868 SubpelAvgVarianceParams(4, 6, &aom_highbd_10_sub_pixel_avg_variance16x64_neon,
3869 10),
3870 SubpelAvgVarianceParams(5, 3, &aom_highbd_10_sub_pixel_avg_variance32x8_neon,
3871 10),
3872 SubpelAvgVarianceParams(3, 5, &aom_highbd_10_sub_pixel_avg_variance8x32_neon,
3873 10),
3874 SubpelAvgVarianceParams(4, 2, &aom_highbd_10_sub_pixel_avg_variance16x4_neon,
3875 10),
3876 SubpelAvgVarianceParams(2, 4, &aom_highbd_10_sub_pixel_avg_variance4x16_neon,
3877 10),
3878 SubpelAvgVarianceParams(6, 4, &aom_highbd_12_sub_pixel_avg_variance64x16_neon,
3879 12),
3880 SubpelAvgVarianceParams(4, 6, &aom_highbd_12_sub_pixel_avg_variance16x64_neon,
3881 12),
3882 SubpelAvgVarianceParams(5, 3, &aom_highbd_12_sub_pixel_avg_variance32x8_neon,
3883 12),
3884 SubpelAvgVarianceParams(3, 5, &aom_highbd_12_sub_pixel_avg_variance8x32_neon,
3885 12),
3886 SubpelAvgVarianceParams(4, 2, &aom_highbd_12_sub_pixel_avg_variance16x4_neon,
3887 12),
3888 SubpelAvgVarianceParams(2, 4, &aom_highbd_12_sub_pixel_avg_variance4x16_neon,
3889 12),
3890 #endif  // !CONFIG_REALTIME_ONLY
3891 };
3892
// One "NEON" test instance per table entry.
3893 INSTANTIATE_TEST_SUITE_P(NEON, AvxHBDSubpelAvgVarianceTest,
3894 ::testing::ValuesIn(kArrayHBDSubpelAvgVariance_neon));
3895
// High-bitdepth distance-weighted sub-pixel average-variance kernels (NEON)
// for AvxHBDDistWtdSubpelAvgVarianceTest, grouped by bit depth (8, 10, 12).
// In every entry the two leading integers are log2 of the width and height in
// the function name (7, 6 <-> 128x64) and the trailing integer repeats the bit
// depth in the function name.
3896 const DistWtdSubpelAvgVarianceParams
3897 kArrayHBDDistWtdSubpelAvgVariance_neon[] = {
// 8-bit kernels.
3898 DistWtdSubpelAvgVarianceParams(
3899 7, 7, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance128x128_neon, 8),
3900 DistWtdSubpelAvgVarianceParams(
3901 7, 6, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance128x64_neon, 8),
3902 DistWtdSubpelAvgVarianceParams(
3903 6, 7, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance64x128_neon, 8),
3904 DistWtdSubpelAvgVarianceParams(
3905 6, 6, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance64x64_neon, 8),
3906 DistWtdSubpelAvgVarianceParams(
3907 6, 5, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance64x32_neon, 8),
3908 DistWtdSubpelAvgVarianceParams(
3909 5, 6, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance32x64_neon, 8),
3910 DistWtdSubpelAvgVarianceParams(
3911 5, 5, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance32x32_neon, 8),
3912 DistWtdSubpelAvgVarianceParams(
3913 5, 4, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance32x16_neon, 8),
3914 DistWtdSubpelAvgVarianceParams(
3915 4, 5, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance16x32_neon, 8),
3916 DistWtdSubpelAvgVarianceParams(
3917 4, 4, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance16x16_neon, 8),
3918 DistWtdSubpelAvgVarianceParams(
3919 4, 3, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance16x8_neon, 8),
3920 DistWtdSubpelAvgVarianceParams(
3921 3, 4, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance8x16_neon, 8),
3922 DistWtdSubpelAvgVarianceParams(
3923 3, 3, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance8x8_neon, 8),
3924 DistWtdSubpelAvgVarianceParams(
3925 3, 2, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance8x4_neon, 8),
3926 DistWtdSubpelAvgVarianceParams(
3927 2, 3, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance4x8_neon, 8),
3928 DistWtdSubpelAvgVarianceParams(
3929 2, 2, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance4x4_neon, 8),
// 10-bit kernels.
3930 DistWtdSubpelAvgVarianceParams(
3931 7, 7, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance128x128_neon, 10),
3932 DistWtdSubpelAvgVarianceParams(
3933 7, 6, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance128x64_neon, 10),
3934 DistWtdSubpelAvgVarianceParams(
3935 6, 7, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance64x128_neon, 10),
3936 DistWtdSubpelAvgVarianceParams(
3937 6, 6, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance64x64_neon, 10),
3938 DistWtdSubpelAvgVarianceParams(
3939 6, 5, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance64x32_neon, 10),
3940 DistWtdSubpelAvgVarianceParams(
3941 5, 6, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance32x64_neon, 10),
3942 DistWtdSubpelAvgVarianceParams(
3943 5, 5, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance32x32_neon, 10),
3944 DistWtdSubpelAvgVarianceParams(
3945 5, 4, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance32x16_neon, 10),
3946 DistWtdSubpelAvgVarianceParams(
3947 4, 5, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance16x32_neon, 10),
3948 DistWtdSubpelAvgVarianceParams(
3949 4, 4, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance16x16_neon, 10),
3950 DistWtdSubpelAvgVarianceParams(
3951 4, 3, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance16x8_neon, 10),
3952 DistWtdSubpelAvgVarianceParams(
3953 3, 4, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance8x16_neon, 10),
3954 DistWtdSubpelAvgVarianceParams(
3955 3, 3, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance8x8_neon, 10),
3956 DistWtdSubpelAvgVarianceParams(
3957 3, 2, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance8x4_neon, 10),
3958 DistWtdSubpelAvgVarianceParams(
3959 2, 3, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance4x8_neon, 10),
3960 DistWtdSubpelAvgVarianceParams(
3961 2, 2, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance4x4_neon, 10),
// 12-bit kernels.
3962 DistWtdSubpelAvgVarianceParams(
3963 7, 7, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance128x128_neon, 12),
3964 DistWtdSubpelAvgVarianceParams(
3965 7, 6, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance128x64_neon, 12),
3966 DistWtdSubpelAvgVarianceParams(
3967 6, 7, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance64x128_neon, 12),
3968 DistWtdSubpelAvgVarianceParams(
3969 6, 6, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance64x64_neon, 12),
3970 DistWtdSubpelAvgVarianceParams(
3971 6, 5, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance64x32_neon, 12),
3972 DistWtdSubpelAvgVarianceParams(
3973 5, 6, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance32x64_neon, 12),
3974 DistWtdSubpelAvgVarianceParams(
3975 5, 5, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance32x32_neon, 12),
3976 DistWtdSubpelAvgVarianceParams(
3977 5, 4, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance32x16_neon, 12),
3978 DistWtdSubpelAvgVarianceParams(
3979 4, 5, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance16x32_neon, 12),
3980 DistWtdSubpelAvgVarianceParams(
3981 4, 4, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance16x16_neon, 12),
3982 DistWtdSubpelAvgVarianceParams(
3983 4, 3, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance16x8_neon, 12),
3984 DistWtdSubpelAvgVarianceParams(
3985 3, 4, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance8x16_neon, 12),
3986 DistWtdSubpelAvgVarianceParams(
3987 3, 3, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance8x8_neon, 12),
3988 DistWtdSubpelAvgVarianceParams(
3989 3, 2, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance8x4_neon, 12),
3990 DistWtdSubpelAvgVarianceParams(
3991 2, 3, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance4x8_neon, 12),
3992 DistWtdSubpelAvgVarianceParams(
3993 2, 2, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance4x4_neon, 12),
3994
// The 4:1 / 1:4 shapes below are only listed when CONFIG_REALTIME_ONLY is 0.
3995 #if !CONFIG_REALTIME_ONLY
3996 DistWtdSubpelAvgVarianceParams(
3997 6, 4, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance64x16_neon, 8),
3998 DistWtdSubpelAvgVarianceParams(
3999 4, 6, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance16x64_neon, 8),
4000 DistWtdSubpelAvgVarianceParams(
4001 5, 3, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance32x8_neon, 8),
4002 DistWtdSubpelAvgVarianceParams(
4003 3, 5, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance8x32_neon, 8),
4004 DistWtdSubpelAvgVarianceParams(
4005 4, 2, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance16x4_neon, 8),
4006 DistWtdSubpelAvgVarianceParams(
4007 2, 4, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance4x16_neon, 8),
4008 DistWtdSubpelAvgVarianceParams(
4009 6, 4, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance64x16_neon, 10),
4010 DistWtdSubpelAvgVarianceParams(
4011 4, 6, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance16x64_neon, 10),
4012 DistWtdSubpelAvgVarianceParams(
4013 5, 3, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance32x8_neon, 10),
4014 DistWtdSubpelAvgVarianceParams(
4015 3, 5, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance8x32_neon, 10),
4016 DistWtdSubpelAvgVarianceParams(
4017 4, 2, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance16x4_neon, 10),
4018 DistWtdSubpelAvgVarianceParams(
4019 2, 4, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance4x16_neon, 10),
4020 DistWtdSubpelAvgVarianceParams(
4021 6, 4, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance64x16_neon, 12),
4022 DistWtdSubpelAvgVarianceParams(
4023 4, 6, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance16x64_neon, 12),
4024 DistWtdSubpelAvgVarianceParams(
4025 5, 3, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance32x8_neon, 12),
4026 DistWtdSubpelAvgVarianceParams(
4027 3, 5, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance8x32_neon, 12),
4028 DistWtdSubpelAvgVarianceParams(
4029 4, 2, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance16x4_neon, 12),
4030 DistWtdSubpelAvgVarianceParams(
4031 2, 4, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance4x16_neon, 12),
4032 #endif // !CONFIG_REALTIME_ONLY
4033 };
// One "NEON" test instance per table entry.
4034 INSTANTIATE_TEST_SUITE_P(
4035 NEON, AvxHBDDistWtdSubpelAvgVarianceTest,
4036 ::testing::ValuesIn(kArrayHBDDistWtdSubpelAvgVariance_neon));
4037
// High-bitdepth OBMC sub-pixel variance kernels (NEON) for
// AvxHBDObmcSubpelVarianceTest. The whole table and its instantiation are
// compiled only when CONFIG_REALTIME_ONLY is 0. Entries are grouped by bit
// depth (12, 10, 8); in every entry the two leading integers are log2 of the
// width and height in the function name (7, 6 <-> 128x64) and the trailing
// integer repeats the bit depth in the function name.
4038 #if !CONFIG_REALTIME_ONLY
4039 const ObmcSubpelVarianceParams kArrayHBDObmcSubpelVariance_neon[] = {
// 12-bit kernels.
4040 ObmcSubpelVarianceParams(
4041 7, 7, &aom_highbd_12_obmc_sub_pixel_variance128x128_neon, 12),
4042 ObmcSubpelVarianceParams(
4043 7, 6, &aom_highbd_12_obmc_sub_pixel_variance128x64_neon, 12),
4044 ObmcSubpelVarianceParams(
4045 6, 7, &aom_highbd_12_obmc_sub_pixel_variance64x128_neon, 12),
4046 ObmcSubpelVarianceParams(
4047 6, 6, &aom_highbd_12_obmc_sub_pixel_variance64x64_neon, 12),
4048 ObmcSubpelVarianceParams(
4049 6, 5, &aom_highbd_12_obmc_sub_pixel_variance64x32_neon, 12),
4050 ObmcSubpelVarianceParams(
4051 5, 6, &aom_highbd_12_obmc_sub_pixel_variance32x64_neon, 12),
4052 ObmcSubpelVarianceParams(
4053 5, 5, &aom_highbd_12_obmc_sub_pixel_variance32x32_neon, 12),
4054 ObmcSubpelVarianceParams(
4055 5, 4, &aom_highbd_12_obmc_sub_pixel_variance32x16_neon, 12),
4056 ObmcSubpelVarianceParams(
4057 4, 5, &aom_highbd_12_obmc_sub_pixel_variance16x32_neon, 12),
4058 ObmcSubpelVarianceParams(
4059 4, 4, &aom_highbd_12_obmc_sub_pixel_variance16x16_neon, 12),
4060 ObmcSubpelVarianceParams(4, 3,
4061 &aom_highbd_12_obmc_sub_pixel_variance16x8_neon, 12),
4062 ObmcSubpelVarianceParams(3, 4,
4063 &aom_highbd_12_obmc_sub_pixel_variance8x16_neon, 12),
4064 ObmcSubpelVarianceParams(3, 3, &aom_highbd_12_obmc_sub_pixel_variance8x8_neon,
4065 12),
4066 ObmcSubpelVarianceParams(3, 2, &aom_highbd_12_obmc_sub_pixel_variance8x4_neon,
4067 12),
4068 ObmcSubpelVarianceParams(2, 3, &aom_highbd_12_obmc_sub_pixel_variance4x8_neon,
4069 12),
4070 ObmcSubpelVarianceParams(2, 2, &aom_highbd_12_obmc_sub_pixel_variance4x4_neon,
4071 12),
4072 ObmcSubpelVarianceParams(
4073 6, 4, &aom_highbd_12_obmc_sub_pixel_variance64x16_neon, 12),
4074 ObmcSubpelVarianceParams(
4075 4, 6, &aom_highbd_12_obmc_sub_pixel_variance16x64_neon, 12),
4076 ObmcSubpelVarianceParams(5, 3,
4077 &aom_highbd_12_obmc_sub_pixel_variance32x8_neon, 12),
4078 ObmcSubpelVarianceParams(3, 5,
4079 &aom_highbd_12_obmc_sub_pixel_variance8x32_neon, 12),
4080 ObmcSubpelVarianceParams(4, 2,
4081 &aom_highbd_12_obmc_sub_pixel_variance16x4_neon, 12),
4082 ObmcSubpelVarianceParams(2, 4,
4083 &aom_highbd_12_obmc_sub_pixel_variance4x16_neon, 12),
// 10-bit kernels.
4084 ObmcSubpelVarianceParams(
4085 7, 7, &aom_highbd_10_obmc_sub_pixel_variance128x128_neon, 10),
4086 ObmcSubpelVarianceParams(
4087 7, 6, &aom_highbd_10_obmc_sub_pixel_variance128x64_neon, 10),
4088 ObmcSubpelVarianceParams(
4089 6, 7, &aom_highbd_10_obmc_sub_pixel_variance64x128_neon, 10),
4090 ObmcSubpelVarianceParams(
4091 6, 6, &aom_highbd_10_obmc_sub_pixel_variance64x64_neon, 10),
4092 ObmcSubpelVarianceParams(
4093 6, 5, &aom_highbd_10_obmc_sub_pixel_variance64x32_neon, 10),
4094 ObmcSubpelVarianceParams(
4095 5, 6, &aom_highbd_10_obmc_sub_pixel_variance32x64_neon, 10),
4096 ObmcSubpelVarianceParams(
4097 5, 5, &aom_highbd_10_obmc_sub_pixel_variance32x32_neon, 10),
4098 ObmcSubpelVarianceParams(
4099 5, 4, &aom_highbd_10_obmc_sub_pixel_variance32x16_neon, 10),
4100 ObmcSubpelVarianceParams(
4101 4, 5, &aom_highbd_10_obmc_sub_pixel_variance16x32_neon, 10),
4102 ObmcSubpelVarianceParams(
4103 4, 4, &aom_highbd_10_obmc_sub_pixel_variance16x16_neon, 10),
4104 ObmcSubpelVarianceParams(4, 3,
4105 &aom_highbd_10_obmc_sub_pixel_variance16x8_neon, 10),
4106 ObmcSubpelVarianceParams(3, 4,
4107 &aom_highbd_10_obmc_sub_pixel_variance8x16_neon, 10),
4108 ObmcSubpelVarianceParams(3, 3, &aom_highbd_10_obmc_sub_pixel_variance8x8_neon,
4109 10),
4110 ObmcSubpelVarianceParams(3, 2, &aom_highbd_10_obmc_sub_pixel_variance8x4_neon,
4111 10),
4112 ObmcSubpelVarianceParams(2, 3, &aom_highbd_10_obmc_sub_pixel_variance4x8_neon,
4113 10),
4114 ObmcSubpelVarianceParams(2, 2, &aom_highbd_10_obmc_sub_pixel_variance4x4_neon,
4115 10),
4116 ObmcSubpelVarianceParams(
4117 6, 4, &aom_highbd_10_obmc_sub_pixel_variance64x16_neon, 10),
4118 ObmcSubpelVarianceParams(
4119 4, 6, &aom_highbd_10_obmc_sub_pixel_variance16x64_neon, 10),
4120 ObmcSubpelVarianceParams(5, 3,
4121 &aom_highbd_10_obmc_sub_pixel_variance32x8_neon, 10),
4122 ObmcSubpelVarianceParams(3, 5,
4123 &aom_highbd_10_obmc_sub_pixel_variance8x32_neon, 10),
4124 ObmcSubpelVarianceParams(4, 2,
4125 &aom_highbd_10_obmc_sub_pixel_variance16x4_neon, 10),
4126 ObmcSubpelVarianceParams(2, 4,
4127 &aom_highbd_10_obmc_sub_pixel_variance4x16_neon, 10),
// 8-bit kernels.
4128 ObmcSubpelVarianceParams(
4129 7, 7, &aom_highbd_8_obmc_sub_pixel_variance128x128_neon, 8),
4130 ObmcSubpelVarianceParams(7, 6,
4131 &aom_highbd_8_obmc_sub_pixel_variance128x64_neon, 8),
4132 ObmcSubpelVarianceParams(6, 7,
4133 &aom_highbd_8_obmc_sub_pixel_variance64x128_neon, 8),
4134 ObmcSubpelVarianceParams(6, 6,
4135 &aom_highbd_8_obmc_sub_pixel_variance64x64_neon, 8),
4136 ObmcSubpelVarianceParams(6, 5,
4137 &aom_highbd_8_obmc_sub_pixel_variance64x32_neon, 8),
4138 ObmcSubpelVarianceParams(5, 6,
4139 &aom_highbd_8_obmc_sub_pixel_variance32x64_neon, 8),
4140 ObmcSubpelVarianceParams(5, 5,
4141 &aom_highbd_8_obmc_sub_pixel_variance32x32_neon, 8),
4142 ObmcSubpelVarianceParams(5, 4,
4143 &aom_highbd_8_obmc_sub_pixel_variance32x16_neon, 8),
4144 ObmcSubpelVarianceParams(4, 5,
4145 &aom_highbd_8_obmc_sub_pixel_variance16x32_neon, 8),
4146 ObmcSubpelVarianceParams(4, 4,
4147 &aom_highbd_8_obmc_sub_pixel_variance16x16_neon, 8),
4148 ObmcSubpelVarianceParams(4, 3, &aom_highbd_8_obmc_sub_pixel_variance16x8_neon,
4149 8),
4150 ObmcSubpelVarianceParams(3, 4, &aom_highbd_8_obmc_sub_pixel_variance8x16_neon,
4151 8),
4152 ObmcSubpelVarianceParams(3, 3, &aom_highbd_8_obmc_sub_pixel_variance8x8_neon,
4153 8),
4154 ObmcSubpelVarianceParams(3, 2, &aom_highbd_8_obmc_sub_pixel_variance8x4_neon,
4155 8),
4156 ObmcSubpelVarianceParams(2, 3, &aom_highbd_8_obmc_sub_pixel_variance4x8_neon,
4157 8),
4158 ObmcSubpelVarianceParams(2, 2, &aom_highbd_8_obmc_sub_pixel_variance4x4_neon,
4159 8),
4160 ObmcSubpelVarianceParams(6, 4,
4161 &aom_highbd_8_obmc_sub_pixel_variance64x16_neon, 8),
4162 ObmcSubpelVarianceParams(4, 6,
4163 &aom_highbd_8_obmc_sub_pixel_variance16x64_neon, 8),
4164 ObmcSubpelVarianceParams(5, 3, &aom_highbd_8_obmc_sub_pixel_variance32x8_neon,
4165 8),
4166 ObmcSubpelVarianceParams(3, 5, &aom_highbd_8_obmc_sub_pixel_variance8x32_neon,
4167 8),
4168 ObmcSubpelVarianceParams(4, 2, &aom_highbd_8_obmc_sub_pixel_variance16x4_neon,
4169 8),
4170 ObmcSubpelVarianceParams(2, 4, &aom_highbd_8_obmc_sub_pixel_variance4x16_neon,
4171 8),
4172 };
4173
// One "NEON" test instance per table entry.
4174 INSTANTIATE_TEST_SUITE_P(NEON, AvxHBDObmcSubpelVarianceTest,
4175 ::testing::ValuesIn(kArrayHBDObmcSubpelVariance_neon));
4176 #endif // !CONFIG_REALTIME_ONLY
4177
4178 #endif // CONFIG_AV1_HIGHBITDEPTH
4179
4180 #endif // HAVE_NEON
4181
4182 #if HAVE_NEON_DOTPROD
4183
// Variance kernels from the NEON dot-product build for AvxVarianceTest. In
// every entry the two integers are log2 of the width and height in the
// function name (7, 6 <-> 128x64); no bit-depth argument is passed.
// Each block size is listed exactly once so that no identical test instance
// is generated twice.
4184 const VarianceParams kArrayVariance_neon_dotprod[] = {
4185 VarianceParams(7, 7, &aom_variance128x128_neon_dotprod),
4187 VarianceParams(7, 6, &aom_variance128x64_neon_dotprod),
4188 VarianceParams(6, 7, &aom_variance64x128_neon_dotprod),
4189 VarianceParams(6, 6, &aom_variance64x64_neon_dotprod),
4190 VarianceParams(6, 5, &aom_variance64x32_neon_dotprod),
4191 VarianceParams(5, 6, &aom_variance32x64_neon_dotprod),
4192 VarianceParams(5, 5, &aom_variance32x32_neon_dotprod),
4193 VarianceParams(5, 4, &aom_variance32x16_neon_dotprod),
4194 VarianceParams(4, 5, &aom_variance16x32_neon_dotprod),
4195 VarianceParams(4, 4, &aom_variance16x16_neon_dotprod),
4196 VarianceParams(4, 3, &aom_variance16x8_neon_dotprod),
4197 VarianceParams(3, 4, &aom_variance8x16_neon_dotprod),
4198 VarianceParams(3, 3, &aom_variance8x8_neon_dotprod),
4199 VarianceParams(3, 2, &aom_variance8x4_neon_dotprod),
4200 VarianceParams(2, 3, &aom_variance4x8_neon_dotprod),
4201 VarianceParams(2, 2, &aom_variance4x4_neon_dotprod),
// The 4:1 / 1:4 shapes below are only listed when CONFIG_REALTIME_ONLY is 0.
4202 #if !CONFIG_REALTIME_ONLY
4203 VarianceParams(2, 4, &aom_variance4x16_neon_dotprod),
4204 VarianceParams(4, 2, &aom_variance16x4_neon_dotprod),
4205 VarianceParams(3, 5, &aom_variance8x32_neon_dotprod),
4206 VarianceParams(5, 3, &aom_variance32x8_neon_dotprod),
4207 VarianceParams(4, 6, &aom_variance16x64_neon_dotprod),
4208 VarianceParams(6, 4, &aom_variance64x16_neon_dotprod),
4209 #endif  // !CONFIG_REALTIME_ONLY
4210 };
4211
// One "NEON_DOTPROD" test instance per table entry.
4212 INSTANTIATE_TEST_SUITE_P(NEON_DOTPROD, AvxVarianceTest,
4213 ::testing::ValuesIn(kArrayVariance_neon_dotprod));
4214
4215 const GetSseSumParams kArrayGetSseSum8x8Quad_neon_dotprod[] = {
4216 GetSseSumParams(7, 7, &aom_get_var_sse_sum_8x8_quad_neon_dotprod, 0),
4217 GetSseSumParams(6, 6, &aom_get_var_sse_sum_8x8_quad_neon_dotprod, 0),
4218 GetSseSumParams(5, 5, &aom_get_var_sse_sum_8x8_quad_neon_dotprod, 0),
4219 GetSseSumParams(5, 4, &aom_get_var_sse_sum_8x8_quad_neon_dotprod, 0)
4220 };
4221 INSTANTIATE_TEST_SUITE_P(
4222 NEON_DOTPROD, GetSseSum8x8QuadTest,
4223 ::testing::ValuesIn(kArrayGetSseSum8x8Quad_neon_dotprod));
4224
4225 const GetSseSumParamsDual kArrayGetSseSum16x16Dual_neon_dotprod[] = {
4226 GetSseSumParamsDual(7, 7, &aom_get_var_sse_sum_16x16_dual_neon_dotprod, 0),
4227 GetSseSumParamsDual(6, 6, &aom_get_var_sse_sum_16x16_dual_neon_dotprod, 0),
4228 GetSseSumParamsDual(5, 5, &aom_get_var_sse_sum_16x16_dual_neon_dotprod, 0),
4229 GetSseSumParamsDual(5, 4, &aom_get_var_sse_sum_16x16_dual_neon_dotprod, 0)
4230 };
4231 INSTANTIATE_TEST_SUITE_P(
4232 NEON_DOTPROD, GetSseSum16x16DualTest,
4233 ::testing::ValuesIn(kArrayGetSseSum16x16Dual_neon_dotprod));
4234
4235 INSTANTIATE_TEST_SUITE_P(
4236 NEON_DOTPROD, AvxMseTest,
4237 ::testing::Values(MseParams(3, 3, &aom_mse8x8_neon_dotprod),
4238 MseParams(3, 4, &aom_mse8x16_neon_dotprod),
4239 MseParams(4, 4, &aom_mse16x16_neon_dotprod),
4240 MseParams(4, 3, &aom_mse16x8_neon_dotprod)));
4241
4242 #endif // HAVE_NEON_DOTPROD
4243
4244 #if HAVE_SVE
4245
4246 #if CONFIG_AV1_HIGHBITDEPTH
4247 const VarianceParams kArrayHBDVariance_sve[] = {
4248 VarianceParams(7, 7, &aom_highbd_12_variance128x128_sve, 12),
4249 VarianceParams(7, 6, &aom_highbd_12_variance128x64_sve, 12),
4250 VarianceParams(6, 7, &aom_highbd_12_variance64x128_sve, 12),
4251 VarianceParams(6, 6, &aom_highbd_12_variance64x64_sve, 12),
4252 VarianceParams(6, 5, &aom_highbd_12_variance64x32_sve, 12),
4253 VarianceParams(5, 6, &aom_highbd_12_variance32x64_sve, 12),
4254 VarianceParams(5, 5, &aom_highbd_12_variance32x32_sve, 12),
4255 VarianceParams(5, 4, &aom_highbd_12_variance32x16_sve, 12),
4256 VarianceParams(4, 5, &aom_highbd_12_variance16x32_sve, 12),
4257 VarianceParams(4, 4, &aom_highbd_12_variance16x16_sve, 12),
4258 VarianceParams(4, 3, &aom_highbd_12_variance16x8_sve, 12),
4259 VarianceParams(3, 4, &aom_highbd_12_variance8x16_sve, 12),
4260 VarianceParams(3, 3, &aom_highbd_12_variance8x8_sve, 12),
4261 VarianceParams(3, 2, &aom_highbd_12_variance8x4_sve, 12),
4262 VarianceParams(2, 3, &aom_highbd_12_variance4x8_sve, 12),
4263 VarianceParams(2, 2, &aom_highbd_12_variance4x4_sve, 12),
4264 VarianceParams(7, 7, &aom_highbd_10_variance128x128_sve, 10),
4265 VarianceParams(7, 6, &aom_highbd_10_variance128x64_sve, 10),
4266 VarianceParams(6, 7, &aom_highbd_10_variance64x128_sve, 10),
4267 VarianceParams(6, 6, &aom_highbd_10_variance64x64_sve, 10),
4268 VarianceParams(6, 5, &aom_highbd_10_variance64x32_sve, 10),
4269 VarianceParams(5, 6, &aom_highbd_10_variance32x64_sve, 10),
4270 VarianceParams(5, 5, &aom_highbd_10_variance32x32_sve, 10),
4271 VarianceParams(5, 4, &aom_highbd_10_variance32x16_sve, 10),
4272 VarianceParams(4, 5, &aom_highbd_10_variance16x32_sve, 10),
4273 VarianceParams(4, 4, &aom_highbd_10_variance16x16_sve, 10),
4274 VarianceParams(4, 3, &aom_highbd_10_variance16x8_sve, 10),
4275 VarianceParams(3, 4, &aom_highbd_10_variance8x16_sve, 10),
4276 VarianceParams(3, 3, &aom_highbd_10_variance8x8_sve, 10),
4277 VarianceParams(3, 2, &aom_highbd_10_variance8x4_sve, 10),
4278 VarianceParams(2, 3, &aom_highbd_10_variance4x8_sve, 10),
4279 VarianceParams(2, 2, &aom_highbd_10_variance4x4_sve, 10),
4280 VarianceParams(7, 7, &aom_highbd_8_variance128x128_sve, 8),
4281 VarianceParams(7, 6, &aom_highbd_8_variance128x64_sve, 8),
4282 VarianceParams(6, 7, &aom_highbd_8_variance64x128_sve, 8),
4283 VarianceParams(6, 6, &aom_highbd_8_variance64x64_sve, 8),
4284 VarianceParams(6, 5, &aom_highbd_8_variance64x32_sve, 8),
4285 VarianceParams(5, 6, &aom_highbd_8_variance32x64_sve, 8),
4286 VarianceParams(5, 5, &aom_highbd_8_variance32x32_sve, 8),
4287 VarianceParams(5, 4, &aom_highbd_8_variance32x16_sve, 8),
4288 VarianceParams(4, 5, &aom_highbd_8_variance16x32_sve, 8),
4289 VarianceParams(4, 4, &aom_highbd_8_variance16x16_sve, 8),
4290 VarianceParams(4, 3, &aom_highbd_8_variance16x8_sve, 8),
4291 VarianceParams(3, 4, &aom_highbd_8_variance8x16_sve, 8),
4292 VarianceParams(3, 3, &aom_highbd_8_variance8x8_sve, 8),
4293 VarianceParams(3, 2, &aom_highbd_8_variance8x4_sve, 8),
4294 VarianceParams(2, 3, &aom_highbd_8_variance4x8_sve, 8),
4295 VarianceParams(2, 2, &aom_highbd_8_variance4x4_sve, 8),
4296 #if !CONFIG_REALTIME_ONLY
4297 VarianceParams(6, 4, &aom_highbd_12_variance64x16_sve, 12),
4298 VarianceParams(4, 6, &aom_highbd_12_variance16x64_sve, 12),
4299 VarianceParams(5, 3, &aom_highbd_12_variance32x8_sve, 12),
4300 VarianceParams(3, 5, &aom_highbd_12_variance8x32_sve, 12),
4301 VarianceParams(4, 2, &aom_highbd_12_variance16x4_sve, 12),
4302 VarianceParams(2, 4, &aom_highbd_12_variance4x16_sve, 12),
4303 VarianceParams(6, 4, &aom_highbd_10_variance64x16_sve, 10),
4304 VarianceParams(4, 6, &aom_highbd_10_variance16x64_sve, 10),
4305 VarianceParams(5, 3, &aom_highbd_10_variance32x8_sve, 10),
4306 VarianceParams(3, 5, &aom_highbd_10_variance8x32_sve, 10),
4307 VarianceParams(4, 2, &aom_highbd_10_variance16x4_sve, 10),
4308 VarianceParams(2, 4, &aom_highbd_10_variance4x16_sve, 10),
4309 VarianceParams(6, 4, &aom_highbd_8_variance64x16_sve, 8),
4310 VarianceParams(4, 6, &aom_highbd_8_variance16x64_sve, 8),
4311 VarianceParams(5, 3, &aom_highbd_8_variance32x8_sve, 8),
4312 VarianceParams(3, 5, &aom_highbd_8_variance8x32_sve, 8),
4313 VarianceParams(4, 2, &aom_highbd_8_variance16x4_sve, 8),
4314 VarianceParams(2, 4, &aom_highbd_8_variance4x16_sve, 8),
4315 #endif
4316 };
4317
4318 INSTANTIATE_TEST_SUITE_P(SVE, AvxHBDVarianceTest,
4319 ::testing::ValuesIn(kArrayHBDVariance_sve));
4320
4321 #endif // CONFIG_AV1_HIGHBITDEPTH
4322 #endif // HAVE_SVE
4323
4324 } // namespace
4325