xref: /aosp_15_r20/external/libaom/test/variance_test.cc (revision 77c1e3ccc04c968bd2bc212e87364f250e820521)
1 /*
2  * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
3  *
4  * This source code is subject to the terms of the BSD 2 Clause License and
5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6  * was not distributed with this source code in the LICENSE file, you can
7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8  * Media Patent License 1.0 was not distributed with this source code in the
9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10  */
11 
12 #include <cstdlib>
13 #include <new>
14 #include <ostream>
15 #include <tuple>
16 
17 #include "gtest/gtest.h"
18 
19 #include "config/aom_config.h"
20 #include "config/aom_dsp_rtcd.h"
21 
22 #include "test/acm_random.h"
23 #include "test/register_state_check.h"
24 #include "aom/aom_codec.h"
25 #include "aom/aom_integer.h"
26 #include "aom_mem/aom_mem.h"
27 #include "aom_ports/aom_timer.h"
28 #include "aom_ports/mem.h"
29 #include "av1/common/cdef_block.h"
30 
31 namespace {
32 
33 typedef uint64_t (*MseWxH16bitFunc)(uint8_t *dst, int dstride, uint16_t *src,
34                                     int sstride, int w, int h);
35 typedef uint64_t (*Mse16xH16bitFunc)(uint8_t *dst, int dstride, uint16_t *src,
36                                      int w, int h);
37 typedef unsigned int (*VarianceMxNFunc)(const uint8_t *a, int a_stride,
38                                         const uint8_t *b, int b_stride,
39                                         unsigned int *sse);
40 typedef void (*GetSseSum8x8QuadFunc)(const uint8_t *a, int a_stride,
41                                      const uint8_t *b, int b_stride,
42                                      uint32_t *sse8x8, int *sum8x8,
43                                      unsigned int *tot_sse, int *tot_sum,
44                                      uint32_t *var8x8);
45 typedef void (*GetSseSum16x16DualFunc)(const uint8_t *a, int a_stride,
46                                        const uint8_t *b, int b_stride,
47                                        uint32_t *sse16x16,
48                                        unsigned int *tot_sse, int *tot_sum,
49                                        uint32_t *var16x16);
50 typedef unsigned int (*SubpixVarMxNFunc)(const uint8_t *a, int a_stride,
51                                          int xoffset, int yoffset,
52                                          const uint8_t *b, int b_stride,
53                                          unsigned int *sse);
54 typedef unsigned int (*SubpixAvgVarMxNFunc)(const uint8_t *a, int a_stride,
55                                             int xoffset, int yoffset,
56                                             const uint8_t *b, int b_stride,
57                                             uint32_t *sse,
58                                             const uint8_t *second_pred);
59 typedef unsigned int (*SumOfSquaresFunction)(const int16_t *src);
60 typedef unsigned int (*DistWtdSubpixAvgVarMxNFunc)(
61     const uint8_t *a, int a_stride, int xoffset, int yoffset, const uint8_t *b,
62     int b_stride, uint32_t *sse, const uint8_t *second_pred,
63     const DIST_WTD_COMP_PARAMS *jcp_param);
64 
65 #if !CONFIG_REALTIME_ONLY
66 typedef uint32_t (*ObmcSubpelVarFunc)(const uint8_t *pre, int pre_stride,
67                                       int xoffset, int yoffset,
68                                       const int32_t *wsrc, const int32_t *mask,
69                                       unsigned int *sse);
70 #endif
71 
72 using libaom_test::ACMRandom;
73 
74 // Truncate high bit depth results by downshifting (with rounding) by:
75 // 2 * (bit_depth - 8) for sse
76 // (bit_depth - 8) for se
RoundHighBitDepth(int bit_depth,int64_t * se,uint64_t * sse)77 static void RoundHighBitDepth(int bit_depth, int64_t *se, uint64_t *sse) {
78   switch (bit_depth) {
79     case AOM_BITS_12:
80       *sse = (*sse + 128) >> 8;
81       *se = (*se + 8) >> 4;
82       break;
83     case AOM_BITS_10:
84       *sse = (*sse + 8) >> 4;
85       *se = (*se + 2) >> 2;
86       break;
87     case AOM_BITS_8:
88     default: break;
89   }
90 }
91 
92 /* Note:
93  *  Our codebase calculates the "diff" value in the variance algorithm by
94  *  (src - ref).
95  */
variance_ref(const uint8_t * src,const uint8_t * ref,int l2w,int l2h,int src_stride,int ref_stride,uint32_t * sse_ptr,bool use_high_bit_depth_,aom_bit_depth_t bit_depth)96 static uint32_t variance_ref(const uint8_t *src, const uint8_t *ref, int l2w,
97                              int l2h, int src_stride, int ref_stride,
98                              uint32_t *sse_ptr, bool use_high_bit_depth_,
99                              aom_bit_depth_t bit_depth) {
100   int64_t se = 0;
101   uint64_t sse = 0;
102   const int w = 1 << l2w;
103   const int h = 1 << l2h;
104   for (int y = 0; y < h; y++) {
105     for (int x = 0; x < w; x++) {
106       int diff;
107       if (!use_high_bit_depth_) {
108         diff = src[y * src_stride + x] - ref[y * ref_stride + x];
109         se += diff;
110         sse += diff * diff;
111       } else {
112         diff = CONVERT_TO_SHORTPTR(src)[y * src_stride + x] -
113                CONVERT_TO_SHORTPTR(ref)[y * ref_stride + x];
114         se += diff;
115         sse += diff * diff;
116       }
117     }
118   }
119   RoundHighBitDepth(bit_depth, &se, &sse);
120   *sse_ptr = static_cast<uint32_t>(sse);
121   return static_cast<uint32_t>(sse - ((se * se) >> (l2w + l2h)));
122 }
123 
124 /* The subpel reference functions differ from the codec version in one aspect:
125  * they calculate the bilinear factors directly instead of using a lookup table
126  * and therefore upshift xoff and yoff by 1. Only every other calculated value
127  * is used so the codec version shrinks the table to save space.
128  */
subpel_variance_ref(const uint8_t * ref,const uint8_t * src,int l2w,int l2h,int xoff,int yoff,uint32_t * sse_ptr,bool use_high_bit_depth_,aom_bit_depth_t bit_depth)129 static uint32_t subpel_variance_ref(const uint8_t *ref, const uint8_t *src,
130                                     int l2w, int l2h, int xoff, int yoff,
131                                     uint32_t *sse_ptr, bool use_high_bit_depth_,
132                                     aom_bit_depth_t bit_depth) {
133   int64_t se = 0;
134   uint64_t sse = 0;
135   const int w = 1 << l2w;
136   const int h = 1 << l2h;
137 
138   xoff <<= 1;
139   yoff <<= 1;
140 
141   for (int y = 0; y < h; y++) {
142     for (int x = 0; x < w; x++) {
143       // Bilinear interpolation at a 16th pel step.
144       if (!use_high_bit_depth_) {
145         const int a1 = ref[(w + 1) * (y + 0) + x + 0];
146         const int a2 = ref[(w + 1) * (y + 0) + x + 1];
147         const int b1 = ref[(w + 1) * (y + 1) + x + 0];
148         const int b2 = ref[(w + 1) * (y + 1) + x + 1];
149         const int a = a1 + (((a2 - a1) * xoff + 8) >> 4);
150         const int b = b1 + (((b2 - b1) * xoff + 8) >> 4);
151         const int r = a + (((b - a) * yoff + 8) >> 4);
152         const int diff = r - src[w * y + x];
153         se += diff;
154         sse += diff * diff;
155       } else {
156         uint16_t *ref16 = CONVERT_TO_SHORTPTR(ref);
157         uint16_t *src16 = CONVERT_TO_SHORTPTR(src);
158         const int a1 = ref16[(w + 1) * (y + 0) + x + 0];
159         const int a2 = ref16[(w + 1) * (y + 0) + x + 1];
160         const int b1 = ref16[(w + 1) * (y + 1) + x + 0];
161         const int b2 = ref16[(w + 1) * (y + 1) + x + 1];
162         const int a = a1 + (((a2 - a1) * xoff + 8) >> 4);
163         const int b = b1 + (((b2 - b1) * xoff + 8) >> 4);
164         const int r = a + (((b - a) * yoff + 8) >> 4);
165         const int diff = r - src16[w * y + x];
166         se += diff;
167         sse += diff * diff;
168       }
169     }
170   }
171   RoundHighBitDepth(bit_depth, &se, &sse);
172   *sse_ptr = static_cast<uint32_t>(sse);
173   return static_cast<uint32_t>(sse - ((se * se) >> (l2w + l2h)));
174 }
175 
subpel_avg_variance_ref(const uint8_t * ref,const uint8_t * src,const uint8_t * second_pred,int l2w,int l2h,int xoff,int yoff,uint32_t * sse_ptr,bool use_high_bit_depth,aom_bit_depth_t bit_depth)176 static uint32_t subpel_avg_variance_ref(const uint8_t *ref, const uint8_t *src,
177                                         const uint8_t *second_pred, int l2w,
178                                         int l2h, int xoff, int yoff,
179                                         uint32_t *sse_ptr,
180                                         bool use_high_bit_depth,
181                                         aom_bit_depth_t bit_depth) {
182   int64_t se = 0;
183   uint64_t sse = 0;
184   const int w = 1 << l2w;
185   const int h = 1 << l2h;
186 
187   xoff <<= 1;
188   yoff <<= 1;
189 
190   for (int y = 0; y < h; y++) {
191     for (int x = 0; x < w; x++) {
192       // bilinear interpolation at a 16th pel step
193       if (!use_high_bit_depth) {
194         const int a1 = ref[(w + 1) * (y + 0) + x + 0];
195         const int a2 = ref[(w + 1) * (y + 0) + x + 1];
196         const int b1 = ref[(w + 1) * (y + 1) + x + 0];
197         const int b2 = ref[(w + 1) * (y + 1) + x + 1];
198         const int a = a1 + (((a2 - a1) * xoff + 8) >> 4);
199         const int b = b1 + (((b2 - b1) * xoff + 8) >> 4);
200         const int r = a + (((b - a) * yoff + 8) >> 4);
201         const int diff =
202             ((r + second_pred[w * y + x] + 1) >> 1) - src[w * y + x];
203         se += diff;
204         sse += diff * diff;
205       } else {
206         const uint16_t *ref16 = CONVERT_TO_SHORTPTR(ref);
207         const uint16_t *src16 = CONVERT_TO_SHORTPTR(src);
208         const uint16_t *sec16 = CONVERT_TO_SHORTPTR(second_pred);
209         const int a1 = ref16[(w + 1) * (y + 0) + x + 0];
210         const int a2 = ref16[(w + 1) * (y + 0) + x + 1];
211         const int b1 = ref16[(w + 1) * (y + 1) + x + 0];
212         const int b2 = ref16[(w + 1) * (y + 1) + x + 1];
213         const int a = a1 + (((a2 - a1) * xoff + 8) >> 4);
214         const int b = b1 + (((b2 - b1) * xoff + 8) >> 4);
215         const int r = a + (((b - a) * yoff + 8) >> 4);
216         const int diff = ((r + sec16[w * y + x] + 1) >> 1) - src16[w * y + x];
217         se += diff;
218         sse += diff * diff;
219       }
220     }
221   }
222   RoundHighBitDepth(bit_depth, &se, &sse);
223   *sse_ptr = static_cast<uint32_t>(sse);
224   return static_cast<uint32_t>(sse - ((se * se) >> (l2w + l2h)));
225 }
226 
dist_wtd_subpel_avg_variance_ref(const uint8_t * ref,const uint8_t * src,const uint8_t * second_pred,int l2w,int l2h,int xoff,int yoff,uint32_t * sse_ptr,bool use_high_bit_depth,aom_bit_depth_t bit_depth,DIST_WTD_COMP_PARAMS * jcp_param)227 static uint32_t dist_wtd_subpel_avg_variance_ref(
228     const uint8_t *ref, const uint8_t *src, const uint8_t *second_pred, int l2w,
229     int l2h, int xoff, int yoff, uint32_t *sse_ptr, bool use_high_bit_depth,
230     aom_bit_depth_t bit_depth, DIST_WTD_COMP_PARAMS *jcp_param) {
231   int64_t se = 0;
232   uint64_t sse = 0;
233   const int w = 1 << l2w;
234   const int h = 1 << l2h;
235 
236   xoff <<= 1;
237   yoff <<= 1;
238 
239   for (int y = 0; y < h; y++) {
240     for (int x = 0; x < w; x++) {
241       // bilinear interpolation at a 16th pel step
242       if (!use_high_bit_depth) {
243         const int a1 = ref[(w + 0) * (y + 0) + x + 0];
244         const int a2 = ref[(w + 0) * (y + 0) + x + 1];
245         const int b1 = ref[(w + 0) * (y + 1) + x + 0];
246         const int b2 = ref[(w + 0) * (y + 1) + x + 1];
247         const int a = a1 + (((a2 - a1) * xoff + 8) >> 4);
248         const int b = b1 + (((b2 - b1) * xoff + 8) >> 4);
249         const int r = a + (((b - a) * yoff + 8) >> 4);
250         const int avg = ROUND_POWER_OF_TWO(
251             r * jcp_param->fwd_offset +
252                 second_pred[w * y + x] * jcp_param->bck_offset,
253             DIST_PRECISION_BITS);
254         const int diff = avg - src[w * y + x];
255 
256         se += diff;
257         sse += diff * diff;
258       } else {
259         const uint16_t *ref16 = CONVERT_TO_SHORTPTR(ref);
260         const uint16_t *src16 = CONVERT_TO_SHORTPTR(src);
261         const uint16_t *sec16 = CONVERT_TO_SHORTPTR(second_pred);
262         const int a1 = ref16[(w + 0) * (y + 0) + x + 0];
263         const int a2 = ref16[(w + 0) * (y + 0) + x + 1];
264         const int b1 = ref16[(w + 0) * (y + 1) + x + 0];
265         const int b2 = ref16[(w + 0) * (y + 1) + x + 1];
266         const int a = a1 + (((a2 - a1) * xoff + 8) >> 4);
267         const int b = b1 + (((b2 - b1) * xoff + 8) >> 4);
268         const int r = a + (((b - a) * yoff + 8) >> 4);
269         const int avg =
270             ROUND_POWER_OF_TWO(r * jcp_param->fwd_offset +
271                                    sec16[w * y + x] * jcp_param->bck_offset,
272                                DIST_PRECISION_BITS);
273         const int diff = avg - src16[w * y + x];
274 
275         se += diff;
276         sse += diff * diff;
277       }
278     }
279   }
280   RoundHighBitDepth(bit_depth, &se, &sse);
281   *sse_ptr = static_cast<uint32_t>(sse);
282   return static_cast<uint32_t>(sse - ((se * se) >> (l2w + l2h)));
283 }
284 
285 #if !CONFIG_REALTIME_ONLY
obmc_subpel_variance_ref(const uint8_t * pre,int l2w,int l2h,int xoff,int yoff,const int32_t * wsrc,const int32_t * mask,uint32_t * sse_ptr,bool use_high_bit_depth_,aom_bit_depth_t bit_depth)286 static uint32_t obmc_subpel_variance_ref(const uint8_t *pre, int l2w, int l2h,
287                                          int xoff, int yoff,
288                                          const int32_t *wsrc,
289                                          const int32_t *mask, uint32_t *sse_ptr,
290                                          bool use_high_bit_depth_,
291                                          aom_bit_depth_t bit_depth) {
292   int64_t se = 0;
293   uint64_t sse = 0;
294   const int w = 1 << l2w;
295   const int h = 1 << l2h;
296 
297   xoff <<= 1;
298   yoff <<= 1;
299 
300   for (int y = 0; y < h; y++) {
301     for (int x = 0; x < w; x++) {
302       // Bilinear interpolation at a 16th pel step.
303       if (!use_high_bit_depth_) {
304         const int a1 = pre[(w + 1) * (y + 0) + x + 0];
305         const int a2 = pre[(w + 1) * (y + 0) + x + 1];
306         const int b1 = pre[(w + 1) * (y + 1) + x + 0];
307         const int b2 = pre[(w + 1) * (y + 1) + x + 1];
308         const int a = a1 + (((a2 - a1) * xoff + 8) >> 4);
309         const int b = b1 + (((b2 - b1) * xoff + 8) >> 4);
310         const int r = a + (((b - a) * yoff + 8) >> 4);
311         const int diff = ROUND_POWER_OF_TWO_SIGNED(
312             wsrc[w * y + x] - r * mask[w * y + x], 12);
313         se += diff;
314         sse += diff * diff;
315       } else {
316         uint16_t *pre16 = CONVERT_TO_SHORTPTR(pre);
317         const int a1 = pre16[(w + 1) * (y + 0) + x + 0];
318         const int a2 = pre16[(w + 1) * (y + 0) + x + 1];
319         const int b1 = pre16[(w + 1) * (y + 1) + x + 0];
320         const int b2 = pre16[(w + 1) * (y + 1) + x + 1];
321         const int a = a1 + (((a2 - a1) * xoff + 8) >> 4);
322         const int b = b1 + (((b2 - b1) * xoff + 8) >> 4);
323         const int r = a + (((b - a) * yoff + 8) >> 4);
324         const int diff = ROUND_POWER_OF_TWO_SIGNED(
325             wsrc[w * y + x] - r * mask[w * y + x], 12);
326         se += diff;
327         sse += diff * diff;
328       }
329     }
330   }
331   RoundHighBitDepth(bit_depth, &se, &sse);
332   *sse_ptr = static_cast<uint32_t>(sse);
333   return static_cast<uint32_t>(sse - ((se * se) >> (l2w + l2h)));
334 }
335 #endif
336 
337 ////////////////////////////////////////////////////////////////////////////////
338 
339 #if !CONFIG_REALTIME_ONLY
340 class SumOfSquaresTest : public ::testing::TestWithParam<SumOfSquaresFunction> {
341  public:
SumOfSquaresTest()342   SumOfSquaresTest() : func_(GetParam()) {}
343 
344   ~SumOfSquaresTest() override = default;
345 
346  protected:
347   void ConstTest();
348   void RefTest();
349 
350   SumOfSquaresFunction func_;
351   ACMRandom rnd_;
352 };
353 
ConstTest()354 void SumOfSquaresTest::ConstTest() {
355   int16_t mem[256];
356   unsigned int res;
357   for (int v = 0; v < 256; ++v) {
358     for (int i = 0; i < 256; ++i) {
359       mem[i] = v;
360     }
361     API_REGISTER_STATE_CHECK(res = func_(mem));
362     EXPECT_EQ(256u * (v * v), res);
363   }
364 }
365 
// Reference implementation: sum of the squares of the 256 samples at src.
unsigned int mb_ss_ref(const int16_t *src) {
  unsigned int total = 0;
  const int16_t *const end = src + 256;
  for (const int16_t *cursor = src; cursor != end; ++cursor) {
    total += *cursor * *cursor;
  }
  return total;
}
373 
RefTest()374 void SumOfSquaresTest::RefTest() {
375   int16_t mem[256];
376   for (int i = 0; i < 100; ++i) {
377     for (int j = 0; j < 256; ++j) {
378       mem[j] = rnd_.Rand8() - rnd_.Rand8();
379     }
380 
381     const unsigned int expected = mb_ss_ref(mem);
382     unsigned int res;
383     API_REGISTER_STATE_CHECK(res = func_(mem));
384     EXPECT_EQ(expected, res);
385   }
386 }
387 #endif  // !CONFIG_REALTIME_ONLY
388 
389 ////////////////////////////////////////////////////////////////////////////////
390 // Encapsulating struct to store the function to test along with
391 // some testing context.
392 // Can be used for MSE, SSE, Variance, etc.
393 
394 template <typename Func>
395 struct TestParams {
TestParams__anone33b19b50111::TestParams396   TestParams(int log2w = 0, int log2h = 0, Func function = nullptr,
397              int bit_depth_value = 0)
398       : log2width(log2w), log2height(log2h), func(function) {
399     use_high_bit_depth = (bit_depth_value > 0);
400     if (use_high_bit_depth) {
401       bit_depth = static_cast<aom_bit_depth_t>(bit_depth_value);
402     } else {
403       bit_depth = AOM_BITS_8;
404     }
405     width = 1 << log2width;
406     height = 1 << log2height;
407     block_size = width * height;
408     mask = (1u << bit_depth) - 1;
409   }
410 
411   int log2width, log2height;
412   int width, height;
413   int block_size;
414   Func func;
415   aom_bit_depth_t bit_depth;
416   bool use_high_bit_depth;
417   uint32_t mask;
418 };
419 
420 template <typename Func>
operator <<(std::ostream & os,const TestParams<Func> & p)421 std::ostream &operator<<(std::ostream &os, const TestParams<Func> &p) {
422   return os << "width/height:" << p.width << "/" << p.height
423             << " function:" << reinterpret_cast<const void *>(p.func)
424             << " bit-depth:" << p.bit_depth;
425 }
426 
427 // Main class for testing a function type
428 template <typename FunctionType>
429 class MseWxHTestClass
430     : public ::testing::TestWithParam<TestParams<FunctionType> > {
431  public:
SetUp()432   void SetUp() override {
433     params_ = this->GetParam();
434 
435     rnd_.Reset(ACMRandom::DeterministicSeed());
436     src_ = reinterpret_cast<uint16_t *>(
437         aom_memalign(16, block_size() * sizeof(src_)));
438     dst_ = reinterpret_cast<uint8_t *>(
439         aom_memalign(16, block_size() * sizeof(dst_)));
440     ASSERT_NE(src_, nullptr);
441     ASSERT_NE(dst_, nullptr);
442   }
443 
TearDown()444   void TearDown() override {
445     aom_free(src_);
446     aom_free(dst_);
447     src_ = nullptr;
448     dst_ = nullptr;
449   }
450 
451  protected:
452   void RefMatchTestMse();
453   void SpeedTest();
454 
455  protected:
456   ACMRandom rnd_;
457   uint8_t *dst_;
458   uint16_t *src_;
459   TestParams<FunctionType> params_;
460 
461   // some relay helpers
block_size() const462   int block_size() const { return params_.block_size; }
width() const463   int width() const { return params_.width; }
height() const464   int height() const { return params_.height; }
d_stride() const465   int d_stride() const { return params_.width; }  // stride is same as width
s_stride() const466   int s_stride() const { return params_.width; }  // stride is same as width
467 };
468 
469 template <typename MseWxHFunctionType>
SpeedTest()470 void MseWxHTestClass<MseWxHFunctionType>::SpeedTest() {
471   aom_usec_timer ref_timer, test_timer;
472   double elapsed_time_c = 0;
473   double elapsed_time_simd = 0;
474   int run_time = 10000000;
475   int w = width();
476   int h = height();
477   int dstride = d_stride();
478   int sstride = s_stride();
479 
480   for (int k = 0; k < block_size(); ++k) {
481     dst_[k] = rnd_.Rand8();
482     src_[k] = rnd_.Rand8();
483   }
484   aom_usec_timer_start(&ref_timer);
485   for (int i = 0; i < run_time; i++) {
486     aom_mse_wxh_16bit_c(dst_, dstride, src_, sstride, w, h);
487   }
488   aom_usec_timer_mark(&ref_timer);
489   elapsed_time_c = static_cast<double>(aom_usec_timer_elapsed(&ref_timer));
490 
491   aom_usec_timer_start(&test_timer);
492   for (int i = 0; i < run_time; i++) {
493     params_.func(dst_, dstride, src_, sstride, w, h);
494   }
495   aom_usec_timer_mark(&test_timer);
496   elapsed_time_simd = static_cast<double>(aom_usec_timer_elapsed(&test_timer));
497 
498   printf("%dx%d\tc_time=%lf \t simd_time=%lf \t gain=%lf\n", width(), height(),
499          elapsed_time_c, elapsed_time_simd,
500          (elapsed_time_c / elapsed_time_simd));
501 }
502 
503 template <typename MseWxHFunctionType>
RefMatchTestMse()504 void MseWxHTestClass<MseWxHFunctionType>::RefMatchTestMse() {
505   uint64_t mse_ref = 0;
506   uint64_t mse_mod = 0;
507   int w = width();
508   int h = height();
509   int dstride = d_stride();
510   int sstride = s_stride();
511 
512   for (int i = 0; i < 10; i++) {
513     for (int k = 0; k < block_size(); ++k) {
514       dst_[k] = rnd_.Rand8();
515       src_[k] = rnd_.Rand8();
516     }
517     API_REGISTER_STATE_CHECK(
518         mse_ref = aom_mse_wxh_16bit_c(dst_, dstride, src_, sstride, w, h));
519     API_REGISTER_STATE_CHECK(
520         mse_mod = params_.func(dst_, dstride, src_, sstride, w, h));
521     EXPECT_EQ(mse_ref, mse_mod)
522         << "ref mse: " << mse_ref << " mod mse: " << mse_mod;
523   }
524 }
525 
526 template <typename FunctionType>
527 class Mse16xHTestClass
528     : public ::testing::TestWithParam<TestParams<FunctionType> > {
529  public:
530   // Memory required to compute mse of two 8x8 and four 4x4 blocks assigned for
531   // maximum width 16 and maximum height 8.
532   int mem_size = 16 * 8;
SetUp()533   void SetUp() override {
534     params_ = this->GetParam();
535     rnd_.Reset(ACMRandom::DeterministicSeed());
536     src_ = reinterpret_cast<uint16_t *>(
537         aom_memalign(16, mem_size * sizeof(*src_)));
538     dst_ =
539         reinterpret_cast<uint8_t *>(aom_memalign(16, mem_size * sizeof(*dst_)));
540     ASSERT_NE(src_, nullptr);
541     ASSERT_NE(dst_, nullptr);
542   }
543 
TearDown()544   void TearDown() override {
545     aom_free(src_);
546     aom_free(dst_);
547     src_ = nullptr;
548     dst_ = nullptr;
549   }
550 
RandBool()551   uint8_t RandBool() {
552     const uint32_t value = rnd_.Rand8();
553     return (value & 0x1);
554   }
555 
556  protected:
557   void RefMatchExtremeTestMse();
558   void RefMatchTestMse();
559   void SpeedTest();
560 
561  protected:
562   ACMRandom rnd_;
563   uint8_t *dst_;
564   uint16_t *src_;
565   TestParams<FunctionType> params_;
566 
567   // some relay helpers
width() const568   int width() const { return params_.width; }
height() const569   int height() const { return params_.height; }
d_stride() const570   int d_stride() const { return params_.width; }
571 };
572 
573 template <typename Mse16xHFunctionType>
SpeedTest()574 void Mse16xHTestClass<Mse16xHFunctionType>::SpeedTest() {
575   aom_usec_timer ref_timer, test_timer;
576   double elapsed_time_c = 0.0;
577   double elapsed_time_simd = 0.0;
578   const int loop_count = 10000000;
579   const int w = width();
580   const int h = height();
581   const int dstride = d_stride();
582 
583   for (int k = 0; k < mem_size; ++k) {
584     dst_[k] = rnd_.Rand8();
585     // Right shift by 6 is done to generate more input in range of [0,255] than
586     // CDEF_VERY_LARGE
587     int rnd_i10 = rnd_.Rand16() >> 6;
588     src_[k] = (rnd_i10 < 256) ? rnd_i10 : CDEF_VERY_LARGE;
589   }
590 
591   aom_usec_timer_start(&ref_timer);
592   for (int i = 0; i < loop_count; i++) {
593     aom_mse_16xh_16bit_c(dst_, dstride, src_, w, h);
594   }
595   aom_usec_timer_mark(&ref_timer);
596   elapsed_time_c = static_cast<double>(aom_usec_timer_elapsed(&ref_timer));
597 
598   aom_usec_timer_start(&test_timer);
599   for (int i = 0; i < loop_count; i++) {
600     params_.func(dst_, dstride, src_, w, h);
601   }
602   aom_usec_timer_mark(&test_timer);
603   elapsed_time_simd = static_cast<double>(aom_usec_timer_elapsed(&test_timer));
604 
605   printf("%dx%d\tc_time=%lf \t simd_time=%lf \t gain=%.31f\n", width(),
606          height(), elapsed_time_c, elapsed_time_simd,
607          (elapsed_time_c / elapsed_time_simd));
608 }
609 
610 template <typename Mse16xHFunctionType>
RefMatchTestMse()611 void Mse16xHTestClass<Mse16xHFunctionType>::RefMatchTestMse() {
612   uint64_t mse_ref = 0;
613   uint64_t mse_mod = 0;
614   const int w = width();
615   const int h = height();
616   const int dstride = d_stride();
617 
618   for (int i = 0; i < 10; i++) {
619     for (int k = 0; k < mem_size; ++k) {
620       dst_[k] = rnd_.Rand8();
621       // Right shift by 6 is done to generate more input in range of [0,255]
622       // than CDEF_VERY_LARGE
623       int rnd_i10 = rnd_.Rand16() >> 6;
624       src_[k] = (rnd_i10 < 256) ? rnd_i10 : CDEF_VERY_LARGE;
625     }
626 
627     API_REGISTER_STATE_CHECK(
628         mse_ref = aom_mse_16xh_16bit_c(dst_, dstride, src_, w, h));
629     API_REGISTER_STATE_CHECK(mse_mod = params_.func(dst_, dstride, src_, w, h));
630     EXPECT_EQ(mse_ref, mse_mod)
631         << "ref mse: " << mse_ref << " mod mse: " << mse_mod;
632   }
633 }
634 
635 template <typename Mse16xHFunctionType>
RefMatchExtremeTestMse()636 void Mse16xHTestClass<Mse16xHFunctionType>::RefMatchExtremeTestMse() {
637   uint64_t mse_ref = 0;
638   uint64_t mse_mod = 0;
639   const int w = width();
640   const int h = height();
641   const int dstride = d_stride();
642   const int iter = 10;
643 
644   // Fill the buffers with extreme values
645   for (int i = 0; i < iter; i++) {
646     for (int k = 0; k < mem_size; ++k) {
647       dst_[k] = static_cast<uint8_t>(RandBool() ? 0 : 255);
648       src_[k] = static_cast<uint16_t>(RandBool() ? 0 : CDEF_VERY_LARGE);
649     }
650 
651     API_REGISTER_STATE_CHECK(
652         mse_ref = aom_mse_16xh_16bit_c(dst_, dstride, src_, w, h));
653     API_REGISTER_STATE_CHECK(mse_mod = params_.func(dst_, dstride, src_, w, h));
654     EXPECT_EQ(mse_ref, mse_mod)
655         << "ref mse: " << mse_ref << " mod mse: " << mse_mod;
656   }
657 }
658 
659 // Main class for testing a function type
660 template <typename FunctionType>
661 class MainTestClass
662     : public ::testing::TestWithParam<TestParams<FunctionType> > {
663  public:
SetUp()664   void SetUp() override {
665     params_ = this->GetParam();
666 
667     rnd_.Reset(ACMRandom::DeterministicSeed());
668     const size_t unit =
669         use_high_bit_depth() ? sizeof(uint16_t) : sizeof(uint8_t);
670     src_ = reinterpret_cast<uint8_t *>(aom_memalign(16, block_size() * unit));
671     ref_ = new uint8_t[block_size() * unit];
672     ASSERT_NE(src_, nullptr);
673     ASSERT_NE(ref_, nullptr);
674     memset(src_, 0, block_size() * sizeof(src_[0]));
675     memset(ref_, 0, block_size() * sizeof(ref_[0]));
676     if (use_high_bit_depth()) {
677       // TODO(skal): remove!
678       src_ = CONVERT_TO_BYTEPTR(src_);
679       ref_ = CONVERT_TO_BYTEPTR(ref_);
680     }
681   }
682 
TearDown()683   void TearDown() override {
684     if (use_high_bit_depth()) {
685       // TODO(skal): remove!
686       src_ = reinterpret_cast<uint8_t *>(CONVERT_TO_SHORTPTR(src_));
687       ref_ = reinterpret_cast<uint8_t *>(CONVERT_TO_SHORTPTR(ref_));
688     }
689 
690     aom_free(src_);
691     delete[] ref_;
692     src_ = nullptr;
693     ref_ = nullptr;
694   }
695 
696  protected:
697   // We could sub-class MainTestClass into dedicated class for Variance
698   // and MSE/SSE, but it involves a lot of 'this->xxx' dereferencing
699   // to access top class fields xxx. That's cumbersome, so for now we'll just
700   // implement the testing methods here:
701 
702   // Variance tests
703   void ZeroTest();
704   void RefTest();
705   void RefStrideTest();
706   void OneQuarterTest();
707   void SpeedTest();
708 
709   // SSE&SUM tests
710   void RefTestSseSum();
711   void MinTestSseSum();
712   void MaxTestSseSum();
713   void SseSum_SpeedTest();
714 
715   // SSE&SUM dual tests
716   void RefTestSseSumDual();
717   void MinTestSseSumDual();
718   void MaxTestSseSumDual();
719   void SseSum_SpeedTestDual();
720 
721   // MSE/SSE tests
722   void RefTestMse();
723   void RefTestSse();
724   void MaxTestMse();
725   void MaxTestSse();
726 
727  protected:
728   ACMRandom rnd_;
729   uint8_t *src_;
730   uint8_t *ref_;
731   TestParams<FunctionType> params_;
732 
733   // some relay helpers
use_high_bit_depth() const734   bool use_high_bit_depth() const { return params_.use_high_bit_depth; }
byte_shift() const735   int byte_shift() const { return params_.bit_depth - 8; }
block_size() const736   int block_size() const { return params_.block_size; }
width() const737   int width() const { return params_.width; }
height() const738   int height() const { return params_.height; }
mask() const739   uint32_t mask() const { return params_.mask; }
740 };
741 
742 ////////////////////////////////////////////////////////////////////////////////
743 // Tests related to variance.
744 
745 template <typename VarianceFunctionType>
ZeroTest()746 void MainTestClass<VarianceFunctionType>::ZeroTest() {
747   for (int i = 0; i <= 255; ++i) {
748     if (!use_high_bit_depth()) {
749       memset(src_, i, block_size());
750     } else {
751       uint16_t *const src16 = CONVERT_TO_SHORTPTR(src_);
752       for (int k = 0; k < block_size(); ++k) src16[k] = i << byte_shift();
753     }
754     for (int j = 0; j <= 255; ++j) {
755       if (!use_high_bit_depth()) {
756         memset(ref_, j, block_size());
757       } else {
758         uint16_t *const ref16 = CONVERT_TO_SHORTPTR(ref_);
759         for (int k = 0; k < block_size(); ++k) ref16[k] = j << byte_shift();
760       }
761       unsigned int sse, var;
762       API_REGISTER_STATE_CHECK(
763           var = params_.func(src_, width(), ref_, width(), &sse));
764       EXPECT_EQ(0u, var) << "src values: " << i << " ref values: " << j;
765     }
766   }
767 }
768 
769 template <typename VarianceFunctionType>
RefTest()770 void MainTestClass<VarianceFunctionType>::RefTest() {
771   for (int i = 0; i < 10; ++i) {
772     for (int j = 0; j < block_size(); j++) {
773       if (!use_high_bit_depth()) {
774         src_[j] = rnd_.Rand8();
775         ref_[j] = rnd_.Rand8();
776       } else {
777         CONVERT_TO_SHORTPTR(src_)[j] = rnd_.Rand16() & mask();
778         CONVERT_TO_SHORTPTR(ref_)[j] = rnd_.Rand16() & mask();
779       }
780     }
781     unsigned int sse1, sse2, var1, var2;
782     const int stride = width();
783     API_REGISTER_STATE_CHECK(
784         var1 = params_.func(src_, stride, ref_, stride, &sse1));
785     var2 =
786         variance_ref(src_, ref_, params_.log2width, params_.log2height, stride,
787                      stride, &sse2, use_high_bit_depth(), params_.bit_depth);
788     EXPECT_EQ(sse1, sse2) << "Error at test index: " << i;
789     EXPECT_EQ(var1, var2) << "Error at test index: " << i;
790   }
791 }
792 
793 template <typename VarianceFunctionType>
RefStrideTest()794 void MainTestClass<VarianceFunctionType>::RefStrideTest() {
795   for (int i = 0; i < 10; ++i) {
796     const int ref_stride = (i & 1) * width();
797     const int src_stride = ((i >> 1) & 1) * width();
798     for (int j = 0; j < block_size(); j++) {
799       const int ref_ind = (j / width()) * ref_stride + j % width();
800       const int src_ind = (j / width()) * src_stride + j % width();
801       if (!use_high_bit_depth()) {
802         src_[src_ind] = rnd_.Rand8();
803         ref_[ref_ind] = rnd_.Rand8();
804       } else {
805         CONVERT_TO_SHORTPTR(src_)[src_ind] = rnd_.Rand16() & mask();
806         CONVERT_TO_SHORTPTR(ref_)[ref_ind] = rnd_.Rand16() & mask();
807       }
808     }
809     unsigned int sse1, sse2;
810     unsigned int var1, var2;
811 
812     API_REGISTER_STATE_CHECK(
813         var1 = params_.func(src_, src_stride, ref_, ref_stride, &sse1));
814     var2 = variance_ref(src_, ref_, params_.log2width, params_.log2height,
815                         src_stride, ref_stride, &sse2, use_high_bit_depth(),
816                         params_.bit_depth);
817     EXPECT_EQ(sse1, sse2) << "Error at test index: " << i;
818     EXPECT_EQ(var1, var2) << "Error at test index: " << i;
819   }
820 }
821 
822 template <typename VarianceFunctionType>
OneQuarterTest()823 void MainTestClass<VarianceFunctionType>::OneQuarterTest() {
824   const int half = block_size() / 2;
825   if (!use_high_bit_depth()) {
826     memset(src_, 255, block_size());
827     memset(ref_, 255, half);
828     memset(ref_ + half, 0, half);
829   } else {
830     aom_memset16(CONVERT_TO_SHORTPTR(src_), 255 << byte_shift(), block_size());
831     aom_memset16(CONVERT_TO_SHORTPTR(ref_), 255 << byte_shift(), half);
832     aom_memset16(CONVERT_TO_SHORTPTR(ref_) + half, 0, half);
833   }
834   unsigned int sse, var, expected;
835   API_REGISTER_STATE_CHECK(
836       var = params_.func(src_, width(), ref_, width(), &sse));
837   expected = block_size() * 255 * 255 / 4;
838   EXPECT_EQ(expected, var);
839 }
840 
841 template <typename VarianceFunctionType>
SpeedTest()842 void MainTestClass<VarianceFunctionType>::SpeedTest() {
843   for (int j = 0; j < block_size(); j++) {
844     if (!use_high_bit_depth()) {
845       src_[j] = rnd_.Rand8();
846       ref_[j] = rnd_.Rand8();
847 #if CONFIG_AV1_HIGHBITDEPTH
848     } else {
849       CONVERT_TO_SHORTPTR(src_)[j] = rnd_.Rand16() & mask();
850       CONVERT_TO_SHORTPTR(ref_)[j] = rnd_.Rand16() & mask();
851 #endif  // CONFIG_AV1_HIGHBITDEPTH
852     }
853   }
854   unsigned int sse;
855   const int stride = width();
856   int run_time = 1000000000 / block_size();
857   aom_usec_timer timer;
858   aom_usec_timer_start(&timer);
859   for (int i = 0; i < run_time; ++i) {
860     params_.func(src_, stride, ref_, stride, &sse);
861   }
862 
863   aom_usec_timer_mark(&timer);
864   const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
865   printf("Variance %dx%d : %d us\n", width(), height(), elapsed_time);
866 }
867 
868 template <typename GetSseSum8x8QuadFuncType>
RefTestSseSum()869 void MainTestClass<GetSseSum8x8QuadFuncType>::RefTestSseSum() {
870   for (int i = 0; i < 10; ++i) {
871     for (int j = 0; j < block_size(); ++j) {
872       src_[j] = rnd_.Rand8();
873       ref_[j] = rnd_.Rand8();
874     }
875     unsigned int sse1[256] = { 0 };
876     unsigned int sse2[256] = { 0 };
877     unsigned int var1[256] = { 0 };
878     unsigned int var2[256] = { 0 };
879     int sum1[256] = { 0 };
880     int sum2[256] = { 0 };
881     unsigned int sse_tot_c = 0;
882     unsigned int sse_tot_simd = 0;
883     int sum_tot_c = 0;
884     int sum_tot_simd = 0;
885     const int stride = width();
886     int k = 0;
887 
888     for (int row = 0; row < height(); row += 8) {
889       for (int col = 0; col < width(); col += 32) {
890         API_REGISTER_STATE_CHECK(params_.func(src_ + stride * row + col, stride,
891                                               ref_ + stride * row + col, stride,
892                                               &sse1[k], &sum1[k], &sse_tot_simd,
893                                               &sum_tot_simd, &var1[k]));
894         aom_get_var_sse_sum_8x8_quad_c(
895             src_ + stride * row + col, stride, ref_ + stride * row + col,
896             stride, &sse2[k], &sum2[k], &sse_tot_c, &sum_tot_c, &var2[k]);
897         k += 4;
898       }
899     }
900     EXPECT_EQ(sse_tot_c, sse_tot_simd);
901     EXPECT_EQ(sum_tot_c, sum_tot_simd);
902     for (int p = 0; p < 256; p++) {
903       EXPECT_EQ(sse1[p], sse2[p]);
904       EXPECT_EQ(sum1[p], sum2[p]);
905       EXPECT_EQ(var1[p], var2[p]);
906     }
907   }
908 }
909 
910 template <typename GetSseSum8x8QuadFuncType>
MinTestSseSum()911 void MainTestClass<GetSseSum8x8QuadFuncType>::MinTestSseSum() {
912   memset(src_, 0, block_size());
913   memset(ref_, 255, block_size());
914   unsigned int sse1[256] = { 0 };
915   unsigned int sse2[256] = { 0 };
916   unsigned int var1[256] = { 0 };
917   unsigned int var2[256] = { 0 };
918   int sum1[256] = { 0 };
919   int sum2[256] = { 0 };
920   unsigned int sse_tot_c = 0;
921   unsigned int sse_tot_simd = 0;
922   int sum_tot_c = 0;
923   int sum_tot_simd = 0;
924   const int stride = width();
925   int k = 0;
926 
927   for (int i = 0; i < height(); i += 8) {
928     for (int j = 0; j < width(); j += 32) {
929       API_REGISTER_STATE_CHECK(params_.func(
930           src_ + stride * i + j, stride, ref_ + stride * i + j, stride,
931           &sse1[k], &sum1[k], &sse_tot_simd, &sum_tot_simd, &var1[k]));
932       aom_get_var_sse_sum_8x8_quad_c(
933           src_ + stride * i + j, stride, ref_ + stride * i + j, stride,
934           &sse2[k], &sum2[k], &sse_tot_c, &sum_tot_c, &var2[k]);
935       k += 4;
936     }
937   }
938   EXPECT_EQ(sse_tot_simd, sse_tot_c);
939   EXPECT_EQ(sum_tot_simd, sum_tot_c);
940   for (int p = 0; p < 256; p++) {
941     EXPECT_EQ(sse1[p], sse2[p]);
942     EXPECT_EQ(sum1[p], sum2[p]);
943     EXPECT_EQ(var1[p], var2[p]);
944   }
945 }
946 
947 template <typename GetSseSum8x8QuadFuncType>
MaxTestSseSum()948 void MainTestClass<GetSseSum8x8QuadFuncType>::MaxTestSseSum() {
949   memset(src_, 255, block_size());
950   memset(ref_, 0, block_size());
951   unsigned int sse1[256] = { 0 };
952   unsigned int sse2[256] = { 0 };
953   unsigned int var1[256] = { 0 };
954   unsigned int var2[256] = { 0 };
955   int sum1[256] = { 0 };
956   int sum2[256] = { 0 };
957   unsigned int sse_tot_c = 0;
958   unsigned int sse_tot_simd = 0;
959   int sum_tot_c = 0;
960   int sum_tot_simd = 0;
961   const int stride = width();
962   int k = 0;
963 
964   for (int i = 0; i < height(); i += 8) {
965     for (int j = 0; j < width(); j += 32) {
966       API_REGISTER_STATE_CHECK(params_.func(
967           src_ + stride * i + j, stride, ref_ + stride * i + j, stride,
968           &sse1[k], &sum1[k], &sse_tot_simd, &sum_tot_simd, &var1[k]));
969       aom_get_var_sse_sum_8x8_quad_c(
970           src_ + stride * i + j, stride, ref_ + stride * i + j, stride,
971           &sse2[k], &sum2[k], &sse_tot_c, &sum_tot_c, &var2[k]);
972       k += 4;
973     }
974   }
975   EXPECT_EQ(sse_tot_c, sse_tot_simd);
976   EXPECT_EQ(sum_tot_c, sum_tot_simd);
977 
978   for (int p = 0; p < 256; p++) {
979     EXPECT_EQ(sse1[p], sse2[p]);
980     EXPECT_EQ(sum1[p], sum2[p]);
981     EXPECT_EQ(var1[p], var2[p]);
982   }
983 }
984 
985 template <typename GetSseSum8x8QuadFuncType>
SseSum_SpeedTest()986 void MainTestClass<GetSseSum8x8QuadFuncType>::SseSum_SpeedTest() {
987   const int loop_count = 1000000000 / block_size();
988   for (int j = 0; j < block_size(); ++j) {
989     src_[j] = rnd_.Rand8();
990     ref_[j] = rnd_.Rand8();
991   }
992 
993   unsigned int sse1[4] = { 0 };
994   unsigned int sse2[4] = { 0 };
995   unsigned int var1[4] = { 0 };
996   unsigned int var2[4] = { 0 };
997   int sum1[4] = { 0 };
998   int sum2[4] = { 0 };
999   unsigned int sse_tot_c = 0;
1000   unsigned int sse_tot_simd = 0;
1001   int sum_tot_c = 0;
1002   int sum_tot_simd = 0;
1003   const int stride = width();
1004 
1005   aom_usec_timer timer;
1006   aom_usec_timer_start(&timer);
1007   for (int r = 0; r < loop_count; ++r) {
1008     for (int i = 0; i < height(); i += 8) {
1009       for (int j = 0; j < width(); j += 32) {
1010         aom_get_var_sse_sum_8x8_quad_c(src_ + stride * i + j, stride,
1011                                        ref_ + stride * i + j, stride, sse2,
1012                                        sum2, &sse_tot_c, &sum_tot_c, var2);
1013       }
1014     }
1015   }
1016   aom_usec_timer_mark(&timer);
1017   const double elapsed_time_ref =
1018       static_cast<double>(aom_usec_timer_elapsed(&timer));
1019 
1020   aom_usec_timer_start(&timer);
1021   for (int r = 0; r < loop_count; ++r) {
1022     for (int i = 0; i < height(); i += 8) {
1023       for (int j = 0; j < width(); j += 32) {
1024         params_.func(src_ + stride * i + j, stride, ref_ + stride * i + j,
1025                      stride, sse1, sum1, &sse_tot_simd, &sum_tot_simd, var1);
1026       }
1027     }
1028   }
1029   aom_usec_timer_mark(&timer);
1030   const double elapsed_time_simd =
1031       static_cast<double>(aom_usec_timer_elapsed(&timer));
1032 
1033   printf(
1034       "aom_getvar_8x8_quad for block=%dx%d : ref_time=%lf \t simd_time=%lf \t "
1035       "gain=%lf \n",
1036       width(), height(), elapsed_time_ref, elapsed_time_simd,
1037       elapsed_time_ref / elapsed_time_simd);
1038 }
1039 
1040 template <typename GetSseSum16x16DualFuncType>
RefTestSseSumDual()1041 void MainTestClass<GetSseSum16x16DualFuncType>::RefTestSseSumDual() {
1042   for (int iter = 0; iter < 10; ++iter) {
1043     for (int idx = 0; idx < block_size(); ++idx) {
1044       src_[idx] = rnd_.Rand8();
1045       ref_[idx] = rnd_.Rand8();
1046     }
1047     unsigned int sse1[64] = { 0 };
1048     unsigned int sse2[64] = { 0 };
1049     unsigned int var1[64] = { 0 };
1050     unsigned int var2[64] = { 0 };
1051     unsigned int sse_tot_c = 0;
1052     unsigned int sse_tot_simd = 0;
1053     int sum_tot_c = 0;
1054     int sum_tot_simd = 0;
1055     const int stride = width();
1056     int k = 0;
1057 
1058     for (int row = 0; row < height(); row += 16) {
1059       for (int col = 0; col < width(); col += 32) {
1060         API_REGISTER_STATE_CHECK(params_.func(
1061             src_ + stride * row + col, stride, ref_ + stride * row + col,
1062             stride, &sse1[k], &sse_tot_simd, &sum_tot_simd, &var1[k]));
1063         aom_get_var_sse_sum_16x16_dual_c(
1064             src_ + stride * row + col, stride, ref_ + stride * row + col,
1065             stride, &sse2[k], &sse_tot_c, &sum_tot_c, &var2[k]);
1066         k += 2;
1067       }
1068     }
1069     EXPECT_EQ(sse_tot_c, sse_tot_simd);
1070     EXPECT_EQ(sum_tot_c, sum_tot_simd);
1071     for (int p = 0; p < 64; p++) {
1072       EXPECT_EQ(sse1[p], sse2[p]);
1073       EXPECT_EQ(sse_tot_simd, sse_tot_c);
1074       EXPECT_EQ(sum_tot_simd, sum_tot_c);
1075       EXPECT_EQ(var1[p], var2[p]);
1076     }
1077   }
1078 }
1079 
1080 template <typename GetSseSum16x16DualFuncType>
MinTestSseSumDual()1081 void MainTestClass<GetSseSum16x16DualFuncType>::MinTestSseSumDual() {
1082   memset(src_, 0, block_size());
1083   memset(ref_, 255, block_size());
1084   unsigned int sse1[64] = { 0 };
1085   unsigned int sse2[64] = { 0 };
1086   unsigned int var1[64] = { 0 };
1087   unsigned int var2[64] = { 0 };
1088   unsigned int sse_tot_c = 0;
1089   unsigned int sse_tot_simd = 0;
1090   int sum_tot_c = 0;
1091   int sum_tot_simd = 0;
1092   const int stride = width();
1093   int k = 0;
1094 
1095   for (int row = 0; row < height(); row += 16) {
1096     for (int col = 0; col < width(); col += 32) {
1097       API_REGISTER_STATE_CHECK(params_.func(
1098           src_ + stride * row + col, stride, ref_ + stride * row + col, stride,
1099           &sse1[k], &sse_tot_simd, &sum_tot_simd, &var1[k]));
1100       aom_get_var_sse_sum_16x16_dual_c(
1101           src_ + stride * row + col, stride, ref_ + stride * row + col, stride,
1102           &sse2[k], &sse_tot_c, &sum_tot_c, &var2[k]);
1103       k += 2;
1104     }
1105   }
1106   EXPECT_EQ(sse_tot_simd, sse_tot_c);
1107   EXPECT_EQ(sum_tot_simd, sum_tot_c);
1108   for (int p = 0; p < 64; p++) {
1109     EXPECT_EQ(sse1[p], sse2[p]);
1110     EXPECT_EQ(var1[p], var2[p]);
1111   }
1112 }
1113 
1114 template <typename GetSseSum16x16DualFuncType>
MaxTestSseSumDual()1115 void MainTestClass<GetSseSum16x16DualFuncType>::MaxTestSseSumDual() {
1116   memset(src_, 255, block_size());
1117   memset(ref_, 0, block_size());
1118   unsigned int sse1[64] = { 0 };
1119   unsigned int sse2[64] = { 0 };
1120   unsigned int var1[64] = { 0 };
1121   unsigned int var2[64] = { 0 };
1122   unsigned int sse_tot_c = 0;
1123   unsigned int sse_tot_simd = 0;
1124   int sum_tot_c = 0;
1125   int sum_tot_simd = 0;
1126   const int stride = width();
1127   int k = 0;
1128 
1129   for (int row = 0; row < height(); row += 16) {
1130     for (int col = 0; col < width(); col += 32) {
1131       API_REGISTER_STATE_CHECK(params_.func(
1132           src_ + stride * row + col, stride, ref_ + stride * row + col, stride,
1133           &sse1[k], &sse_tot_simd, &sum_tot_simd, &var1[k]));
1134       aom_get_var_sse_sum_16x16_dual_c(
1135           src_ + stride * row + col, stride, ref_ + stride * row + col, stride,
1136           &sse2[k], &sse_tot_c, &sum_tot_c, &var2[k]);
1137       k += 2;
1138     }
1139   }
1140   EXPECT_EQ(sse_tot_c, sse_tot_simd);
1141   EXPECT_EQ(sum_tot_c, sum_tot_simd);
1142 
1143   for (int p = 0; p < 64; p++) {
1144     EXPECT_EQ(sse1[p], sse2[p]);
1145     EXPECT_EQ(var1[p], var2[p]);
1146   }
1147 }
1148 
1149 template <typename GetSseSum16x16DualFuncType>
SseSum_SpeedTestDual()1150 void MainTestClass<GetSseSum16x16DualFuncType>::SseSum_SpeedTestDual() {
1151   const int loop_count = 1000000000 / block_size();
1152   for (int idx = 0; idx < block_size(); ++idx) {
1153     src_[idx] = rnd_.Rand8();
1154     ref_[idx] = rnd_.Rand8();
1155   }
1156 
1157   unsigned int sse1[2] = { 0 };
1158   unsigned int sse2[2] = { 0 };
1159   unsigned int var1[2] = { 0 };
1160   unsigned int var2[2] = { 0 };
1161   unsigned int sse_tot_c = 0;
1162   unsigned int sse_tot_simd = 0;
1163   int sum_tot_c = 0;
1164   int sum_tot_simd = 0;
1165   const int stride = width();
1166 
1167   aom_usec_timer timer;
1168   aom_usec_timer_start(&timer);
1169   for (int r = 0; r < loop_count; ++r) {
1170     for (int row = 0; row < height(); row += 16) {
1171       for (int col = 0; col < width(); col += 32) {
1172         aom_get_var_sse_sum_16x16_dual_c(src_ + stride * row + col, stride,
1173                                          ref_ + stride * row + col, stride,
1174                                          sse2, &sse_tot_c, &sum_tot_c, var2);
1175       }
1176     }
1177   }
1178   aom_usec_timer_mark(&timer);
1179   const double elapsed_time_ref =
1180       static_cast<double>(aom_usec_timer_elapsed(&timer));
1181 
1182   aom_usec_timer_start(&timer);
1183   for (int r = 0; r < loop_count; ++r) {
1184     for (int row = 0; row < height(); row += 16) {
1185       for (int col = 0; col < width(); col += 32) {
1186         params_.func(src_ + stride * row + col, stride,
1187                      ref_ + stride * row + col, stride, sse1, &sse_tot_simd,
1188                      &sum_tot_simd, var1);
1189       }
1190     }
1191   }
1192   aom_usec_timer_mark(&timer);
1193   const double elapsed_time_simd =
1194       static_cast<double>(aom_usec_timer_elapsed(&timer));
1195 
1196   printf(
1197       "aom_getvar_16x16_dual for block=%dx%d : ref_time=%lf \t simd_time=%lf "
1198       "\t "
1199       "gain=%lf \n",
1200       width(), height(), elapsed_time_ref, elapsed_time_simd,
1201       elapsed_time_ref / elapsed_time_simd);
1202 }
1203 
1204 ////////////////////////////////////////////////////////////////////////////////
1205 // Tests related to MSE / SSE.
1206 
1207 template <typename FunctionType>
RefTestMse()1208 void MainTestClass<FunctionType>::RefTestMse() {
1209   for (int i = 0; i < 10; ++i) {
1210     for (int j = 0; j < block_size(); ++j) {
1211       if (!use_high_bit_depth()) {
1212         src_[j] = rnd_.Rand8();
1213         ref_[j] = rnd_.Rand8();
1214 #if CONFIG_AV1_HIGHBITDEPTH
1215       } else {
1216         CONVERT_TO_SHORTPTR(src_)[j] = rnd_.Rand16() & mask();
1217         CONVERT_TO_SHORTPTR(ref_)[j] = rnd_.Rand16() & mask();
1218 #endif  // CONFIG_AV1_HIGHBITDEPTH
1219       }
1220     }
1221     unsigned int sse1, sse2;
1222     const int stride = width();
1223     API_REGISTER_STATE_CHECK(params_.func(src_, stride, ref_, stride, &sse1));
1224     variance_ref(src_, ref_, params_.log2width, params_.log2height, stride,
1225                  stride, &sse2, use_high_bit_depth(), params_.bit_depth);
1226     EXPECT_EQ(sse1, sse2);
1227   }
1228 }
1229 
1230 template <typename FunctionType>
RefTestSse()1231 void MainTestClass<FunctionType>::RefTestSse() {
1232   for (int i = 0; i < 10; ++i) {
1233     for (int j = 0; j < block_size(); ++j) {
1234       src_[j] = rnd_.Rand8();
1235       ref_[j] = rnd_.Rand8();
1236     }
1237     unsigned int sse2;
1238     unsigned int var1;
1239     const int stride = width();
1240     API_REGISTER_STATE_CHECK(var1 = params_.func(src_, stride, ref_, stride));
1241     variance_ref(src_, ref_, params_.log2width, params_.log2height, stride,
1242                  stride, &sse2, false, AOM_BITS_8);
1243     EXPECT_EQ(var1, sse2);
1244   }
1245 }
1246 
1247 template <typename FunctionType>
MaxTestMse()1248 void MainTestClass<FunctionType>::MaxTestMse() {
1249   int max_value = (1 << params_.bit_depth) - 1;
1250   if (!use_high_bit_depth()) {
1251     memset(src_, max_value, block_size());
1252     memset(ref_, 0, block_size());
1253 #if CONFIG_AV1_HIGHBITDEPTH
1254   } else {
1255     aom_memset16(CONVERT_TO_SHORTPTR(src_), max_value, block_size());
1256     aom_memset16(CONVERT_TO_SHORTPTR(ref_), 0, block_size());
1257 #endif  // CONFIG_AV1_HIGHBITDEPTH
1258   }
1259   unsigned int sse;
1260   API_REGISTER_STATE_CHECK(params_.func(src_, width(), ref_, width(), &sse));
1261   unsigned int expected = (unsigned int)block_size() * max_value * max_value;
1262   switch (params_.bit_depth) {
1263     case AOM_BITS_12: expected = ROUND_POWER_OF_TWO(expected, 8); break;
1264     case AOM_BITS_10: expected = ROUND_POWER_OF_TWO(expected, 4); break;
1265     case AOM_BITS_8:
1266     default: break;
1267   }
1268   EXPECT_EQ(expected, sse);
1269 }
1270 
1271 template <typename FunctionType>
MaxTestSse()1272 void MainTestClass<FunctionType>::MaxTestSse() {
1273   memset(src_, 255, block_size());
1274   memset(ref_, 0, block_size());
1275   unsigned int var;
1276   API_REGISTER_STATE_CHECK(var = params_.func(src_, width(), ref_, width()));
1277   const unsigned int expected = block_size() * 255 * 255;
1278   EXPECT_EQ(expected, var);
1279 }
1280 
1281 ////////////////////////////////////////////////////////////////////////////////
1282 
1283 using std::get;
1284 using std::make_tuple;
1285 using std::tuple;
1286 
1287 template <typename FunctionType>
1288 class SubpelVarianceTest
1289     : public ::testing::TestWithParam<TestParams<FunctionType> > {
1290  public:
SetUp()1291   void SetUp() override {
1292     params_ = this->GetParam();
1293 
1294     rnd_.Reset(ACMRandom::DeterministicSeed());
1295     if (!use_high_bit_depth()) {
1296       src_ = reinterpret_cast<uint8_t *>(aom_memalign(32, block_size()));
1297       sec_ = reinterpret_cast<uint8_t *>(aom_memalign(32, block_size()));
1298       ref_ = reinterpret_cast<uint8_t *>(
1299           aom_memalign(32, block_size() + width() + height() + 1));
1300     } else {
1301       src_ = CONVERT_TO_BYTEPTR(reinterpret_cast<uint16_t *>(
1302           aom_memalign(32, block_size() * sizeof(uint16_t))));
1303       sec_ = CONVERT_TO_BYTEPTR(reinterpret_cast<uint16_t *>(
1304           aom_memalign(32, block_size() * sizeof(uint16_t))));
1305       ref_ = CONVERT_TO_BYTEPTR(aom_memalign(
1306           32, (block_size() + width() + height() + 1) * sizeof(uint16_t)));
1307     }
1308     ASSERT_NE(src_, nullptr);
1309     ASSERT_NE(sec_, nullptr);
1310     ASSERT_NE(ref_, nullptr);
1311   }
1312 
TearDown()1313   void TearDown() override {
1314     if (!use_high_bit_depth()) {
1315       aom_free(src_);
1316       aom_free(ref_);
1317       aom_free(sec_);
1318     } else {
1319       aom_free(CONVERT_TO_SHORTPTR(src_));
1320       aom_free(CONVERT_TO_SHORTPTR(ref_));
1321       aom_free(CONVERT_TO_SHORTPTR(sec_));
1322     }
1323   }
1324 
1325  protected:
1326   void RefTest();
1327   void ExtremeRefTest();
1328   void SpeedTest();
1329 
1330   ACMRandom rnd_;
1331   uint8_t *src_;
1332   uint8_t *ref_;
1333   uint8_t *sec_;
1334   TestParams<FunctionType> params_;
1335   DIST_WTD_COMP_PARAMS jcp_param_;
1336 
1337   // some relay helpers
use_high_bit_depth() const1338   bool use_high_bit_depth() const { return params_.use_high_bit_depth; }
byte_shift() const1339   int byte_shift() const { return params_.bit_depth - 8; }
block_size() const1340   int block_size() const { return params_.block_size; }
width() const1341   int width() const { return params_.width; }
height() const1342   int height() const { return params_.height; }
mask() const1343   uint32_t mask() const { return params_.mask; }
1344 };
1345 
1346 template <typename SubpelVarianceFunctionType>
RefTest()1347 void SubpelVarianceTest<SubpelVarianceFunctionType>::RefTest() {
1348   for (int x = 0; x < 8; ++x) {
1349     for (int y = 0; y < 8; ++y) {
1350       if (!use_high_bit_depth()) {
1351         for (int j = 0; j < block_size(); j++) {
1352           src_[j] = rnd_.Rand8();
1353         }
1354         for (int j = 0; j < block_size() + width() + height() + 1; j++) {
1355           ref_[j] = rnd_.Rand8();
1356         }
1357       } else {
1358         for (int j = 0; j < block_size(); j++) {
1359           CONVERT_TO_SHORTPTR(src_)[j] = rnd_.Rand16() & mask();
1360         }
1361         for (int j = 0; j < block_size() + width() + height() + 1; j++) {
1362           CONVERT_TO_SHORTPTR(ref_)[j] = rnd_.Rand16() & mask();
1363         }
1364       }
1365       unsigned int sse1, sse2;
1366       unsigned int var1;
1367       API_REGISTER_STATE_CHECK(
1368           var1 = params_.func(ref_, width() + 1, x, y, src_, width(), &sse1));
1369       const unsigned int var2 = subpel_variance_ref(
1370           ref_, src_, params_.log2width, params_.log2height, x, y, &sse2,
1371           use_high_bit_depth(), params_.bit_depth);
1372       EXPECT_EQ(sse1, sse2) << "at position " << x << ", " << y;
1373       EXPECT_EQ(var1, var2) << "at position " << x << ", " << y;
1374     }
1375   }
1376 }
1377 
1378 template <typename SubpelVarianceFunctionType>
ExtremeRefTest()1379 void SubpelVarianceTest<SubpelVarianceFunctionType>::ExtremeRefTest() {
1380   // Compare against reference.
1381   // Src: Set the first half of values to 0, the second half to the maximum.
1382   // Ref: Set the first half of values to the maximum, the second half to 0.
1383   for (int x = 0; x < 8; ++x) {
1384     for (int y = 0; y < 8; ++y) {
1385       const int half = block_size() / 2;
1386       if (!use_high_bit_depth()) {
1387         memset(src_, 0, half);
1388         memset(src_ + half, 255, half);
1389         memset(ref_, 255, half);
1390         memset(ref_ + half, 0, half + width() + height() + 1);
1391       } else {
1392         aom_memset16(CONVERT_TO_SHORTPTR(src_), mask(), half);
1393         aom_memset16(CONVERT_TO_SHORTPTR(src_) + half, 0, half);
1394         aom_memset16(CONVERT_TO_SHORTPTR(ref_), 0, half);
1395         aom_memset16(CONVERT_TO_SHORTPTR(ref_) + half, mask(),
1396                      half + width() + height() + 1);
1397       }
1398       unsigned int sse1, sse2;
1399       unsigned int var1;
1400       API_REGISTER_STATE_CHECK(
1401           var1 = params_.func(ref_, width() + 1, x, y, src_, width(), &sse1));
1402       const unsigned int var2 = subpel_variance_ref(
1403           ref_, src_, params_.log2width, params_.log2height, x, y, &sse2,
1404           use_high_bit_depth(), params_.bit_depth);
1405       EXPECT_EQ(sse1, sse2) << "for xoffset " << x << " and yoffset " << y;
1406       EXPECT_EQ(var1, var2) << "for xoffset " << x << " and yoffset " << y;
1407     }
1408   }
1409 }
1410 
1411 template <typename SubpelVarianceFunctionType>
SpeedTest()1412 void SubpelVarianceTest<SubpelVarianceFunctionType>::SpeedTest() {
1413   if (!use_high_bit_depth()) {
1414     for (int j = 0; j < block_size(); j++) {
1415       src_[j] = rnd_.Rand8();
1416     }
1417     for (int j = 0; j < block_size() + width() + height() + 1; j++) {
1418       ref_[j] = rnd_.Rand8();
1419     }
1420   } else {
1421     for (int j = 0; j < block_size(); j++) {
1422       CONVERT_TO_SHORTPTR(src_)[j] = rnd_.Rand16() & mask();
1423     }
1424     for (int j = 0; j < block_size() + width() + height() + 1; j++) {
1425       CONVERT_TO_SHORTPTR(ref_)[j] = rnd_.Rand16() & mask();
1426     }
1427   }
1428 
1429   unsigned int sse1, sse2;
1430   int run_time = 1000000000 / block_size();
1431   aom_usec_timer timer;
1432 
1433   aom_usec_timer_start(&timer);
1434   for (int i = 0; i < run_time; ++i) {
1435     int x = rnd_(8);
1436     int y = rnd_(8);
1437     params_.func(ref_, width() + 1, x, y, src_, width(), &sse1);
1438   }
1439   aom_usec_timer_mark(&timer);
1440 
1441   const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
1442 
1443   aom_usec_timer timer_c;
1444 
1445   aom_usec_timer_start(&timer_c);
1446   for (int i = 0; i < run_time; ++i) {
1447     int x = rnd_(8);
1448     int y = rnd_(8);
1449     subpel_variance_ref(ref_, src_, params_.log2width, params_.log2height, x, y,
1450                         &sse2, use_high_bit_depth(), params_.bit_depth);
1451   }
1452   aom_usec_timer_mark(&timer_c);
1453 
1454   const int elapsed_time_c = static_cast<int>(aom_usec_timer_elapsed(&timer_c));
1455 
1456   printf(
1457       "sub_pixel_variance_%dx%d_%d: ref_time=%d us opt_time=%d us gain=%d \n",
1458       width(), height(), params_.bit_depth, elapsed_time_c, elapsed_time,
1459       elapsed_time_c / elapsed_time);
1460 }
1461 
1462 template <>
RefTest()1463 void SubpelVarianceTest<SubpixAvgVarMxNFunc>::RefTest() {
1464   for (int x = 0; x < 8; ++x) {
1465     for (int y = 0; y < 8; ++y) {
1466       if (!use_high_bit_depth()) {
1467         for (int j = 0; j < block_size(); j++) {
1468           src_[j] = rnd_.Rand8();
1469           sec_[j] = rnd_.Rand8();
1470         }
1471         for (int j = 0; j < block_size() + width() + height() + 1; j++) {
1472           ref_[j] = rnd_.Rand8();
1473         }
1474       } else {
1475         for (int j = 0; j < block_size(); j++) {
1476           CONVERT_TO_SHORTPTR(src_)[j] = rnd_.Rand16() & mask();
1477           CONVERT_TO_SHORTPTR(sec_)[j] = rnd_.Rand16() & mask();
1478         }
1479         for (int j = 0; j < block_size() + width() + height() + 1; j++) {
1480           CONVERT_TO_SHORTPTR(ref_)[j] = rnd_.Rand16() & mask();
1481         }
1482       }
1483       uint32_t sse1, sse2;
1484       uint32_t var1, var2;
1485       API_REGISTER_STATE_CHECK(var1 = params_.func(ref_, width() + 1, x, y,
1486                                                    src_, width(), &sse1, sec_));
1487       var2 = subpel_avg_variance_ref(ref_, src_, sec_, params_.log2width,
1488                                      params_.log2height, x, y, &sse2,
1489                                      use_high_bit_depth(), params_.bit_depth);
1490       EXPECT_EQ(sse1, sse2) << "at position " << x << ", " << y;
1491       EXPECT_EQ(var1, var2) << "at position " << x << ", " << y;
1492     }
1493   }
1494 }
1495 
1496 template <>
RefTest()1497 void SubpelVarianceTest<DistWtdSubpixAvgVarMxNFunc>::RefTest() {
1498   for (int x = 0; x < 8; ++x) {
1499     for (int y = 0; y < 8; ++y) {
1500       if (!use_high_bit_depth()) {
1501         for (int j = 0; j < block_size(); j++) {
1502           src_[j] = rnd_.Rand8();
1503           sec_[j] = rnd_.Rand8();
1504         }
1505         for (int j = 0; j < block_size() + width() + height() + 1; j++) {
1506           ref_[j] = rnd_.Rand8();
1507         }
1508       } else {
1509         for (int j = 0; j < block_size(); j++) {
1510           CONVERT_TO_SHORTPTR(src_)[j] = rnd_.Rand16() & mask();
1511           CONVERT_TO_SHORTPTR(sec_)[j] = rnd_.Rand16() & mask();
1512         }
1513         for (int j = 0; j < block_size() + width() + height() + 1; j++) {
1514           CONVERT_TO_SHORTPTR(ref_)[j] = rnd_.Rand16() & mask();
1515         }
1516       }
1517       for (int x0 = 0; x0 < 2; ++x0) {
1518         for (int y0 = 0; y0 < 4; ++y0) {
1519           uint32_t sse1, sse2;
1520           uint32_t var1, var2;
1521           jcp_param_.fwd_offset = quant_dist_lookup_table[y0][x0];
1522           jcp_param_.bck_offset = quant_dist_lookup_table[y0][1 - x0];
1523           API_REGISTER_STATE_CHECK(var1 = params_.func(ref_, width() + 0, x, y,
1524                                                        src_, width(), &sse1,
1525                                                        sec_, &jcp_param_));
1526           var2 = dist_wtd_subpel_avg_variance_ref(
1527               ref_, src_, sec_, params_.log2width, params_.log2height, x, y,
1528               &sse2, use_high_bit_depth(), params_.bit_depth, &jcp_param_);
1529           EXPECT_EQ(sse1, sse2) << "at position " << x << ", " << y;
1530           EXPECT_EQ(var1, var2) << "at position " << x << ", " << y;
1531         }
1532       }
1533     }
1534   }
1535 }
1536 
1537 ////////////////////////////////////////////////////////////////////////////////
1538 
1539 #if !CONFIG_REALTIME_ONLY
1540 
// Bound used when generating random OBMC test data: mask values and the
// multiplier applied to wsrc samples are drawn with
// rnd_(kMaskMax * kMaskMax + 1).
static const int kMaskMax = 64;

// Test parameters for OBMC sub-pixel variance functions.
typedef TestParams<ObmcSubpelVarFunc> ObmcSubpelVarianceParams;
1544 
1545 template <typename FunctionType>
1546 class ObmcVarianceTest
1547     : public ::testing::TestWithParam<TestParams<FunctionType> > {
1548  public:
SetUp()1549   void SetUp() override {
1550     params_ = this->GetParam();
1551 
1552     rnd_.Reset(ACMRandom::DeterministicSeed());
1553     if (!use_high_bit_depth()) {
1554       pre_ = reinterpret_cast<uint8_t *>(
1555           aom_memalign(32, block_size() + width() + height() + 1));
1556     } else {
1557       pre_ = CONVERT_TO_BYTEPTR(reinterpret_cast<uint16_t *>(aom_memalign(
1558           32, (block_size() + width() + height() + 1) * sizeof(uint16_t))));
1559     }
1560     wsrc_ = reinterpret_cast<int32_t *>(
1561         aom_memalign(32, block_size() * sizeof(uint32_t)));
1562     mask_ = reinterpret_cast<int32_t *>(
1563         aom_memalign(32, block_size() * sizeof(uint32_t)));
1564     ASSERT_NE(pre_, nullptr);
1565     ASSERT_NE(wsrc_, nullptr);
1566     ASSERT_NE(mask_, nullptr);
1567   }
1568 
TearDown()1569   void TearDown() override {
1570     if (!use_high_bit_depth()) {
1571       aom_free(pre_);
1572     } else {
1573       aom_free(CONVERT_TO_SHORTPTR(pre_));
1574     }
1575     aom_free(wsrc_);
1576     aom_free(mask_);
1577   }
1578 
1579  protected:
1580   void RefTest();
1581   void ExtremeRefTest();
1582   void SpeedTest();
1583 
1584   ACMRandom rnd_;
1585   uint8_t *pre_;
1586   int32_t *wsrc_;
1587   int32_t *mask_;
1588   TestParams<FunctionType> params_;
1589 
1590   // some relay helpers
use_high_bit_depth() const1591   bool use_high_bit_depth() const { return params_.use_high_bit_depth; }
byte_shift() const1592   int byte_shift() const { return params_.bit_depth - 8; }
block_size() const1593   int block_size() const { return params_.block_size; }
width() const1594   int width() const { return params_.width; }
height() const1595   int height() const { return params_.height; }
bd_mask() const1596   uint32_t bd_mask() const { return params_.mask; }
1597 };
1598 
1599 template <>
RefTest()1600 void ObmcVarianceTest<ObmcSubpelVarFunc>::RefTest() {
1601   for (int x = 0; x < 8; ++x) {
1602     for (int y = 0; y < 8; ++y) {
1603       if (!use_high_bit_depth())
1604         for (int j = 0; j < block_size() + width() + height() + 1; j++)
1605           pre_[j] = rnd_.Rand8();
1606       else
1607         for (int j = 0; j < block_size() + width() + height() + 1; j++)
1608           CONVERT_TO_SHORTPTR(pre_)[j] = rnd_.Rand16() & bd_mask();
1609       for (int j = 0; j < block_size(); j++) {
1610         wsrc_[j] = (rnd_.Rand16() & bd_mask()) * rnd_(kMaskMax * kMaskMax + 1);
1611         mask_[j] = rnd_(kMaskMax * kMaskMax + 1);
1612       }
1613 
1614       uint32_t sse1, sse2;
1615       uint32_t var1, var2;
1616       API_REGISTER_STATE_CHECK(
1617           var1 = params_.func(pre_, width() + 1, x, y, wsrc_, mask_, &sse1));
1618       var2 = obmc_subpel_variance_ref(
1619           pre_, params_.log2width, params_.log2height, x, y, wsrc_, mask_,
1620           &sse2, use_high_bit_depth(), params_.bit_depth);
1621       EXPECT_EQ(sse1, sse2) << "for xoffset " << x << " and yoffset " << y;
1622       EXPECT_EQ(var1, var2) << "for xoffset " << x << " and yoffset " << y;
1623     }
1624   }
1625 }
1626 
1627 template <>
ExtremeRefTest()1628 void ObmcVarianceTest<ObmcSubpelVarFunc>::ExtremeRefTest() {
1629   // Pre: Set the first half of values to the maximum, the second half to 0.
1630   // Mask: same as above
1631   // WSrc: Set the first half of values to 0, the second half to the maximum.
1632   for (int x = 0; x < 8; ++x) {
1633     for (int y = 0; y < 8; ++y) {
1634       const int half = block_size() / 2;
1635       if (!use_high_bit_depth()) {
1636         memset(pre_, 255, half);
1637         memset(pre_ + half, 0, half + width() + height() + 1);
1638       } else {
1639         aom_memset16(CONVERT_TO_SHORTPTR(pre_), bd_mask(), half);
1640         aom_memset16(CONVERT_TO_SHORTPTR(pre_) + half, 0,
1641                      half + width() + height() + 1);
1642       }
1643       for (int j = 0; j < half; j++) {
1644         wsrc_[j] = bd_mask() * kMaskMax * kMaskMax;
1645         mask_[j] = 0;
1646       }
1647       for (int j = half; j < block_size(); j++) {
1648         wsrc_[j] = 0;
1649         mask_[j] = kMaskMax * kMaskMax;
1650       }
1651 
1652       uint32_t sse1, sse2;
1653       uint32_t var1, var2;
1654       API_REGISTER_STATE_CHECK(
1655           var1 = params_.func(pre_, width() + 1, x, y, wsrc_, mask_, &sse1));
1656       var2 = obmc_subpel_variance_ref(
1657           pre_, params_.log2width, params_.log2height, x, y, wsrc_, mask_,
1658           &sse2, use_high_bit_depth(), params_.bit_depth);
1659       EXPECT_EQ(sse1, sse2) << "for xoffset " << x << " and yoffset " << y;
1660       EXPECT_EQ(var1, var2) << "for xoffset " << x << " and yoffset " << y;
1661     }
1662   }
1663 }
1664 
1665 template <>
SpeedTest()1666 void ObmcVarianceTest<ObmcSubpelVarFunc>::SpeedTest() {
1667   if (!use_high_bit_depth())
1668     for (int j = 0; j < block_size() + width() + height() + 1; j++)
1669       pre_[j] = rnd_.Rand8();
1670   else
1671     for (int j = 0; j < block_size() + width() + height() + 1; j++)
1672       CONVERT_TO_SHORTPTR(pre_)[j] = rnd_.Rand16() & bd_mask();
1673   for (int j = 0; j < block_size(); j++) {
1674     wsrc_[j] = (rnd_.Rand16() & bd_mask()) * rnd_(kMaskMax * kMaskMax + 1);
1675     mask_[j] = rnd_(kMaskMax * kMaskMax + 1);
1676   }
1677   unsigned int sse1;
1678   const int stride = width() + 1;
1679   int run_time = 1000000000 / block_size();
1680   aom_usec_timer timer;
1681 
1682   aom_usec_timer_start(&timer);
1683   for (int i = 0; i < run_time; ++i) {
1684     int x = rnd_(8);
1685     int y = rnd_(8);
1686     API_REGISTER_STATE_CHECK(
1687         params_.func(pre_, stride, x, y, wsrc_, mask_, &sse1));
1688   }
1689   aom_usec_timer_mark(&timer);
1690 
1691   const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
1692   printf("obmc_sub_pixel_variance_%dx%d_%d: %d us\n", width(), height(),
1693          params_.bit_depth, elapsed_time);
1694 }
1695 
1696 #endif  // !CONFIG_REALTIME_ONLY
1697 
// Concrete fixture and parameter aliases used by the TEST_P and
// INSTANTIATE_TEST_SUITE_P declarations that follow. AvxMseTest and
// AvxVarianceTest name the same MainTestClass<VarianceMxNFunc> instantiation.
// The OBMC alias only exists when CONFIG_REALTIME_ONLY is off.
1698 typedef MseWxHTestClass<MseWxH16bitFunc> MseWxHTest;
1699 typedef Mse16xHTestClass<Mse16xH16bitFunc> Mse16xHTest;
1700 typedef MainTestClass<VarianceMxNFunc> AvxMseTest;
1701 typedef MainTestClass<VarianceMxNFunc> AvxVarianceTest;
1702 typedef MainTestClass<GetSseSum8x8QuadFunc> GetSseSum8x8QuadTest;
1703 typedef MainTestClass<GetSseSum16x16DualFunc> GetSseSum16x16DualTest;
1704 typedef SubpelVarianceTest<SubpixVarMxNFunc> AvxSubpelVarianceTest;
1705 typedef SubpelVarianceTest<SubpixAvgVarMxNFunc> AvxSubpelAvgVarianceTest;
1706 typedef SubpelVarianceTest<DistWtdSubpixAvgVarMxNFunc>
1707     AvxDistWtdSubpelAvgVarianceTest;
1708 #if !CONFIG_REALTIME_ONLY
1709 typedef ObmcVarianceTest<ObmcSubpelVarFunc> AvxObmcSubpelVarianceTest;
1710 #endif
1711 typedef TestParams<MseWxH16bitFunc> MseWxHParams;
1712 typedef TestParams<Mse16xH16bitFunc> Mse16xHParams;
1713 
// Value-parameterized test cases. Each TEST_P body simply forwards to one
// member function of its fixture. Cases whose name starts with DISABLED_ are
// the timing runs. The SumOfSquaresTest and AvxObmcSubpelVarianceTest cases
// are compiled only when CONFIG_REALTIME_ONLY is off.
TEST_P(MseWxHTest,RefMse)1714 TEST_P(MseWxHTest, RefMse) { RefMatchTestMse(); }
TEST_P(MseWxHTest,DISABLED_SpeedMse)1715 TEST_P(MseWxHTest, DISABLED_SpeedMse) { SpeedTest(); }
TEST_P(Mse16xHTest,RefMse)1716 TEST_P(Mse16xHTest, RefMse) { RefMatchTestMse(); }
TEST_P(Mse16xHTest,RefMseExtreme)1717 TEST_P(Mse16xHTest, RefMseExtreme) { RefMatchExtremeTestMse(); }
TEST_P(Mse16xHTest,DISABLED_SpeedMse)1718 TEST_P(Mse16xHTest, DISABLED_SpeedMse) { SpeedTest(); }
TEST_P(AvxMseTest,RefMse)1719 TEST_P(AvxMseTest, RefMse) { RefTestMse(); }
TEST_P(AvxMseTest,MaxMse)1720 TEST_P(AvxMseTest, MaxMse) { MaxTestMse(); }
TEST_P(AvxVarianceTest,Zero)1721 TEST_P(AvxVarianceTest, Zero) { ZeroTest(); }
TEST_P(AvxVarianceTest,Ref)1722 TEST_P(AvxVarianceTest, Ref) { RefTest(); }
TEST_P(AvxVarianceTest,RefStride)1723 TEST_P(AvxVarianceTest, RefStride) { RefStrideTest(); }
TEST_P(AvxVarianceTest,OneQuarter)1724 TEST_P(AvxVarianceTest, OneQuarter) { OneQuarterTest(); }
TEST_P(AvxVarianceTest,DISABLED_Speed)1725 TEST_P(AvxVarianceTest, DISABLED_Speed) { SpeedTest(); }
TEST_P(GetSseSum8x8QuadTest,RefMseSum)1726 TEST_P(GetSseSum8x8QuadTest, RefMseSum) { RefTestSseSum(); }
TEST_P(GetSseSum8x8QuadTest,MinSseSum)1727 TEST_P(GetSseSum8x8QuadTest, MinSseSum) { MinTestSseSum(); }
TEST_P(GetSseSum8x8QuadTest,MaxMseSum)1728 TEST_P(GetSseSum8x8QuadTest, MaxMseSum) { MaxTestSseSum(); }
TEST_P(GetSseSum8x8QuadTest,DISABLED_Speed)1729 TEST_P(GetSseSum8x8QuadTest, DISABLED_Speed) { SseSum_SpeedTest(); }
TEST_P(GetSseSum16x16DualTest,RefMseSum)1730 TEST_P(GetSseSum16x16DualTest, RefMseSum) { RefTestSseSumDual(); }
TEST_P(GetSseSum16x16DualTest,MinSseSum)1731 TEST_P(GetSseSum16x16DualTest, MinSseSum) { MinTestSseSumDual(); }
TEST_P(GetSseSum16x16DualTest,MaxMseSum)1732 TEST_P(GetSseSum16x16DualTest, MaxMseSum) { MaxTestSseSumDual(); }
TEST_P(GetSseSum16x16DualTest,DISABLED_Speed)1733 TEST_P(GetSseSum16x16DualTest, DISABLED_Speed) { SseSum_SpeedTestDual(); }
1734 #if !CONFIG_REALTIME_ONLY
TEST_P(SumOfSquaresTest,Const)1735 TEST_P(SumOfSquaresTest, Const) { ConstTest(); }
TEST_P(SumOfSquaresTest,Ref)1736 TEST_P(SumOfSquaresTest, Ref) { RefTest(); }
1737 #endif  // !CONFIG_REALTIME_ONLY
TEST_P(AvxSubpelVarianceTest,Ref)1738 TEST_P(AvxSubpelVarianceTest, Ref) { RefTest(); }
TEST_P(AvxSubpelVarianceTest,ExtremeRef)1739 TEST_P(AvxSubpelVarianceTest, ExtremeRef) { ExtremeRefTest(); }
TEST_P(AvxSubpelVarianceTest,DISABLED_Speed)1740 TEST_P(AvxSubpelVarianceTest, DISABLED_Speed) { SpeedTest(); }
TEST_P(AvxSubpelAvgVarianceTest,Ref)1741 TEST_P(AvxSubpelAvgVarianceTest, Ref) { RefTest(); }
TEST_P(AvxDistWtdSubpelAvgVarianceTest,Ref)1742 TEST_P(AvxDistWtdSubpelAvgVarianceTest, Ref) { RefTest(); }
1743 #if !CONFIG_REALTIME_ONLY
TEST_P(AvxObmcSubpelVarianceTest,Ref)1744 TEST_P(AvxObmcSubpelVarianceTest, Ref) { RefTest(); }
TEST_P(AvxObmcSubpelVarianceTest,ExtremeRef)1745 TEST_P(AvxObmcSubpelVarianceTest, ExtremeRef) { ExtremeRefTest(); }
TEST_P(AvxObmcSubpelVarianceTest,DISABLED_Speed)1746 TEST_P(AvxObmcSubpelVarianceTest, DISABLED_Speed) { SpeedTest(); }
1747 #endif
1748 
// Runs MseWxHTest against the C implementation aom_mse_wxh_16bit_c.
// NOTE(review): the first two arguments look like log2(width) and
// log2(height) and the last like a bit depth - confirm against TestParams.
1749 INSTANTIATE_TEST_SUITE_P(
1750     C, MseWxHTest,
1751     ::testing::Values(MseWxHParams(3, 3, &aom_mse_wxh_16bit_c, 8),
1752                       MseWxHParams(3, 2, &aom_mse_wxh_16bit_c, 8),
1753                       MseWxHParams(2, 3, &aom_mse_wxh_16bit_c, 8),
1754                       MseWxHParams(2, 2, &aom_mse_wxh_16bit_c, 8)));
1755 
// Runs Mse16xHTest against the C implementation aom_mse_16xh_16bit_c, using
// the same four parameter combinations as the MseWxHTest instantiation.
1756 INSTANTIATE_TEST_SUITE_P(
1757     C, Mse16xHTest,
1758     ::testing::Values(Mse16xHParams(3, 3, &aom_mse_16xh_16bit_c, 8),
1759                       Mse16xHParams(3, 2, &aom_mse_16xh_16bit_c, 8),
1760                       Mse16xHParams(2, 3, &aom_mse_16xh_16bit_c, 8),
1761                       Mse16xHParams(2, 2, &aom_mse_16xh_16bit_c, 8)));
1762 
// SumOfSquaresTest is parameterized directly on the function pointer; only
// the C implementation is registered here, and only in non-realtime builds.
1763 #if !CONFIG_REALTIME_ONLY
1764 INSTANTIATE_TEST_SUITE_P(C, SumOfSquaresTest,
1765                          ::testing::Values(aom_get_mb_ss_c));
1766 #endif  // !CONFIG_REALTIME_ONLY
1767 
// Parameter alias plus the C instantiation of AvxMseTest for the 16x16, 16x8,
// 8x16 and 8x8 MSE functions. No bit-depth argument is passed here, so the
// TestParams default applies.
1768 typedef TestParams<VarianceMxNFunc> MseParams;
1769 INSTANTIATE_TEST_SUITE_P(C, AvxMseTest,
1770                          ::testing::Values(MseParams(4, 4, &aom_mse16x16_c),
1771                                            MseParams(4, 3, &aom_mse16x8_c),
1772                                            MseParams(3, 4, &aom_mse8x16_c),
1773                                            MseParams(3, 3, &aom_mse8x8_c)));
1774 
// C variance functions exercised by AvxVarianceTest, one entry per block
// size. Each entry pairs two small integers with the matching function (for
// example 7, 7 with the 128x128 function), which look like log2(width) and
// log2(height) - confirm against TestParams. The last six sizes are built
// only when CONFIG_REALTIME_ONLY is off.
1775 typedef TestParams<VarianceMxNFunc> VarianceParams;
1776 const VarianceParams kArrayVariance_c[] = {
1777   VarianceParams(7, 7, &aom_variance128x128_c),
1778   VarianceParams(7, 6, &aom_variance128x64_c),
1779   VarianceParams(6, 7, &aom_variance64x128_c),
1780   VarianceParams(6, 6, &aom_variance64x64_c),
1781   VarianceParams(6, 5, &aom_variance64x32_c),
1782   VarianceParams(5, 6, &aom_variance32x64_c),
1783   VarianceParams(5, 5, &aom_variance32x32_c),
1784   VarianceParams(5, 4, &aom_variance32x16_c),
1785   VarianceParams(4, 5, &aom_variance16x32_c),
1786   VarianceParams(4, 4, &aom_variance16x16_c),
1787   VarianceParams(4, 3, &aom_variance16x8_c),
1788   VarianceParams(3, 4, &aom_variance8x16_c),
1789   VarianceParams(3, 3, &aom_variance8x8_c),
1790   VarianceParams(3, 2, &aom_variance8x4_c),
1791   VarianceParams(2, 3, &aom_variance4x8_c),
1792   VarianceParams(2, 2, &aom_variance4x4_c),
1793 #if !CONFIG_REALTIME_ONLY
1794   VarianceParams(6, 4, &aom_variance64x16_c),
1795   VarianceParams(4, 6, &aom_variance16x64_c),
1796   VarianceParams(5, 3, &aom_variance32x8_c),
1797   VarianceParams(3, 5, &aom_variance8x32_c),
1798   VarianceParams(4, 2, &aom_variance16x4_c),
1799   VarianceParams(2, 4, &aom_variance4x16_c),
1800 #endif
1801 };
1802 INSTANTIATE_TEST_SUITE_P(C, AvxVarianceTest,
1803                          ::testing::ValuesIn(kArrayVariance_c));
1804 
// GetSseSum8x8QuadTest parameters: the same C function,
// aom_get_var_sse_sum_8x8_quad_c, is registered with four different
// dimension pairs and a trailing 0 argument.
1805 typedef TestParams<GetSseSum8x8QuadFunc> GetSseSumParams;
1806 const GetSseSumParams kArrayGetSseSum8x8Quad_c[] = {
1807   GetSseSumParams(7, 7, &aom_get_var_sse_sum_8x8_quad_c, 0),
1808   GetSseSumParams(6, 6, &aom_get_var_sse_sum_8x8_quad_c, 0),
1809   GetSseSumParams(5, 5, &aom_get_var_sse_sum_8x8_quad_c, 0),
1810   GetSseSumParams(5, 4, &aom_get_var_sse_sum_8x8_quad_c, 0)
1811 };
1812 INSTANTIATE_TEST_SUITE_P(C, GetSseSum8x8QuadTest,
1813                          ::testing::ValuesIn(kArrayGetSseSum8x8Quad_c));
1814 
// GetSseSum16x16DualTest parameters: the same C function,
// aom_get_var_sse_sum_16x16_dual_c, is registered with four different
// dimension pairs and a trailing 0 argument.
1815 typedef TestParams<GetSseSum16x16DualFunc> GetSseSumParamsDual;
1816 const GetSseSumParamsDual kArrayGetSseSum16x16Dual_c[] = {
1817   GetSseSumParamsDual(7, 7, &aom_get_var_sse_sum_16x16_dual_c, 0),
1818   GetSseSumParamsDual(6, 6, &aom_get_var_sse_sum_16x16_dual_c, 0),
1819   GetSseSumParamsDual(5, 5, &aom_get_var_sse_sum_16x16_dual_c, 0),
1820   GetSseSumParamsDual(5, 4, &aom_get_var_sse_sum_16x16_dual_c, 0)
1821 };
1822 
1823 INSTANTIATE_TEST_SUITE_P(C, GetSseSum16x16DualTest,
1824                          ::testing::ValuesIn(kArrayGetSseSum16x16Dual_c));
1825 
// C sub-pixel variance functions exercised by AvxSubpelVarianceTest, one
// entry per block size, each with a trailing 0 argument. The last six sizes
// are built only when CONFIG_REALTIME_ONLY is off.
1826 typedef TestParams<SubpixVarMxNFunc> SubpelVarianceParams;
1827 const SubpelVarianceParams kArraySubpelVariance_c[] = {
1828   SubpelVarianceParams(7, 7, &aom_sub_pixel_variance128x128_c, 0),
1829   SubpelVarianceParams(7, 6, &aom_sub_pixel_variance128x64_c, 0),
1830   SubpelVarianceParams(6, 7, &aom_sub_pixel_variance64x128_c, 0),
1831   SubpelVarianceParams(6, 6, &aom_sub_pixel_variance64x64_c, 0),
1832   SubpelVarianceParams(6, 5, &aom_sub_pixel_variance64x32_c, 0),
1833   SubpelVarianceParams(5, 6, &aom_sub_pixel_variance32x64_c, 0),
1834   SubpelVarianceParams(5, 5, &aom_sub_pixel_variance32x32_c, 0),
1835   SubpelVarianceParams(5, 4, &aom_sub_pixel_variance32x16_c, 0),
1836   SubpelVarianceParams(4, 5, &aom_sub_pixel_variance16x32_c, 0),
1837   SubpelVarianceParams(4, 4, &aom_sub_pixel_variance16x16_c, 0),
1838   SubpelVarianceParams(4, 3, &aom_sub_pixel_variance16x8_c, 0),
1839   SubpelVarianceParams(3, 4, &aom_sub_pixel_variance8x16_c, 0),
1840   SubpelVarianceParams(3, 3, &aom_sub_pixel_variance8x8_c, 0),
1841   SubpelVarianceParams(3, 2, &aom_sub_pixel_variance8x4_c, 0),
1842   SubpelVarianceParams(2, 3, &aom_sub_pixel_variance4x8_c, 0),
1843   SubpelVarianceParams(2, 2, &aom_sub_pixel_variance4x4_c, 0),
1844 #if !CONFIG_REALTIME_ONLY
1845   SubpelVarianceParams(6, 4, &aom_sub_pixel_variance64x16_c, 0),
1846   SubpelVarianceParams(4, 6, &aom_sub_pixel_variance16x64_c, 0),
1847   SubpelVarianceParams(5, 3, &aom_sub_pixel_variance32x8_c, 0),
1848   SubpelVarianceParams(3, 5, &aom_sub_pixel_variance8x32_c, 0),
1849   SubpelVarianceParams(4, 2, &aom_sub_pixel_variance16x4_c, 0),
1850   SubpelVarianceParams(2, 4, &aom_sub_pixel_variance4x16_c, 0),
1851 #endif
1852 };
1853 INSTANTIATE_TEST_SUITE_P(C, AvxSubpelVarianceTest,
1854                          ::testing::ValuesIn(kArraySubpelVariance_c));
1855 
// C sub-pixel average-variance functions exercised by
// AvxSubpelAvgVarianceTest, one entry per block size, each with a trailing 0
// argument. The last six sizes are built only when CONFIG_REALTIME_ONLY is
// off.
1856 typedef TestParams<SubpixAvgVarMxNFunc> SubpelAvgVarianceParams;
1857 const SubpelAvgVarianceParams kArraySubpelAvgVariance_c[] = {
1858   SubpelAvgVarianceParams(7, 7, &aom_sub_pixel_avg_variance128x128_c, 0),
1859   SubpelAvgVarianceParams(7, 6, &aom_sub_pixel_avg_variance128x64_c, 0),
1860   SubpelAvgVarianceParams(6, 7, &aom_sub_pixel_avg_variance64x128_c, 0),
1861   SubpelAvgVarianceParams(6, 6, &aom_sub_pixel_avg_variance64x64_c, 0),
1862   SubpelAvgVarianceParams(6, 5, &aom_sub_pixel_avg_variance64x32_c, 0),
1863   SubpelAvgVarianceParams(5, 6, &aom_sub_pixel_avg_variance32x64_c, 0),
1864   SubpelAvgVarianceParams(5, 5, &aom_sub_pixel_avg_variance32x32_c, 0),
1865   SubpelAvgVarianceParams(5, 4, &aom_sub_pixel_avg_variance32x16_c, 0),
1866   SubpelAvgVarianceParams(4, 5, &aom_sub_pixel_avg_variance16x32_c, 0),
1867   SubpelAvgVarianceParams(4, 4, &aom_sub_pixel_avg_variance16x16_c, 0),
1868   SubpelAvgVarianceParams(4, 3, &aom_sub_pixel_avg_variance16x8_c, 0),
1869   SubpelAvgVarianceParams(3, 4, &aom_sub_pixel_avg_variance8x16_c, 0),
1870   SubpelAvgVarianceParams(3, 3, &aom_sub_pixel_avg_variance8x8_c, 0),
1871   SubpelAvgVarianceParams(3, 2, &aom_sub_pixel_avg_variance8x4_c, 0),
1872   SubpelAvgVarianceParams(2, 3, &aom_sub_pixel_avg_variance4x8_c, 0),
1873   SubpelAvgVarianceParams(2, 2, &aom_sub_pixel_avg_variance4x4_c, 0),
1874 #if !CONFIG_REALTIME_ONLY
1875   SubpelAvgVarianceParams(6, 4, &aom_sub_pixel_avg_variance64x16_c, 0),
1876   SubpelAvgVarianceParams(4, 6, &aom_sub_pixel_avg_variance16x64_c, 0),
1877   SubpelAvgVarianceParams(5, 3, &aom_sub_pixel_avg_variance32x8_c, 0),
1878   SubpelAvgVarianceParams(3, 5, &aom_sub_pixel_avg_variance8x32_c, 0),
1879   SubpelAvgVarianceParams(4, 2, &aom_sub_pixel_avg_variance16x4_c, 0),
1880   SubpelAvgVarianceParams(2, 4, &aom_sub_pixel_avg_variance4x16_c, 0),
1881 #endif
1882 };
1883 INSTANTIATE_TEST_SUITE_P(C, AvxSubpelAvgVarianceTest,
1884                          ::testing::ValuesIn(kArraySubpelAvgVariance_c));
1885 
// C distance-weighted sub-pixel average-variance functions exercised by
// AvxDistWtdSubpelAvgVarianceTest. Unlike the other sub-pixel tables, this
// list starts at 64x64; it has no 128-wide or 128-high entries. The last six
// sizes are built only when CONFIG_REALTIME_ONLY is off.
1886 typedef TestParams<DistWtdSubpixAvgVarMxNFunc> DistWtdSubpelAvgVarianceParams;
1887 const DistWtdSubpelAvgVarianceParams kArrayDistWtdSubpelAvgVariance_c[] = {
1888   DistWtdSubpelAvgVarianceParams(
1889       6, 6, &aom_dist_wtd_sub_pixel_avg_variance64x64_c, 0),
1890   DistWtdSubpelAvgVarianceParams(
1891       6, 5, &aom_dist_wtd_sub_pixel_avg_variance64x32_c, 0),
1892   DistWtdSubpelAvgVarianceParams(
1893       5, 6, &aom_dist_wtd_sub_pixel_avg_variance32x64_c, 0),
1894   DistWtdSubpelAvgVarianceParams(
1895       5, 5, &aom_dist_wtd_sub_pixel_avg_variance32x32_c, 0),
1896   DistWtdSubpelAvgVarianceParams(
1897       5, 4, &aom_dist_wtd_sub_pixel_avg_variance32x16_c, 0),
1898   DistWtdSubpelAvgVarianceParams(
1899       4, 5, &aom_dist_wtd_sub_pixel_avg_variance16x32_c, 0),
1900   DistWtdSubpelAvgVarianceParams(
1901       4, 4, &aom_dist_wtd_sub_pixel_avg_variance16x16_c, 0),
1902   DistWtdSubpelAvgVarianceParams(4, 3,
1903                                  &aom_dist_wtd_sub_pixel_avg_variance16x8_c, 0),
1904   DistWtdSubpelAvgVarianceParams(3, 4,
1905                                  &aom_dist_wtd_sub_pixel_avg_variance8x16_c, 0),
1906   DistWtdSubpelAvgVarianceParams(3, 3,
1907                                  &aom_dist_wtd_sub_pixel_avg_variance8x8_c, 0),
1908   DistWtdSubpelAvgVarianceParams(3, 2,
1909                                  &aom_dist_wtd_sub_pixel_avg_variance8x4_c, 0),
1910   DistWtdSubpelAvgVarianceParams(2, 3,
1911                                  &aom_dist_wtd_sub_pixel_avg_variance4x8_c, 0),
1912   DistWtdSubpelAvgVarianceParams(2, 2,
1913                                  &aom_dist_wtd_sub_pixel_avg_variance4x4_c, 0),
1914 #if !CONFIG_REALTIME_ONLY
1915 
1916   DistWtdSubpelAvgVarianceParams(
1917       6, 4, &aom_dist_wtd_sub_pixel_avg_variance64x16_c, 0),
1918   DistWtdSubpelAvgVarianceParams(
1919       4, 6, &aom_dist_wtd_sub_pixel_avg_variance16x64_c, 0),
1920   DistWtdSubpelAvgVarianceParams(5, 3,
1921                                  &aom_dist_wtd_sub_pixel_avg_variance32x8_c, 0),
1922   DistWtdSubpelAvgVarianceParams(3, 5,
1923                                  &aom_dist_wtd_sub_pixel_avg_variance8x32_c, 0),
1924   DistWtdSubpelAvgVarianceParams(4, 2,
1925                                  &aom_dist_wtd_sub_pixel_avg_variance16x4_c, 0),
1926   DistWtdSubpelAvgVarianceParams(2, 4,
1927                                  &aom_dist_wtd_sub_pixel_avg_variance4x16_c, 0),
1928 #endif
1929 };
1930 INSTANTIATE_TEST_SUITE_P(C, AvxDistWtdSubpelAvgVarianceTest,
1931                          ::testing::ValuesIn(kArrayDistWtdSubpelAvgVariance_c));
1932 
// C OBMC sub-pixel variance functions exercised by AvxObmcSubpelVarianceTest
// (non-realtime builds only). The values are passed inline through
// ::testing::Values, one entry per block size, each with a trailing 0
// argument.
1933 #if !CONFIG_REALTIME_ONLY
1934 INSTANTIATE_TEST_SUITE_P(
1935     C, AvxObmcSubpelVarianceTest,
1936     ::testing::Values(
1937         ObmcSubpelVarianceParams(7, 7, &aom_obmc_sub_pixel_variance128x128_c,
1938                                  0),
1939         ObmcSubpelVarianceParams(7, 6, &aom_obmc_sub_pixel_variance128x64_c, 0),
1940         ObmcSubpelVarianceParams(6, 7, &aom_obmc_sub_pixel_variance64x128_c, 0),
1941         ObmcSubpelVarianceParams(6, 6, &aom_obmc_sub_pixel_variance64x64_c, 0),
1942         ObmcSubpelVarianceParams(6, 5, &aom_obmc_sub_pixel_variance64x32_c, 0),
1943         ObmcSubpelVarianceParams(5, 6, &aom_obmc_sub_pixel_variance32x64_c, 0),
1944         ObmcSubpelVarianceParams(5, 5, &aom_obmc_sub_pixel_variance32x32_c, 0),
1945         ObmcSubpelVarianceParams(5, 4, &aom_obmc_sub_pixel_variance32x16_c, 0),
1946         ObmcSubpelVarianceParams(4, 5, &aom_obmc_sub_pixel_variance16x32_c, 0),
1947         ObmcSubpelVarianceParams(4, 4, &aom_obmc_sub_pixel_variance16x16_c, 0),
1948         ObmcSubpelVarianceParams(4, 3, &aom_obmc_sub_pixel_variance16x8_c, 0),
1949         ObmcSubpelVarianceParams(3, 4, &aom_obmc_sub_pixel_variance8x16_c, 0),
1950         ObmcSubpelVarianceParams(3, 3, &aom_obmc_sub_pixel_variance8x8_c, 0),
1951         ObmcSubpelVarianceParams(3, 2, &aom_obmc_sub_pixel_variance8x4_c, 0),
1952         ObmcSubpelVarianceParams(2, 3, &aom_obmc_sub_pixel_variance4x8_c, 0),
1953         ObmcSubpelVarianceParams(2, 2, &aom_obmc_sub_pixel_variance4x4_c, 0),
1954 
1955         ObmcSubpelVarianceParams(6, 4, &aom_obmc_sub_pixel_variance64x16_c, 0),
1956         ObmcSubpelVarianceParams(4, 6, &aom_obmc_sub_pixel_variance16x64_c, 0),
1957         ObmcSubpelVarianceParams(5, 3, &aom_obmc_sub_pixel_variance32x8_c, 0),
1958         ObmcSubpelVarianceParams(3, 5, &aom_obmc_sub_pixel_variance8x32_c, 0),
1959         ObmcSubpelVarianceParams(4, 2, &aom_obmc_sub_pixel_variance16x4_c, 0),
1960         ObmcSubpelVarianceParams(2, 4, &aom_obmc_sub_pixel_variance4x16_c, 0)));
1961 #endif
1962 
1963 #if CONFIG_AV1_HIGHBITDEPTH
// Signature of the high-bitdepth WxH MSE functions under test: both dst and
// src are uint16_t buffers with their own strides, followed by the block
// width and height; the result is returned as a uint64_t.
1964 typedef uint64_t (*MseHBDWxH16bitFunc)(uint16_t *dst, int dstride,
1965                                        uint16_t *src, int sstride, int w,
1966                                        int h);
1967 
1968 template <typename FunctionType>
1969 class MseHBDWxHTestClass
1970     : public ::testing::TestWithParam<TestParams<FunctionType> > {
1971  public:
SetUp()1972   void SetUp() override {
1973     params_ = this->GetParam();
1974 
1975     rnd_.Reset(ACMRandom::DeterministicSeed());
1976     src_ = reinterpret_cast<uint16_t *>(
1977         aom_memalign(16, block_size() * sizeof(src_)));
1978     dst_ = reinterpret_cast<uint16_t *>(
1979         aom_memalign(16, block_size() * sizeof(dst_)));
1980     ASSERT_NE(src_, nullptr);
1981     ASSERT_NE(dst_, nullptr);
1982   }
1983 
TearDown()1984   void TearDown() override {
1985     aom_free(src_);
1986     aom_free(dst_);
1987     src_ = nullptr;
1988     dst_ = nullptr;
1989   }
1990 
1991  protected:
1992   void RefMatchTestMse();
1993   void SpeedTest();
1994 
1995  protected:
1996   ACMRandom rnd_;
1997   uint16_t *dst_;
1998   uint16_t *src_;
1999   TestParams<FunctionType> params_;
2000 
2001   // some relay helpers
block_size() const2002   int block_size() const { return params_.block_size; }
width() const2003   int width() const { return params_.width; }
d_stride() const2004   int d_stride() const { return params_.width; }  // stride is same as width
s_stride() const2005   int s_stride() const { return params_.width; }  // stride is same as width
height() const2006   int height() const { return params_.height; }
mask() const2007   int mask() const { return params_.mask; }
2008 };
2009 
2010 template <typename MseHBDWxHFunctionType>
SpeedTest()2011 void MseHBDWxHTestClass<MseHBDWxHFunctionType>::SpeedTest() {
2012   aom_usec_timer ref_timer, test_timer;
2013   double elapsed_time_c = 0;
2014   double elapsed_time_simd = 0;
2015   int run_time = 10000000;
2016   int w = width();
2017   int h = height();
2018   int dstride = d_stride();
2019   int sstride = s_stride();
2020   for (int k = 0; k < block_size(); ++k) {
2021     dst_[k] = rnd_.Rand16() & mask();
2022     src_[k] = rnd_.Rand16() & mask();
2023   }
2024   aom_usec_timer_start(&ref_timer);
2025   for (int i = 0; i < run_time; i++) {
2026     aom_mse_wxh_16bit_highbd_c(dst_, dstride, src_, sstride, w, h);
2027   }
2028   aom_usec_timer_mark(&ref_timer);
2029   elapsed_time_c = static_cast<double>(aom_usec_timer_elapsed(&ref_timer));
2030 
2031   aom_usec_timer_start(&test_timer);
2032   for (int i = 0; i < run_time; i++) {
2033     params_.func(dst_, dstride, src_, sstride, w, h);
2034   }
2035   aom_usec_timer_mark(&test_timer);
2036   elapsed_time_simd = static_cast<double>(aom_usec_timer_elapsed(&test_timer));
2037 
2038   printf("%dx%d\tc_time=%lf \t simd_time=%lf \t gain=%lf\n", width(), height(),
2039          elapsed_time_c, elapsed_time_simd,
2040          (elapsed_time_c / elapsed_time_simd));
2041 }
2042 
2043 template <typename MseHBDWxHFunctionType>
RefMatchTestMse()2044 void MseHBDWxHTestClass<MseHBDWxHFunctionType>::RefMatchTestMse() {
2045   uint64_t mse_ref = 0;
2046   uint64_t mse_mod = 0;
2047   int w = width();
2048   int h = height();
2049   int dstride = d_stride();
2050   int sstride = s_stride();
2051   for (int i = 0; i < 10; i++) {
2052     for (int k = 0; k < block_size(); ++k) {
2053       dst_[k] = rnd_.Rand16() & mask();
2054       src_[k] = rnd_.Rand16() & mask();
2055     }
2056     API_REGISTER_STATE_CHECK(mse_ref = aom_mse_wxh_16bit_highbd_c(
2057                                  dst_, dstride, src_, sstride, w, h));
2058     API_REGISTER_STATE_CHECK(
2059         mse_mod = params_.func(dst_, dstride, src_, sstride, w, h));
2060     EXPECT_EQ(mse_ref, mse_mod)
2061         << "ref mse: " << mse_ref << " mod mse: " << mse_mod;
2062   }
2063 }
2064 
2065 typedef TestParams<MseHBDWxH16bitFunc> MseHBDWxHParams;
2066 typedef MseHBDWxHTestClass<MseHBDWxH16bitFunc> MseHBDWxHTest;
2067 typedef MainTestClass<VarianceMxNFunc> AvxHBDMseTest;
2068 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AvxHBDMseTest);
2069 typedef MainTestClass<VarianceMxNFunc> AvxHBDVarianceTest;
2070 typedef SubpelVarianceTest<SubpixVarMxNFunc> AvxHBDSubpelVarianceTest;
2071 typedef SubpelVarianceTest<SubpixAvgVarMxNFunc> AvxHBDSubpelAvgVarianceTest;
2072 typedef SubpelVarianceTest<DistWtdSubpixAvgVarMxNFunc>
2073     AvxHBDDistWtdSubpelAvgVarianceTest;
2074 #if !CONFIG_REALTIME_ONLY
2075 typedef ObmcVarianceTest<ObmcSubpelVarFunc> AvxHBDObmcSubpelVarianceTest;
2076 #endif
2077 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AvxHBDObmcSubpelVarianceTest);
2078 
// High-bitdepth value-parameterized test cases. Each TEST_P body forwards to
// one member function of its fixture. Cases whose name starts with DISABLED_
// are the timing runs. The AvxHBDObmcSubpelVarianceTest cases are compiled
// only when CONFIG_REALTIME_ONLY is off.
TEST_P(MseHBDWxHTest,RefMse)2079 TEST_P(MseHBDWxHTest, RefMse) { RefMatchTestMse(); }
TEST_P(MseHBDWxHTest,DISABLED_SpeedMse)2080 TEST_P(MseHBDWxHTest, DISABLED_SpeedMse) { SpeedTest(); }
TEST_P(AvxHBDMseTest,RefMse)2081 TEST_P(AvxHBDMseTest, RefMse) { RefTestMse(); }
TEST_P(AvxHBDMseTest,MaxMse)2082 TEST_P(AvxHBDMseTest, MaxMse) { MaxTestMse(); }
TEST_P(AvxHBDMseTest,DISABLED_SpeedMse)2083 TEST_P(AvxHBDMseTest, DISABLED_SpeedMse) { SpeedTest(); }
TEST_P(AvxHBDVarianceTest,Zero)2084 TEST_P(AvxHBDVarianceTest, Zero) { ZeroTest(); }
TEST_P(AvxHBDVarianceTest,Ref)2085 TEST_P(AvxHBDVarianceTest, Ref) { RefTest(); }
TEST_P(AvxHBDVarianceTest,RefStride)2086 TEST_P(AvxHBDVarianceTest, RefStride) { RefStrideTest(); }
TEST_P(AvxHBDVarianceTest,OneQuarter)2087 TEST_P(AvxHBDVarianceTest, OneQuarter) { OneQuarterTest(); }
TEST_P(AvxHBDVarianceTest,DISABLED_Speed)2088 TEST_P(AvxHBDVarianceTest, DISABLED_Speed) { SpeedTest(); }
TEST_P(AvxHBDSubpelVarianceTest,Ref)2089 TEST_P(AvxHBDSubpelVarianceTest, Ref) { RefTest(); }
TEST_P(AvxHBDSubpelVarianceTest,ExtremeRef)2090 TEST_P(AvxHBDSubpelVarianceTest, ExtremeRef) { ExtremeRefTest(); }
TEST_P(AvxHBDSubpelVarianceTest,DISABLED_Speed)2091 TEST_P(AvxHBDSubpelVarianceTest, DISABLED_Speed) { SpeedTest(); }
TEST_P(AvxHBDSubpelAvgVarianceTest,Ref)2092 TEST_P(AvxHBDSubpelAvgVarianceTest, Ref) { RefTest(); }
TEST_P(AvxHBDDistWtdSubpelAvgVarianceTest,Ref)2093 TEST_P(AvxHBDDistWtdSubpelAvgVarianceTest, Ref) { RefTest(); }
2094 #if !CONFIG_REALTIME_ONLY
TEST_P(AvxHBDObmcSubpelVarianceTest,Ref)2095 TEST_P(AvxHBDObmcSubpelVarianceTest, Ref) { RefTest(); }
TEST_P(AvxHBDObmcSubpelVarianceTest,ExtremeRef)2096 TEST_P(AvxHBDObmcSubpelVarianceTest, ExtremeRef) { ExtremeRefTest(); }
TEST_P(AvxHBDObmcSubpelVarianceTest,DISABLED_Speed)2097 TEST_P(AvxHBDObmcSubpelVarianceTest, DISABLED_Speed) { SpeedTest(); }
2098 #endif
2099 
// Runs MseHBDWxHTest with the C implementation as the function under test.
// The fixture's reference is the same C function, so this instantiation
// compares aom_mse_wxh_16bit_highbd_c against itself.
// NOTE(review): the trailing 10 looks like a bit depth - confirm against
// TestParams.
2100 INSTANTIATE_TEST_SUITE_P(
2101     C, MseHBDWxHTest,
2102     ::testing::Values(MseHBDWxHParams(3, 3, &aom_mse_wxh_16bit_highbd_c, 10),
2103                       MseHBDWxHParams(3, 2, &aom_mse_wxh_16bit_highbd_c, 10),
2104                       MseHBDWxHParams(2, 3, &aom_mse_wxh_16bit_highbd_c, 10),
2105                       MseHBDWxHParams(2, 2, &aom_mse_wxh_16bit_highbd_c, 10)));
2106 
// C high-bitdepth MSE functions for AvxHBDMseTest: the 16x16, 16x8, 8x16 and
// 8x8 sizes for each of the highbd_12, highbd_10 and highbd_8 function
// families, with 12, 10 and 8 respectively as the last argument.
2107 INSTANTIATE_TEST_SUITE_P(
2108     C, AvxHBDMseTest,
2109     ::testing::Values(MseParams(4, 4, &aom_highbd_12_mse16x16_c, 12),
2110                       MseParams(4, 3, &aom_highbd_12_mse16x8_c, 12),
2111                       MseParams(3, 4, &aom_highbd_12_mse8x16_c, 12),
2112                       MseParams(3, 3, &aom_highbd_12_mse8x8_c, 12),
2113                       MseParams(4, 4, &aom_highbd_10_mse16x16_c, 10),
2114                       MseParams(4, 3, &aom_highbd_10_mse16x8_c, 10),
2115                       MseParams(3, 4, &aom_highbd_10_mse8x16_c, 10),
2116                       MseParams(3, 3, &aom_highbd_10_mse8x8_c, 10),
2117                       MseParams(4, 4, &aom_highbd_8_mse16x16_c, 8),
2118                       MseParams(4, 3, &aom_highbd_8_mse16x8_c, 8),
2119                       MseParams(3, 4, &aom_highbd_8_mse8x16_c, 8),
2120                       MseParams(3, 3, &aom_highbd_8_mse8x8_c, 8)));
2121 
// NEON instantiations of the two high-bitdepth MSE suites, compiled only when
// HAVE_NEON is set. The parameter lists mirror the C instantiations with the
// _neon functions substituted.
2122 #if HAVE_NEON
2123 INSTANTIATE_TEST_SUITE_P(
2124     NEON, MseHBDWxHTest,
2125     ::testing::Values(MseHBDWxHParams(3, 3, &aom_mse_wxh_16bit_highbd_neon, 10),
2126                       MseHBDWxHParams(3, 2, &aom_mse_wxh_16bit_highbd_neon, 10),
2127                       MseHBDWxHParams(2, 3, &aom_mse_wxh_16bit_highbd_neon, 10),
2128                       MseHBDWxHParams(2, 2, &aom_mse_wxh_16bit_highbd_neon,
2129                                       10)));
2130 
2131 INSTANTIATE_TEST_SUITE_P(
2132     NEON, AvxHBDMseTest,
2133     ::testing::Values(MseParams(4, 4, &aom_highbd_12_mse16x16_neon, 12),
2134                       MseParams(4, 3, &aom_highbd_12_mse16x8_neon, 12),
2135                       MseParams(3, 4, &aom_highbd_12_mse8x16_neon, 12),
2136                       MseParams(3, 3, &aom_highbd_12_mse8x8_neon, 12),
2137                       MseParams(4, 4, &aom_highbd_10_mse16x16_neon, 10),
2138                       MseParams(4, 3, &aom_highbd_10_mse16x8_neon, 10),
2139                       MseParams(3, 4, &aom_highbd_10_mse8x16_neon, 10),
2140                       MseParams(3, 3, &aom_highbd_10_mse8x8_neon, 10),
2141                       MseParams(4, 4, &aom_highbd_8_mse16x16_neon, 8),
2142                       MseParams(4, 3, &aom_highbd_8_mse16x8_neon, 8),
2143                       MseParams(3, 4, &aom_highbd_8_mse8x16_neon, 8),
2144                       MseParams(3, 3, &aom_highbd_8_mse8x8_neon, 8)));
2145 #endif  // HAVE_NEON
2146 
// NEON_DOTPROD instantiation of AvxHBDMseTest, compiled only when
// HAVE_NEON_DOTPROD is set. Only the highbd_8 function family is registered
// here.
2147 #if HAVE_NEON_DOTPROD
2148 INSTANTIATE_TEST_SUITE_P(
2149     NEON_DOTPROD, AvxHBDMseTest,
2150     ::testing::Values(MseParams(4, 4, &aom_highbd_8_mse16x16_neon_dotprod, 8),
2151                       MseParams(4, 3, &aom_highbd_8_mse16x8_neon_dotprod, 8),
2152                       MseParams(3, 4, &aom_highbd_8_mse8x16_neon_dotprod, 8),
2153                       MseParams(3, 3, &aom_highbd_8_mse8x8_neon_dotprod, 8)));
2154 #endif  // HAVE_NEON_DOTPROD
2155 
// SVE instantiations of the two high-bitdepth MSE suites, compiled only when
// HAVE_SVE is set. For AvxHBDMseTest only the highbd_12 and highbd_10
// function families are registered here.
2156 #if HAVE_SVE
2157 INSTANTIATE_TEST_SUITE_P(
2158     SVE, MseHBDWxHTest,
2159     ::testing::Values(MseHBDWxHParams(3, 3, &aom_mse_wxh_16bit_highbd_sve, 10),
2160                       MseHBDWxHParams(3, 2, &aom_mse_wxh_16bit_highbd_sve, 10),
2161                       MseHBDWxHParams(2, 3, &aom_mse_wxh_16bit_highbd_sve, 10),
2162                       MseHBDWxHParams(2, 2, &aom_mse_wxh_16bit_highbd_sve,
2163                                       10)));
2164 
2165 INSTANTIATE_TEST_SUITE_P(
2166     SVE, AvxHBDMseTest,
2167     ::testing::Values(MseParams(4, 4, &aom_highbd_12_mse16x16_sve, 12),
2168                       MseParams(4, 3, &aom_highbd_12_mse16x8_sve, 12),
2169                       MseParams(3, 4, &aom_highbd_12_mse8x16_sve, 12),
2170                       MseParams(3, 3, &aom_highbd_12_mse8x8_sve, 12),
2171                       MseParams(4, 4, &aom_highbd_10_mse16x16_sve, 10),
2172                       MseParams(4, 3, &aom_highbd_10_mse16x8_sve, 10),
2173                       MseParams(3, 4, &aom_highbd_10_mse8x16_sve, 10),
2174                       MseParams(3, 3, &aom_highbd_10_mse8x8_sve, 10)));
2175 #endif  // HAVE_SVE
2176 
// Parameter table for the C reference high-bitdepth variance functions
// (aom_highbd_{12,10,8}_variance<W>x<H>_c).
// In every entry the two leading integers equal log2(W) and log2(H) from the
// function name (e.g. 7, 7 for 128x128), and the trailing integer equals the
// bit depth in the function name. Entries are grouped by bit depth in the
// order 12, 10, 8.
// The order of entries determines the parameter index gtest assigns to each
// instantiated test, so keep new entries appended within their group.
2177 const VarianceParams kArrayHBDVariance_c[] = {
  // 12-bit.
2178   VarianceParams(7, 7, &aom_highbd_12_variance128x128_c, 12),
2179   VarianceParams(7, 6, &aom_highbd_12_variance128x64_c, 12),
2180   VarianceParams(6, 7, &aom_highbd_12_variance64x128_c, 12),
2181   VarianceParams(6, 6, &aom_highbd_12_variance64x64_c, 12),
2182   VarianceParams(6, 5, &aom_highbd_12_variance64x32_c, 12),
2183   VarianceParams(5, 6, &aom_highbd_12_variance32x64_c, 12),
2184   VarianceParams(5, 5, &aom_highbd_12_variance32x32_c, 12),
2185   VarianceParams(5, 4, &aom_highbd_12_variance32x16_c, 12),
2186   VarianceParams(4, 5, &aom_highbd_12_variance16x32_c, 12),
2187   VarianceParams(4, 4, &aom_highbd_12_variance16x16_c, 12),
2188   VarianceParams(4, 3, &aom_highbd_12_variance16x8_c, 12),
2189   VarianceParams(3, 4, &aom_highbd_12_variance8x16_c, 12),
2190   VarianceParams(3, 3, &aom_highbd_12_variance8x8_c, 12),
2191   VarianceParams(3, 2, &aom_highbd_12_variance8x4_c, 12),
2192   VarianceParams(2, 3, &aom_highbd_12_variance4x8_c, 12),
2193   VarianceParams(2, 2, &aom_highbd_12_variance4x4_c, 12),
  // 10-bit.
2194   VarianceParams(7, 7, &aom_highbd_10_variance128x128_c, 10),
2195   VarianceParams(7, 6, &aom_highbd_10_variance128x64_c, 10),
2196   VarianceParams(6, 7, &aom_highbd_10_variance64x128_c, 10),
2197   VarianceParams(6, 6, &aom_highbd_10_variance64x64_c, 10),
2198   VarianceParams(6, 5, &aom_highbd_10_variance64x32_c, 10),
2199   VarianceParams(5, 6, &aom_highbd_10_variance32x64_c, 10),
2200   VarianceParams(5, 5, &aom_highbd_10_variance32x32_c, 10),
2201   VarianceParams(5, 4, &aom_highbd_10_variance32x16_c, 10),
2202   VarianceParams(4, 5, &aom_highbd_10_variance16x32_c, 10),
2203   VarianceParams(4, 4, &aom_highbd_10_variance16x16_c, 10),
2204   VarianceParams(4, 3, &aom_highbd_10_variance16x8_c, 10),
2205   VarianceParams(3, 4, &aom_highbd_10_variance8x16_c, 10),
2206   VarianceParams(3, 3, &aom_highbd_10_variance8x8_c, 10),
2207   VarianceParams(3, 2, &aom_highbd_10_variance8x4_c, 10),
2208   VarianceParams(2, 3, &aom_highbd_10_variance4x8_c, 10),
2209   VarianceParams(2, 2, &aom_highbd_10_variance4x4_c, 10),
  // 8-bit.
2210   VarianceParams(7, 7, &aom_highbd_8_variance128x128_c, 8),
2211   VarianceParams(7, 6, &aom_highbd_8_variance128x64_c, 8),
2212   VarianceParams(6, 7, &aom_highbd_8_variance64x128_c, 8),
2213   VarianceParams(6, 6, &aom_highbd_8_variance64x64_c, 8),
2214   VarianceParams(6, 5, &aom_highbd_8_variance64x32_c, 8),
2215   VarianceParams(5, 6, &aom_highbd_8_variance32x64_c, 8),
2216   VarianceParams(5, 5, &aom_highbd_8_variance32x32_c, 8),
2217   VarianceParams(5, 4, &aom_highbd_8_variance32x16_c, 8),
2218   VarianceParams(4, 5, &aom_highbd_8_variance16x32_c, 8),
2219   VarianceParams(4, 4, &aom_highbd_8_variance16x16_c, 8),
2220   VarianceParams(4, 3, &aom_highbd_8_variance16x8_c, 8),
2221   VarianceParams(3, 4, &aom_highbd_8_variance8x16_c, 8),
2222   VarianceParams(3, 3, &aom_highbd_8_variance8x8_c, 8),
2223   VarianceParams(3, 2, &aom_highbd_8_variance8x4_c, 8),
2224   VarianceParams(2, 3, &aom_highbd_8_variance4x8_c, 8),
2225   VarianceParams(2, 2, &aom_highbd_8_variance4x4_c, 8),
  // The 4:1 / 1:4 block sizes (64x16, 16x64, 32x8, 8x32, 16x4, 4x16) are only
  // compiled in when CONFIG_REALTIME_ONLY evaluates to 0.
2226 #if !CONFIG_REALTIME_ONLY
2227   VarianceParams(6, 4, &aom_highbd_12_variance64x16_c, 12),
2228   VarianceParams(4, 6, &aom_highbd_12_variance16x64_c, 12),
2229   VarianceParams(5, 3, &aom_highbd_12_variance32x8_c, 12),
2230   VarianceParams(3, 5, &aom_highbd_12_variance8x32_c, 12),
2231   VarianceParams(4, 2, &aom_highbd_12_variance16x4_c, 12),
2232   VarianceParams(2, 4, &aom_highbd_12_variance4x16_c, 12),
2233   VarianceParams(6, 4, &aom_highbd_10_variance64x16_c, 10),
2234   VarianceParams(4, 6, &aom_highbd_10_variance16x64_c, 10),
2235   VarianceParams(5, 3, &aom_highbd_10_variance32x8_c, 10),
2236   VarianceParams(3, 5, &aom_highbd_10_variance8x32_c, 10),
2237   VarianceParams(4, 2, &aom_highbd_10_variance16x4_c, 10),
2238   VarianceParams(2, 4, &aom_highbd_10_variance4x16_c, 10),
2239   VarianceParams(6, 4, &aom_highbd_8_variance64x16_c, 8),
2240   VarianceParams(4, 6, &aom_highbd_8_variance16x64_c, 8),
2241   VarianceParams(5, 3, &aom_highbd_8_variance32x8_c, 8),
2242   VarianceParams(3, 5, &aom_highbd_8_variance8x32_c, 8),
2243   VarianceParams(4, 2, &aom_highbd_8_variance16x4_c, 8),
2244   VarianceParams(2, 4, &aom_highbd_8_variance4x16_c, 8),
2245 #endif  // !CONFIG_REALTIME_ONLY
2246 };
// Instantiates AvxHBDVarianceTest under the "C" prefix, once per entry of
// kArrayHBDVariance_c.
2247 INSTANTIATE_TEST_SUITE_P(C, AvxHBDVarianceTest,
2248                          ::testing::ValuesIn(kArrayHBDVariance_c));
2249 
2250 #if HAVE_SSE4_1
// Runs AvxHBDVarianceTest against the SSE4.1 high-bitdepth variance functions.
// Only the 4x4 block size is listed here, once per bit depth (8, 10, 12); the
// leading (2, 2) equals log2 of the 4x4 dimensions in the function names.
2251 INSTANTIATE_TEST_SUITE_P(
2252     SSE4_1, AvxHBDVarianceTest,
2253     ::testing::Values(
2254         VarianceParams(2, 2, &aom_highbd_8_variance4x4_sse4_1, 8),
2255         VarianceParams(2, 2, &aom_highbd_10_variance4x4_sse4_1, 10),
2256         VarianceParams(2, 2, &aom_highbd_12_variance4x4_sse4_1, 12)));
2257 #endif  // HAVE_SSE4_1
2258 
// Parameter table for the C reference high-bitdepth sub-pixel variance
// functions (aom_highbd_{8,10,12}_sub_pixel_variance<W>x<H>_c).
// In every entry the two leading integers equal log2(W) and log2(H) from the
// function name, and the trailing integer equals the bit depth in the
// function name. Entries are grouped by bit depth in the order 8, 10, 12.
// The order of entries determines the parameter index gtest assigns to each
// instantiated test.
2259 const SubpelVarianceParams kArrayHBDSubpelVariance_c[] = {
  // 8-bit.
2260   SubpelVarianceParams(7, 7, &aom_highbd_8_sub_pixel_variance128x128_c, 8),
2261   SubpelVarianceParams(7, 6, &aom_highbd_8_sub_pixel_variance128x64_c, 8),
2262   SubpelVarianceParams(6, 7, &aom_highbd_8_sub_pixel_variance64x128_c, 8),
2263   SubpelVarianceParams(6, 6, &aom_highbd_8_sub_pixel_variance64x64_c, 8),
2264   SubpelVarianceParams(6, 5, &aom_highbd_8_sub_pixel_variance64x32_c, 8),
2265   SubpelVarianceParams(5, 6, &aom_highbd_8_sub_pixel_variance32x64_c, 8),
2266   SubpelVarianceParams(5, 5, &aom_highbd_8_sub_pixel_variance32x32_c, 8),
2267   SubpelVarianceParams(5, 4, &aom_highbd_8_sub_pixel_variance32x16_c, 8),
2268   SubpelVarianceParams(4, 5, &aom_highbd_8_sub_pixel_variance16x32_c, 8),
2269   SubpelVarianceParams(4, 4, &aom_highbd_8_sub_pixel_variance16x16_c, 8),
2270   SubpelVarianceParams(4, 3, &aom_highbd_8_sub_pixel_variance16x8_c, 8),
2271   SubpelVarianceParams(3, 4, &aom_highbd_8_sub_pixel_variance8x16_c, 8),
2272   SubpelVarianceParams(3, 3, &aom_highbd_8_sub_pixel_variance8x8_c, 8),
2273   SubpelVarianceParams(3, 2, &aom_highbd_8_sub_pixel_variance8x4_c, 8),
2274   SubpelVarianceParams(2, 3, &aom_highbd_8_sub_pixel_variance4x8_c, 8),
2275   SubpelVarianceParams(2, 2, &aom_highbd_8_sub_pixel_variance4x4_c, 8),
  // 10-bit.
2276   SubpelVarianceParams(7, 7, &aom_highbd_10_sub_pixel_variance128x128_c, 10),
2277   SubpelVarianceParams(7, 6, &aom_highbd_10_sub_pixel_variance128x64_c, 10),
2278   SubpelVarianceParams(6, 7, &aom_highbd_10_sub_pixel_variance64x128_c, 10),
2279   SubpelVarianceParams(6, 6, &aom_highbd_10_sub_pixel_variance64x64_c, 10),
2280   SubpelVarianceParams(6, 5, &aom_highbd_10_sub_pixel_variance64x32_c, 10),
2281   SubpelVarianceParams(5, 6, &aom_highbd_10_sub_pixel_variance32x64_c, 10),
2282   SubpelVarianceParams(5, 5, &aom_highbd_10_sub_pixel_variance32x32_c, 10),
2283   SubpelVarianceParams(5, 4, &aom_highbd_10_sub_pixel_variance32x16_c, 10),
2284   SubpelVarianceParams(4, 5, &aom_highbd_10_sub_pixel_variance16x32_c, 10),
2285   SubpelVarianceParams(4, 4, &aom_highbd_10_sub_pixel_variance16x16_c, 10),
2286   SubpelVarianceParams(4, 3, &aom_highbd_10_sub_pixel_variance16x8_c, 10),
2287   SubpelVarianceParams(3, 4, &aom_highbd_10_sub_pixel_variance8x16_c, 10),
2288   SubpelVarianceParams(3, 3, &aom_highbd_10_sub_pixel_variance8x8_c, 10),
2289   SubpelVarianceParams(3, 2, &aom_highbd_10_sub_pixel_variance8x4_c, 10),
2290   SubpelVarianceParams(2, 3, &aom_highbd_10_sub_pixel_variance4x8_c, 10),
2291   SubpelVarianceParams(2, 2, &aom_highbd_10_sub_pixel_variance4x4_c, 10),
  // 12-bit.
2292   SubpelVarianceParams(7, 7, &aom_highbd_12_sub_pixel_variance128x128_c, 12),
2293   SubpelVarianceParams(7, 6, &aom_highbd_12_sub_pixel_variance128x64_c, 12),
2294   SubpelVarianceParams(6, 7, &aom_highbd_12_sub_pixel_variance64x128_c, 12),
2295   SubpelVarianceParams(6, 6, &aom_highbd_12_sub_pixel_variance64x64_c, 12),
2296   SubpelVarianceParams(6, 5, &aom_highbd_12_sub_pixel_variance64x32_c, 12),
2297   SubpelVarianceParams(5, 6, &aom_highbd_12_sub_pixel_variance32x64_c, 12),
2298   SubpelVarianceParams(5, 5, &aom_highbd_12_sub_pixel_variance32x32_c, 12),
2299   SubpelVarianceParams(5, 4, &aom_highbd_12_sub_pixel_variance32x16_c, 12),
2300   SubpelVarianceParams(4, 5, &aom_highbd_12_sub_pixel_variance16x32_c, 12),
2301   SubpelVarianceParams(4, 4, &aom_highbd_12_sub_pixel_variance16x16_c, 12),
2302   SubpelVarianceParams(4, 3, &aom_highbd_12_sub_pixel_variance16x8_c, 12),
2303   SubpelVarianceParams(3, 4, &aom_highbd_12_sub_pixel_variance8x16_c, 12),
2304   SubpelVarianceParams(3, 3, &aom_highbd_12_sub_pixel_variance8x8_c, 12),
2305   SubpelVarianceParams(3, 2, &aom_highbd_12_sub_pixel_variance8x4_c, 12),
2306   SubpelVarianceParams(2, 3, &aom_highbd_12_sub_pixel_variance4x8_c, 12),
2307   SubpelVarianceParams(2, 2, &aom_highbd_12_sub_pixel_variance4x4_c, 12),
  // The 4:1 / 1:4 block sizes (64x16, 16x64, 32x8, 8x32, 16x4, 4x16) are only
  // compiled in when CONFIG_REALTIME_ONLY evaluates to 0.
2308 #if !CONFIG_REALTIME_ONLY
2309   SubpelVarianceParams(6, 4, &aom_highbd_8_sub_pixel_variance64x16_c, 8),
2310   SubpelVarianceParams(4, 6, &aom_highbd_8_sub_pixel_variance16x64_c, 8),
2311   SubpelVarianceParams(5, 3, &aom_highbd_8_sub_pixel_variance32x8_c, 8),
2312   SubpelVarianceParams(3, 5, &aom_highbd_8_sub_pixel_variance8x32_c, 8),
2313   SubpelVarianceParams(4, 2, &aom_highbd_8_sub_pixel_variance16x4_c, 8),
2314   SubpelVarianceParams(2, 4, &aom_highbd_8_sub_pixel_variance4x16_c, 8),
2315   SubpelVarianceParams(6, 4, &aom_highbd_10_sub_pixel_variance64x16_c, 10),
2316   SubpelVarianceParams(4, 6, &aom_highbd_10_sub_pixel_variance16x64_c, 10),
2317   SubpelVarianceParams(5, 3, &aom_highbd_10_sub_pixel_variance32x8_c, 10),
2318   SubpelVarianceParams(3, 5, &aom_highbd_10_sub_pixel_variance8x32_c, 10),
2319   SubpelVarianceParams(4, 2, &aom_highbd_10_sub_pixel_variance16x4_c, 10),
2320   SubpelVarianceParams(2, 4, &aom_highbd_10_sub_pixel_variance4x16_c, 10),
2321   SubpelVarianceParams(6, 4, &aom_highbd_12_sub_pixel_variance64x16_c, 12),
2322   SubpelVarianceParams(4, 6, &aom_highbd_12_sub_pixel_variance16x64_c, 12),
2323   SubpelVarianceParams(5, 3, &aom_highbd_12_sub_pixel_variance32x8_c, 12),
2324   SubpelVarianceParams(3, 5, &aom_highbd_12_sub_pixel_variance8x32_c, 12),
2325   SubpelVarianceParams(4, 2, &aom_highbd_12_sub_pixel_variance16x4_c, 12),
2326   SubpelVarianceParams(2, 4, &aom_highbd_12_sub_pixel_variance4x16_c, 12),
2327 #endif  // !CONFIG_REALTIME_ONLY
2328 };
// Instantiates AvxHBDSubpelVarianceTest under the "C" prefix, once per entry
// of kArrayHBDSubpelVariance_c.
2329 INSTANTIATE_TEST_SUITE_P(C, AvxHBDSubpelVarianceTest,
2330                          ::testing::ValuesIn(kArrayHBDSubpelVariance_c));
2331 
// Parameter table for the C reference high-bitdepth sub-pixel average
// variance functions (aom_highbd_{8,10,12}_sub_pixel_avg_variance<W>x<H>_c).
// In every entry the two leading integers equal log2(W) and log2(H) from the
// function name, and the trailing integer equals the bit depth in the
// function name. Entries are grouped by bit depth in the order 8, 10, 12.
// The order of entries determines the parameter index gtest assigns to each
// instantiated test.
2332 const SubpelAvgVarianceParams kArrayHBDSubpelAvgVariance_c[] = {
  // 8-bit.
2333   SubpelAvgVarianceParams(7, 7, &aom_highbd_8_sub_pixel_avg_variance128x128_c,
2334                           8),
2335   SubpelAvgVarianceParams(7, 6, &aom_highbd_8_sub_pixel_avg_variance128x64_c,
2336                           8),
2337   SubpelAvgVarianceParams(6, 7, &aom_highbd_8_sub_pixel_avg_variance64x128_c,
2338                           8),
2339   SubpelAvgVarianceParams(6, 6, &aom_highbd_8_sub_pixel_avg_variance64x64_c, 8),
2340   SubpelAvgVarianceParams(6, 5, &aom_highbd_8_sub_pixel_avg_variance64x32_c, 8),
2341   SubpelAvgVarianceParams(5, 6, &aom_highbd_8_sub_pixel_avg_variance32x64_c, 8),
2342   SubpelAvgVarianceParams(5, 5, &aom_highbd_8_sub_pixel_avg_variance32x32_c, 8),
2343   SubpelAvgVarianceParams(5, 4, &aom_highbd_8_sub_pixel_avg_variance32x16_c, 8),
2344   SubpelAvgVarianceParams(4, 5, &aom_highbd_8_sub_pixel_avg_variance16x32_c, 8),
2345   SubpelAvgVarianceParams(4, 4, &aom_highbd_8_sub_pixel_avg_variance16x16_c, 8),
2346   SubpelAvgVarianceParams(4, 3, &aom_highbd_8_sub_pixel_avg_variance16x8_c, 8),
2347   SubpelAvgVarianceParams(3, 4, &aom_highbd_8_sub_pixel_avg_variance8x16_c, 8),
2348   SubpelAvgVarianceParams(3, 3, &aom_highbd_8_sub_pixel_avg_variance8x8_c, 8),
2349   SubpelAvgVarianceParams(3, 2, &aom_highbd_8_sub_pixel_avg_variance8x4_c, 8),
2350   SubpelAvgVarianceParams(2, 3, &aom_highbd_8_sub_pixel_avg_variance4x8_c, 8),
2351   SubpelAvgVarianceParams(2, 2, &aom_highbd_8_sub_pixel_avg_variance4x4_c, 8),
  // 10-bit.
2352   SubpelAvgVarianceParams(7, 7, &aom_highbd_10_sub_pixel_avg_variance128x128_c,
2353                           10),
2354   SubpelAvgVarianceParams(7, 6, &aom_highbd_10_sub_pixel_avg_variance128x64_c,
2355                           10),
2356   SubpelAvgVarianceParams(6, 7, &aom_highbd_10_sub_pixel_avg_variance64x128_c,
2357                           10),
2358   SubpelAvgVarianceParams(6, 6, &aom_highbd_10_sub_pixel_avg_variance64x64_c,
2359                           10),
2360   SubpelAvgVarianceParams(6, 5, &aom_highbd_10_sub_pixel_avg_variance64x32_c,
2361                           10),
2362   SubpelAvgVarianceParams(5, 6, &aom_highbd_10_sub_pixel_avg_variance32x64_c,
2363                           10),
2364   SubpelAvgVarianceParams(5, 5, &aom_highbd_10_sub_pixel_avg_variance32x32_c,
2365                           10),
2366   SubpelAvgVarianceParams(5, 4, &aom_highbd_10_sub_pixel_avg_variance32x16_c,
2367                           10),
2368   SubpelAvgVarianceParams(4, 5, &aom_highbd_10_sub_pixel_avg_variance16x32_c,
2369                           10),
2370   SubpelAvgVarianceParams(4, 4, &aom_highbd_10_sub_pixel_avg_variance16x16_c,
2371                           10),
2372   SubpelAvgVarianceParams(4, 3, &aom_highbd_10_sub_pixel_avg_variance16x8_c,
2373                           10),
2374   SubpelAvgVarianceParams(3, 4, &aom_highbd_10_sub_pixel_avg_variance8x16_c,
2375                           10),
2376   SubpelAvgVarianceParams(3, 3, &aom_highbd_10_sub_pixel_avg_variance8x8_c, 10),
2377   SubpelAvgVarianceParams(3, 2, &aom_highbd_10_sub_pixel_avg_variance8x4_c, 10),
2378   SubpelAvgVarianceParams(2, 3, &aom_highbd_10_sub_pixel_avg_variance4x8_c, 10),
2379   SubpelAvgVarianceParams(2, 2, &aom_highbd_10_sub_pixel_avg_variance4x4_c, 10),
  // 12-bit.
2380   SubpelAvgVarianceParams(7, 7, &aom_highbd_12_sub_pixel_avg_variance128x128_c,
2381                           12),
2382   SubpelAvgVarianceParams(7, 6, &aom_highbd_12_sub_pixel_avg_variance128x64_c,
2383                           12),
2384   SubpelAvgVarianceParams(6, 7, &aom_highbd_12_sub_pixel_avg_variance64x128_c,
2385                           12),
2386   SubpelAvgVarianceParams(6, 6, &aom_highbd_12_sub_pixel_avg_variance64x64_c,
2387                           12),
2388   SubpelAvgVarianceParams(6, 5, &aom_highbd_12_sub_pixel_avg_variance64x32_c,
2389                           12),
2390   SubpelAvgVarianceParams(5, 6, &aom_highbd_12_sub_pixel_avg_variance32x64_c,
2391                           12),
2392   SubpelAvgVarianceParams(5, 5, &aom_highbd_12_sub_pixel_avg_variance32x32_c,
2393                           12),
2394   SubpelAvgVarianceParams(5, 4, &aom_highbd_12_sub_pixel_avg_variance32x16_c,
2395                           12),
2396   SubpelAvgVarianceParams(4, 5, &aom_highbd_12_sub_pixel_avg_variance16x32_c,
2397                           12),
2398   SubpelAvgVarianceParams(4, 4, &aom_highbd_12_sub_pixel_avg_variance16x16_c,
2399                           12),
2400   SubpelAvgVarianceParams(4, 3, &aom_highbd_12_sub_pixel_avg_variance16x8_c,
2401                           12),
2402   SubpelAvgVarianceParams(3, 4, &aom_highbd_12_sub_pixel_avg_variance8x16_c,
2403                           12),
2404   SubpelAvgVarianceParams(3, 3, &aom_highbd_12_sub_pixel_avg_variance8x8_c, 12),
2405   SubpelAvgVarianceParams(3, 2, &aom_highbd_12_sub_pixel_avg_variance8x4_c, 12),
2406   SubpelAvgVarianceParams(2, 3, &aom_highbd_12_sub_pixel_avg_variance4x8_c, 12),
2407   SubpelAvgVarianceParams(2, 2, &aom_highbd_12_sub_pixel_avg_variance4x4_c, 12),
2408 
  // The 4:1 / 1:4 block sizes (64x16, 16x64, 32x8, 8x32, 16x4, 4x16) are only
  // compiled in when CONFIG_REALTIME_ONLY evaluates to 0.
2409 #if !CONFIG_REALTIME_ONLY
2410   SubpelAvgVarianceParams(6, 4, &aom_highbd_8_sub_pixel_avg_variance64x16_c, 8),
2411   SubpelAvgVarianceParams(4, 6, &aom_highbd_8_sub_pixel_avg_variance16x64_c, 8),
2412   SubpelAvgVarianceParams(5, 3, &aom_highbd_8_sub_pixel_avg_variance32x8_c, 8),
2413   SubpelAvgVarianceParams(3, 5, &aom_highbd_8_sub_pixel_avg_variance8x32_c, 8),
2414   SubpelAvgVarianceParams(4, 2, &aom_highbd_8_sub_pixel_avg_variance16x4_c, 8),
2415   SubpelAvgVarianceParams(2, 4, &aom_highbd_8_sub_pixel_avg_variance4x16_c, 8),
2416   SubpelAvgVarianceParams(6, 4, &aom_highbd_10_sub_pixel_avg_variance64x16_c,
2417                           10),
2418   SubpelAvgVarianceParams(4, 6, &aom_highbd_10_sub_pixel_avg_variance16x64_c,
2419                           10),
2420   SubpelAvgVarianceParams(5, 3, &aom_highbd_10_sub_pixel_avg_variance32x8_c,
2421                           10),
2422   SubpelAvgVarianceParams(3, 5, &aom_highbd_10_sub_pixel_avg_variance8x32_c,
2423                           10),
2424   SubpelAvgVarianceParams(4, 2, &aom_highbd_10_sub_pixel_avg_variance16x4_c,
2425                           10),
2426   SubpelAvgVarianceParams(2, 4, &aom_highbd_10_sub_pixel_avg_variance4x16_c,
2427                           10),
2428   SubpelAvgVarianceParams(6, 4, &aom_highbd_12_sub_pixel_avg_variance64x16_c,
2429                           12),
2430   SubpelAvgVarianceParams(4, 6, &aom_highbd_12_sub_pixel_avg_variance16x64_c,
2431                           12),
2432   SubpelAvgVarianceParams(5, 3, &aom_highbd_12_sub_pixel_avg_variance32x8_c,
2433                           12),
2434   SubpelAvgVarianceParams(3, 5, &aom_highbd_12_sub_pixel_avg_variance8x32_c,
2435                           12),
2436   SubpelAvgVarianceParams(4, 2, &aom_highbd_12_sub_pixel_avg_variance16x4_c,
2437                           12),
2438   SubpelAvgVarianceParams(2, 4, &aom_highbd_12_sub_pixel_avg_variance4x16_c,
2439                           12),
2440 #endif  // !CONFIG_REALTIME_ONLY
2441 };
// Instantiates AvxHBDSubpelAvgVarianceTest under the "C" prefix, once per
// entry of kArrayHBDSubpelAvgVariance_c.
2442 INSTANTIATE_TEST_SUITE_P(C, AvxHBDSubpelAvgVarianceTest,
2443                          ::testing::ValuesIn(kArrayHBDSubpelAvgVariance_c));
2444 
// Parameter table for the C reference high-bitdepth distance-weighted
// sub-pixel average variance functions
// (aom_highbd_{8,10,12}_dist_wtd_sub_pixel_avg_variance<W>x<H>_c).
// In every entry the two leading integers equal log2(W) and log2(H) from the
// function name, and the trailing integer equals the bit depth in the
// function name. Entries are grouped by bit depth in the order 8, 10, 12.
// The order of entries determines the parameter index gtest assigns to each
// instantiated test.
2445 const DistWtdSubpelAvgVarianceParams kArrayHBDDistWtdSubpelAvgVariance_c[] = {
  // 8-bit.
2446   DistWtdSubpelAvgVarianceParams(
2447       7, 7, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance128x128_c, 8),
2448   DistWtdSubpelAvgVarianceParams(
2449       7, 6, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance128x64_c, 8),
2450   DistWtdSubpelAvgVarianceParams(
2451       6, 7, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance64x128_c, 8),
2452   DistWtdSubpelAvgVarianceParams(
2453       6, 6, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance64x64_c, 8),
2454   DistWtdSubpelAvgVarianceParams(
2455       6, 5, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance64x32_c, 8),
2456   DistWtdSubpelAvgVarianceParams(
2457       5, 6, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance32x64_c, 8),
2458   DistWtdSubpelAvgVarianceParams(
2459       5, 5, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance32x32_c, 8),
2460   DistWtdSubpelAvgVarianceParams(
2461       5, 4, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance32x16_c, 8),
2462   DistWtdSubpelAvgVarianceParams(
2463       4, 5, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance16x32_c, 8),
2464   DistWtdSubpelAvgVarianceParams(
2465       4, 4, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance16x16_c, 8),
2466   DistWtdSubpelAvgVarianceParams(
2467       4, 3, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance16x8_c, 8),
2468   DistWtdSubpelAvgVarianceParams(
2469       3, 4, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance8x16_c, 8),
2470   DistWtdSubpelAvgVarianceParams(
2471       3, 3, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance8x8_c, 8),
2472   DistWtdSubpelAvgVarianceParams(
2473       3, 2, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance8x4_c, 8),
2474   DistWtdSubpelAvgVarianceParams(
2475       2, 3, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance4x8_c, 8),
2476   DistWtdSubpelAvgVarianceParams(
2477       2, 2, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance4x4_c, 8),
  // 10-bit.
2478   DistWtdSubpelAvgVarianceParams(
2479       7, 7, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance128x128_c, 10),
2480   DistWtdSubpelAvgVarianceParams(
2481       7, 6, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance128x64_c, 10),
2482   DistWtdSubpelAvgVarianceParams(
2483       6, 7, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance64x128_c, 10),
2484   DistWtdSubpelAvgVarianceParams(
2485       6, 6, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance64x64_c, 10),
2486   DistWtdSubpelAvgVarianceParams(
2487       6, 5, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance64x32_c, 10),
2488   DistWtdSubpelAvgVarianceParams(
2489       5, 6, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance32x64_c, 10),
2490   DistWtdSubpelAvgVarianceParams(
2491       5, 5, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance32x32_c, 10),
2492   DistWtdSubpelAvgVarianceParams(
2493       5, 4, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance32x16_c, 10),
2494   DistWtdSubpelAvgVarianceParams(
2495       4, 5, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance16x32_c, 10),
2496   DistWtdSubpelAvgVarianceParams(
2497       4, 4, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance16x16_c, 10),
2498   DistWtdSubpelAvgVarianceParams(
2499       4, 3, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance16x8_c, 10),
2500   DistWtdSubpelAvgVarianceParams(
2501       3, 4, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance8x16_c, 10),
2502   DistWtdSubpelAvgVarianceParams(
2503       3, 3, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance8x8_c, 10),
2504   DistWtdSubpelAvgVarianceParams(
2505       3, 2, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance8x4_c, 10),
2506   DistWtdSubpelAvgVarianceParams(
2507       2, 3, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance4x8_c, 10),
2508   DistWtdSubpelAvgVarianceParams(
2509       2, 2, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance4x4_c, 10),
  // 12-bit.
2510   DistWtdSubpelAvgVarianceParams(
2511       7, 7, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance128x128_c, 12),
2512   DistWtdSubpelAvgVarianceParams(
2513       7, 6, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance128x64_c, 12),
2514   DistWtdSubpelAvgVarianceParams(
2515       6, 7, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance64x128_c, 12),
2516   DistWtdSubpelAvgVarianceParams(
2517       6, 6, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance64x64_c, 12),
2518   DistWtdSubpelAvgVarianceParams(
2519       6, 5, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance64x32_c, 12),
2520   DistWtdSubpelAvgVarianceParams(
2521       5, 6, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance32x64_c, 12),
2522   DistWtdSubpelAvgVarianceParams(
2523       5, 5, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance32x32_c, 12),
2524   DistWtdSubpelAvgVarianceParams(
2525       5, 4, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance32x16_c, 12),
2526   DistWtdSubpelAvgVarianceParams(
2527       4, 5, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance16x32_c, 12),
2528   DistWtdSubpelAvgVarianceParams(
2529       4, 4, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance16x16_c, 12),
2530   DistWtdSubpelAvgVarianceParams(
2531       4, 3, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance16x8_c, 12),
2532   DistWtdSubpelAvgVarianceParams(
2533       3, 4, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance8x16_c, 12),
2534   DistWtdSubpelAvgVarianceParams(
2535       3, 3, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance8x8_c, 12),
2536   DistWtdSubpelAvgVarianceParams(
2537       3, 2, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance8x4_c, 12),
2538   DistWtdSubpelAvgVarianceParams(
2539       2, 3, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance4x8_c, 12),
2540   DistWtdSubpelAvgVarianceParams(
2541       2, 2, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance4x4_c, 12),
2542 
  // The 4:1 / 1:4 block sizes (64x16, 16x64, 32x8, 8x32, 16x4, 4x16) are only
  // compiled in when CONFIG_REALTIME_ONLY evaluates to 0.
2543 #if !CONFIG_REALTIME_ONLY
2544   DistWtdSubpelAvgVarianceParams(
2545       6, 4, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance64x16_c, 8),
2546   DistWtdSubpelAvgVarianceParams(
2547       4, 6, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance16x64_c, 8),
2548   DistWtdSubpelAvgVarianceParams(
2549       5, 3, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance32x8_c, 8),
2550   DistWtdSubpelAvgVarianceParams(
2551       3, 5, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance8x32_c, 8),
2552   DistWtdSubpelAvgVarianceParams(
2553       4, 2, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance16x4_c, 8),
2554   DistWtdSubpelAvgVarianceParams(
2555       2, 4, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance4x16_c, 8),
2556   DistWtdSubpelAvgVarianceParams(
2557       6, 4, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance64x16_c, 10),
2558   DistWtdSubpelAvgVarianceParams(
2559       4, 6, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance16x64_c, 10),
2560   DistWtdSubpelAvgVarianceParams(
2561       5, 3, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance32x8_c, 10),
2562   DistWtdSubpelAvgVarianceParams(
2563       3, 5, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance8x32_c, 10),
2564   DistWtdSubpelAvgVarianceParams(
2565       4, 2, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance16x4_c, 10),
2566   DistWtdSubpelAvgVarianceParams(
2567       2, 4, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance4x16_c, 10),
2568   DistWtdSubpelAvgVarianceParams(
2569       6, 4, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance64x16_c, 12),
2570   DistWtdSubpelAvgVarianceParams(
2571       4, 6, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance16x64_c, 12),
2572   DistWtdSubpelAvgVarianceParams(
2573       5, 3, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance32x8_c, 12),
2574   DistWtdSubpelAvgVarianceParams(
2575       3, 5, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance8x32_c, 12),
2576   DistWtdSubpelAvgVarianceParams(
2577       4, 2, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance16x4_c, 12),
2578   DistWtdSubpelAvgVarianceParams(
2579       2, 4, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance4x16_c, 12),
2580 #endif  // !CONFIG_REALTIME_ONLY
2581 };
// Instantiates AvxHBDDistWtdSubpelAvgVarianceTest under the "C" prefix, once
// per entry of kArrayHBDDistWtdSubpelAvgVariance_c.
2582 INSTANTIATE_TEST_SUITE_P(
2583     C, AvxHBDDistWtdSubpelAvgVarianceTest,
2584     ::testing::ValuesIn(kArrayHBDDistWtdSubpelAvgVariance_c));
2585 
2586 #if !CONFIG_REALTIME_ONLY
// Parameter table for the C reference high-bitdepth OBMC sub-pixel variance
// functions (aom_highbd_{8,10,12}_obmc_sub_pixel_variance<W>x<H>_c).
// In every entry the two leading integers equal log2(W) and log2(H) from the
// function name, and the trailing integer equals the bit depth in the
// function name. Entries are grouped by bit depth in the order 8, 10, 12.
// Unlike the other HBD tables, the 4:1 / 1:4 block sizes are not separately
// guarded: the whole table sits inside the enclosing
// #if !CONFIG_REALTIME_ONLY region.
// The order of entries determines the parameter index gtest assigns to each
// instantiated test.
2587 const ObmcSubpelVarianceParams kArrayHBDObmcSubpelVariance_c[] = {
  // 8-bit.
2588   ObmcSubpelVarianceParams(7, 7, &aom_highbd_8_obmc_sub_pixel_variance128x128_c,
2589                            8),
2590   ObmcSubpelVarianceParams(7, 6, &aom_highbd_8_obmc_sub_pixel_variance128x64_c,
2591                            8),
2592   ObmcSubpelVarianceParams(6, 7, &aom_highbd_8_obmc_sub_pixel_variance64x128_c,
2593                            8),
2594   ObmcSubpelVarianceParams(6, 6, &aom_highbd_8_obmc_sub_pixel_variance64x64_c,
2595                            8),
2596   ObmcSubpelVarianceParams(6, 5, &aom_highbd_8_obmc_sub_pixel_variance64x32_c,
2597                            8),
2598   ObmcSubpelVarianceParams(5, 6, &aom_highbd_8_obmc_sub_pixel_variance32x64_c,
2599                            8),
2600   ObmcSubpelVarianceParams(5, 5, &aom_highbd_8_obmc_sub_pixel_variance32x32_c,
2601                            8),
2602   ObmcSubpelVarianceParams(5, 4, &aom_highbd_8_obmc_sub_pixel_variance32x16_c,
2603                            8),
2604   ObmcSubpelVarianceParams(4, 5, &aom_highbd_8_obmc_sub_pixel_variance16x32_c,
2605                            8),
2606   ObmcSubpelVarianceParams(4, 4, &aom_highbd_8_obmc_sub_pixel_variance16x16_c,
2607                            8),
2608   ObmcSubpelVarianceParams(4, 3, &aom_highbd_8_obmc_sub_pixel_variance16x8_c,
2609                            8),
2610   ObmcSubpelVarianceParams(3, 4, &aom_highbd_8_obmc_sub_pixel_variance8x16_c,
2611                            8),
2612   ObmcSubpelVarianceParams(3, 3, &aom_highbd_8_obmc_sub_pixel_variance8x8_c, 8),
2613   ObmcSubpelVarianceParams(3, 2, &aom_highbd_8_obmc_sub_pixel_variance8x4_c, 8),
2614   ObmcSubpelVarianceParams(2, 3, &aom_highbd_8_obmc_sub_pixel_variance4x8_c, 8),
2615   ObmcSubpelVarianceParams(2, 2, &aom_highbd_8_obmc_sub_pixel_variance4x4_c, 8),
  // 10-bit.
2616   ObmcSubpelVarianceParams(7, 7,
2617                            &aom_highbd_10_obmc_sub_pixel_variance128x128_c, 10),
2618   ObmcSubpelVarianceParams(7, 6, &aom_highbd_10_obmc_sub_pixel_variance128x64_c,
2619                            10),
2620   ObmcSubpelVarianceParams(6, 7, &aom_highbd_10_obmc_sub_pixel_variance64x128_c,
2621                            10),
2622   ObmcSubpelVarianceParams(6, 6, &aom_highbd_10_obmc_sub_pixel_variance64x64_c,
2623                            10),
2624   ObmcSubpelVarianceParams(6, 5, &aom_highbd_10_obmc_sub_pixel_variance64x32_c,
2625                            10),
2626   ObmcSubpelVarianceParams(5, 6, &aom_highbd_10_obmc_sub_pixel_variance32x64_c,
2627                            10),
2628   ObmcSubpelVarianceParams(5, 5, &aom_highbd_10_obmc_sub_pixel_variance32x32_c,
2629                            10),
2630   ObmcSubpelVarianceParams(5, 4, &aom_highbd_10_obmc_sub_pixel_variance32x16_c,
2631                            10),
2632   ObmcSubpelVarianceParams(4, 5, &aom_highbd_10_obmc_sub_pixel_variance16x32_c,
2633                            10),
2634   ObmcSubpelVarianceParams(4, 4, &aom_highbd_10_obmc_sub_pixel_variance16x16_c,
2635                            10),
2636   ObmcSubpelVarianceParams(4, 3, &aom_highbd_10_obmc_sub_pixel_variance16x8_c,
2637                            10),
2638   ObmcSubpelVarianceParams(3, 4, &aom_highbd_10_obmc_sub_pixel_variance8x16_c,
2639                            10),
2640   ObmcSubpelVarianceParams(3, 3, &aom_highbd_10_obmc_sub_pixel_variance8x8_c,
2641                            10),
2642   ObmcSubpelVarianceParams(3, 2, &aom_highbd_10_obmc_sub_pixel_variance8x4_c,
2643                            10),
2644   ObmcSubpelVarianceParams(2, 3, &aom_highbd_10_obmc_sub_pixel_variance4x8_c,
2645                            10),
2646   ObmcSubpelVarianceParams(2, 2, &aom_highbd_10_obmc_sub_pixel_variance4x4_c,
2647                            10),
  // 12-bit.
2648   ObmcSubpelVarianceParams(7, 7,
2649                            &aom_highbd_12_obmc_sub_pixel_variance128x128_c, 12),
2650   ObmcSubpelVarianceParams(7, 6, &aom_highbd_12_obmc_sub_pixel_variance128x64_c,
2651                            12),
2652   ObmcSubpelVarianceParams(6, 7, &aom_highbd_12_obmc_sub_pixel_variance64x128_c,
2653                            12),
2654   ObmcSubpelVarianceParams(6, 6, &aom_highbd_12_obmc_sub_pixel_variance64x64_c,
2655                            12),
2656   ObmcSubpelVarianceParams(6, 5, &aom_highbd_12_obmc_sub_pixel_variance64x32_c,
2657                            12),
2658   ObmcSubpelVarianceParams(5, 6, &aom_highbd_12_obmc_sub_pixel_variance32x64_c,
2659                            12),
2660   ObmcSubpelVarianceParams(5, 5, &aom_highbd_12_obmc_sub_pixel_variance32x32_c,
2661                            12),
2662   ObmcSubpelVarianceParams(5, 4, &aom_highbd_12_obmc_sub_pixel_variance32x16_c,
2663                            12),
2664   ObmcSubpelVarianceParams(4, 5, &aom_highbd_12_obmc_sub_pixel_variance16x32_c,
2665                            12),
2666   ObmcSubpelVarianceParams(4, 4, &aom_highbd_12_obmc_sub_pixel_variance16x16_c,
2667                            12),
2668   ObmcSubpelVarianceParams(4, 3, &aom_highbd_12_obmc_sub_pixel_variance16x8_c,
2669                            12),
2670   ObmcSubpelVarianceParams(3, 4, &aom_highbd_12_obmc_sub_pixel_variance8x16_c,
2671                            12),
2672   ObmcSubpelVarianceParams(3, 3, &aom_highbd_12_obmc_sub_pixel_variance8x8_c,
2673                            12),
2674   ObmcSubpelVarianceParams(3, 2, &aom_highbd_12_obmc_sub_pixel_variance8x4_c,
2675                            12),
2676   ObmcSubpelVarianceParams(2, 3, &aom_highbd_12_obmc_sub_pixel_variance4x8_c,
2677                            12),
2678   ObmcSubpelVarianceParams(2, 2, &aom_highbd_12_obmc_sub_pixel_variance4x4_c,
2679                            12),
2680 
  // 4:1 / 1:4 block sizes (64x16, 16x64, 32x8, 8x32, 16x4, 4x16), again in
  // the order 8-bit, 10-bit, 12-bit.
2681   ObmcSubpelVarianceParams(6, 4, &aom_highbd_8_obmc_sub_pixel_variance64x16_c,
2682                            8),
2683   ObmcSubpelVarianceParams(4, 6, &aom_highbd_8_obmc_sub_pixel_variance16x64_c,
2684                            8),
2685   ObmcSubpelVarianceParams(5, 3, &aom_highbd_8_obmc_sub_pixel_variance32x8_c,
2686                            8),
2687   ObmcSubpelVarianceParams(3, 5, &aom_highbd_8_obmc_sub_pixel_variance8x32_c,
2688                            8),
2689   ObmcSubpelVarianceParams(4, 2, &aom_highbd_8_obmc_sub_pixel_variance16x4_c,
2690                            8),
2691   ObmcSubpelVarianceParams(2, 4, &aom_highbd_8_obmc_sub_pixel_variance4x16_c,
2692                            8),
2693   ObmcSubpelVarianceParams(6, 4, &aom_highbd_10_obmc_sub_pixel_variance64x16_c,
2694                            10),
2695   ObmcSubpelVarianceParams(4, 6, &aom_highbd_10_obmc_sub_pixel_variance16x64_c,
2696                            10),
2697   ObmcSubpelVarianceParams(5, 3, &aom_highbd_10_obmc_sub_pixel_variance32x8_c,
2698                            10),
2699   ObmcSubpelVarianceParams(3, 5, &aom_highbd_10_obmc_sub_pixel_variance8x32_c,
2700                            10),
2701   ObmcSubpelVarianceParams(4, 2, &aom_highbd_10_obmc_sub_pixel_variance16x4_c,
2702                            10),
2703   ObmcSubpelVarianceParams(2, 4, &aom_highbd_10_obmc_sub_pixel_variance4x16_c,
2704                            10),
2705   ObmcSubpelVarianceParams(6, 4, &aom_highbd_12_obmc_sub_pixel_variance64x16_c,
2706                            12),
2707   ObmcSubpelVarianceParams(4, 6, &aom_highbd_12_obmc_sub_pixel_variance16x64_c,
2708                            12),
2709   ObmcSubpelVarianceParams(5, 3, &aom_highbd_12_obmc_sub_pixel_variance32x8_c,
2710                            12),
2711   ObmcSubpelVarianceParams(3, 5, &aom_highbd_12_obmc_sub_pixel_variance8x32_c,
2712                            12),
2713   ObmcSubpelVarianceParams(4, 2, &aom_highbd_12_obmc_sub_pixel_variance16x4_c,
2714                            12),
2715   ObmcSubpelVarianceParams(2, 4, &aom_highbd_12_obmc_sub_pixel_variance4x16_c,
2716                            12),
2717 };
// Registers every entry of kArrayHBDObmcSubpelVariance_c with
// AvxHBDObmcSubpelVarianceTest under the "C" instantiation prefix.
2718 INSTANTIATE_TEST_SUITE_P(C, AvxHBDObmcSubpelVarianceTest,
2719                          ::testing::ValuesIn(kArrayHBDObmcSubpelVariance_c));
2720 #endif  // !CONFIG_REALTIME_ONLY
2721 #endif  // CONFIG_AV1_HIGHBITDEPTH
2722 
2723 #if HAVE_SSE2
// SSE2 instantiation of MseWxHTest. All four parameter sets use the same
// kernel, aom_mse_wxh_16bit_sse2; only the leading pair varies, covering every
// combination of 2 and 3.
// NOTE(review): the leading pair presumably encodes log2(width), log2(height)
// (8x8, 8x4, 4x8, 4x4) and the trailing 8 the bit depth -- confirm against the
// MseWxHParams definition.
2724 INSTANTIATE_TEST_SUITE_P(
2725     SSE2, MseWxHTest,
2726     ::testing::Values(MseWxHParams(3, 3, &aom_mse_wxh_16bit_sse2, 8),
2727                       MseWxHParams(3, 2, &aom_mse_wxh_16bit_sse2, 8),
2728                       MseWxHParams(2, 3, &aom_mse_wxh_16bit_sse2, 8),
2729                       MseWxHParams(2, 2, &aom_mse_wxh_16bit_sse2, 8)));
2730 
// SSE2 instantiation of Mse16xHTest. All four parameter sets use the same
// kernel, aom_mse_16xh_16bit_sse2; only the leading pair varies, covering
// every combination of 2 and 3.
// NOTE(review): the leading pair presumably encodes log2 block dimensions and
// the trailing 8 the bit depth -- confirm against the Mse16xHParams
// definition.
2731 INSTANTIATE_TEST_SUITE_P(
2732     SSE2, Mse16xHTest,
2733     ::testing::Values(Mse16xHParams(3, 3, &aom_mse_16xh_16bit_sse2, 8),
2734                       Mse16xHParams(3, 2, &aom_mse_16xh_16bit_sse2, 8),
2735                       Mse16xHParams(2, 3, &aom_mse_16xh_16bit_sse2, 8),
2736                       Mse16xHParams(2, 2, &aom_mse_16xh_16bit_sse2, 8)));
2737 
2738 #if !CONFIG_REALTIME_ONLY
// SSE2 instantiation of SumOfSquaresTest. The single parameter is the bare
// function aom_get_mb_ss_sse2 (no params wrapper). Only compiled when
// CONFIG_REALTIME_ONLY is off, per the surrounding #if.
2739 INSTANTIATE_TEST_SUITE_P(SSE2, SumOfSquaresTest,
2740                          ::testing::Values(aom_get_mb_ss_sse2));
2741 #endif  // !CONFIG_REALTIME_ONLY
2742 
// SSE2 instantiation of AvxMseTest for the 16x16, 16x8, 8x16 and 8x8 MSE
// kernels. In each entry the leading pair equals log2 of the width and height
// in the kernel name (e.g. 4, 3 <-> 16x8).
2743 INSTANTIATE_TEST_SUITE_P(SSE2, AvxMseTest,
2744                          ::testing::Values(MseParams(4, 4, &aom_mse16x16_sse2),
2745                                            MseParams(4, 3, &aom_mse16x8_sse2),
2746                                            MseParams(3, 4, &aom_mse8x16_sse2),
2747                                            MseParams(3, 3, &aom_mse8x8_sse2)));
2748 
// Parameter table for the SSE2 variance kernels, one entry per block size
// from 128x128 down to 4x4. In each entry the leading pair equals log2 of the
// width and height in the kernel name (e.g. 7, 6 <-> 128x64).
// The 4:1 / 1:4 aspect-ratio sizes (64x16, 32x8, 16x64, 16x4, 8x32, 4x16) are
// only listed when CONFIG_REALTIME_ONLY is off.
// Entry order determines the gtest parameter index, so do not reorder.
2749 const VarianceParams kArrayVariance_sse2[] = {
2750   VarianceParams(7, 7, &aom_variance128x128_sse2),
2751   VarianceParams(7, 6, &aom_variance128x64_sse2),
2752   VarianceParams(6, 7, &aom_variance64x128_sse2),
2753   VarianceParams(6, 6, &aom_variance64x64_sse2),
2754   VarianceParams(6, 5, &aom_variance64x32_sse2),
2755   VarianceParams(5, 6, &aom_variance32x64_sse2),
2756   VarianceParams(5, 5, &aom_variance32x32_sse2),
2757   VarianceParams(5, 4, &aom_variance32x16_sse2),
2758   VarianceParams(4, 5, &aom_variance16x32_sse2),
2759   VarianceParams(4, 4, &aom_variance16x16_sse2),
2760   VarianceParams(4, 3, &aom_variance16x8_sse2),
2761   VarianceParams(3, 4, &aom_variance8x16_sse2),
2762   VarianceParams(3, 3, &aom_variance8x8_sse2),
2763   VarianceParams(3, 2, &aom_variance8x4_sse2),
2764   VarianceParams(2, 3, &aom_variance4x8_sse2),
2765   VarianceParams(2, 2, &aom_variance4x4_sse2),
2766 #if !CONFIG_REALTIME_ONLY
2767   VarianceParams(6, 4, &aom_variance64x16_sse2),
2768   VarianceParams(5, 3, &aom_variance32x8_sse2),
2769   VarianceParams(4, 6, &aom_variance16x64_sse2),
2770   VarianceParams(4, 2, &aom_variance16x4_sse2),
2771   VarianceParams(3, 5, &aom_variance8x32_sse2),
2772   VarianceParams(2, 4, &aom_variance4x16_sse2),
2773 #endif
2774 };
// Registers the table above with AvxVarianceTest under the "SSE2" prefix.
2775 INSTANTIATE_TEST_SUITE_P(SSE2, AvxVarianceTest,
2776                          ::testing::ValuesIn(kArrayVariance_sse2));
2777 
// Parameter table for GetSseSum8x8QuadTest. Every entry uses the same kernel,
// aom_get_var_sse_sum_8x8_quad_sse2; only the leading pair varies.
// NOTE(review): the leading pair presumably encodes log2 of the test buffer
// width/height and the trailing 0 the bit-depth selector -- confirm against
// the GetSseSumParams definition.
2778 const GetSseSumParams kArrayGetSseSum8x8Quad_sse2[] = {
2779   GetSseSumParams(7, 7, &aom_get_var_sse_sum_8x8_quad_sse2, 0),
2780   GetSseSumParams(6, 6, &aom_get_var_sse_sum_8x8_quad_sse2, 0),
2781   GetSseSumParams(5, 5, &aom_get_var_sse_sum_8x8_quad_sse2, 0),
2782   GetSseSumParams(5, 4, &aom_get_var_sse_sum_8x8_quad_sse2, 0)
2783 };
// Registers the table above with GetSseSum8x8QuadTest under the "SSE2" prefix.
2784 INSTANTIATE_TEST_SUITE_P(SSE2, GetSseSum8x8QuadTest,
2785                          ::testing::ValuesIn(kArrayGetSseSum8x8Quad_sse2));
2786 
// Parameter table for GetSseSum16x16DualTest. Every entry uses the same
// kernel, aom_get_var_sse_sum_16x16_dual_sse2; only the leading pair varies.
// NOTE(review): the leading pair presumably encodes log2 of the test buffer
// width/height and the trailing 0 the bit-depth selector -- confirm against
// the GetSseSumParamsDual definition.
2787 const GetSseSumParamsDual kArrayGetSseSum16x16Dual_sse2[] = {
2788   GetSseSumParamsDual(7, 7, &aom_get_var_sse_sum_16x16_dual_sse2, 0),
2789   GetSseSumParamsDual(6, 6, &aom_get_var_sse_sum_16x16_dual_sse2, 0),
2790   GetSseSumParamsDual(5, 5, &aom_get_var_sse_sum_16x16_dual_sse2, 0),
2791   GetSseSumParamsDual(5, 4, &aom_get_var_sse_sum_16x16_dual_sse2, 0)
2792 };
// Registers the table above with GetSseSum16x16DualTest under the "SSE2"
// prefix.
2793 INSTANTIATE_TEST_SUITE_P(SSE2, GetSseSum16x16DualTest,
2794                          ::testing::ValuesIn(kArrayGetSseSum16x16Dual_sse2));
2795 
2796 #if CONFIG_AV1_HIGHBITDEPTH
2797 #if HAVE_SSE2
// SSE2 instantiation of MseHBDWxHTest. All four parameter sets use the same
// kernel, aom_mse_wxh_16bit_highbd_sse2, with a trailing argument of 10; only
// the leading pair varies, covering every combination of 2 and 3.
// NOTE(review): the leading pair presumably encodes log2(width), log2(height)
// and the trailing 10 the bit depth -- confirm against the MseHBDWxHParams
// definition.
2798 INSTANTIATE_TEST_SUITE_P(
2799     SSE2, MseHBDWxHTest,
2800     ::testing::Values(MseHBDWxHParams(3, 3, &aom_mse_wxh_16bit_highbd_sse2, 10),
2801                       MseHBDWxHParams(3, 2, &aom_mse_wxh_16bit_highbd_sse2, 10),
2802                       MseHBDWxHParams(2, 3, &aom_mse_wxh_16bit_highbd_sse2, 10),
2803                       MseHBDWxHParams(2, 2, &aom_mse_wxh_16bit_highbd_sse2,
2804                                       10)));
2805 
// SSE2 instantiation of AvxHBDMseTest for the 16x16 and 8x8 high-bitdepth MSE
// kernels at 12, 10 and 8 bits. In each entry the leading pair equals log2 of
// the width and height in the kernel name, and the trailing argument matches
// the bit depth in the kernel name.
2806 INSTANTIATE_TEST_SUITE_P(
2807     SSE2, AvxHBDMseTest,
2808     ::testing::Values(MseParams(4, 4, &aom_highbd_12_mse16x16_sse2, 12),
2809                       MseParams(3, 3, &aom_highbd_12_mse8x8_sse2, 12),
2810                       MseParams(4, 4, &aom_highbd_10_mse16x16_sse2, 10),
2811                       MseParams(3, 3, &aom_highbd_10_mse8x8_sse2, 10),
2812                       MseParams(4, 4, &aom_highbd_8_mse16x16_sse2, 8),
2813                       MseParams(3, 3, &aom_highbd_8_mse8x8_sse2, 8)));
2814 #endif  // HAVE_SSE2
2815 #if HAVE_SSE4_1
// SSE4.1 instantiation covering the 4x4 high-bitdepth sub-pixel variance
// kernels at 8, 10 and 12 bits. The trailing argument matches the bit depth in
// the kernel name.
// NOTE(review): these are aom_highbd_* kernels but are registered with
// AvxSubpelVarianceTest rather than AvxHBDSubpelVarianceTest (which the other
// high-bitdepth sub-pixel tables in this file use) -- confirm the two suite
// names share one fixture so that this is intentional.
2816 INSTANTIATE_TEST_SUITE_P(
2817     SSE4_1, AvxSubpelVarianceTest,
2818     ::testing::Values(
2819         SubpelVarianceParams(2, 2, &aom_highbd_8_sub_pixel_variance4x4_sse4_1,
2820                              8),
2821         SubpelVarianceParams(2, 2, &aom_highbd_10_sub_pixel_variance4x4_sse4_1,
2822                              10),
2823         SubpelVarianceParams(2, 2, &aom_highbd_12_sub_pixel_variance4x4_sse4_1,
2824                              12)));
2825 
// SSE4.1 instantiation covering the 4x4 high-bitdepth sub-pixel average
// variance kernels at 8, 10 and 12 bits. The trailing argument matches the bit
// depth in the kernel name.
// NOTE(review): these are aom_highbd_* kernels but are registered with
// AvxSubpelAvgVarianceTest rather than AvxHBDSubpelAvgVarianceTest -- confirm
// the two suite names share one fixture so that this is intentional.
2826 INSTANTIATE_TEST_SUITE_P(
2827     SSE4_1, AvxSubpelAvgVarianceTest,
2828     ::testing::Values(
2829         SubpelAvgVarianceParams(2, 2,
2830                                 &aom_highbd_8_sub_pixel_avg_variance4x4_sse4_1,
2831                                 8),
2832         SubpelAvgVarianceParams(2, 2,
2833                                 &aom_highbd_10_sub_pixel_avg_variance4x4_sse4_1,
2834                                 10),
2835         SubpelAvgVarianceParams(2, 2,
2836                                 &aom_highbd_12_sub_pixel_avg_variance4x4_sse4_1,
2837                                 12)));
2838 #endif  // HAVE_SSE4_1
2839 
2840 #if HAVE_AVX2
// AVX2 instantiation of AvxHBDMseTest. Only the 10-bit 16x16 kernel is
// registered here.
2841 INSTANTIATE_TEST_SUITE_P(
2842     AVX2, AvxHBDMseTest,
2843     ::testing::Values(MseParams(4, 4, &aom_highbd_10_mse16x16_avx2, 10)));
2844 #endif  // HAVE_AVX2
2845 
// Parameter table for the SSE2 high-bitdepth variance kernels, grouped by bit
// depth (12, then 10, then 8). In each entry the leading pair equals log2 of
// the width and height in the kernel name, and the trailing argument matches
// the bit depth in the kernel name.
// The main section covers 128x128 down to 8x8; no 8x4, 4x8 or 4x4 entries are
// listed. The 4:1 / 1:4 sizes are only listed when CONFIG_REALTIME_ONLY is
// off, and within that section the 16x4 and 4x16 entries are commented out.
// NOTE(review): presumably no SSE2 kernel exists for the disabled sizes --
// confirm before re-enabling.
// Entry order determines the gtest parameter index, so do not reorder.
2846 const VarianceParams kArrayHBDVariance_sse2[] = {
2847   VarianceParams(7, 7, &aom_highbd_12_variance128x128_sse2, 12),
2848   VarianceParams(7, 6, &aom_highbd_12_variance128x64_sse2, 12),
2849   VarianceParams(6, 7, &aom_highbd_12_variance64x128_sse2, 12),
2850   VarianceParams(6, 6, &aom_highbd_12_variance64x64_sse2, 12),
2851   VarianceParams(6, 5, &aom_highbd_12_variance64x32_sse2, 12),
2852   VarianceParams(5, 6, &aom_highbd_12_variance32x64_sse2, 12),
2853   VarianceParams(5, 5, &aom_highbd_12_variance32x32_sse2, 12),
2854   VarianceParams(5, 4, &aom_highbd_12_variance32x16_sse2, 12),
2855   VarianceParams(4, 5, &aom_highbd_12_variance16x32_sse2, 12),
2856   VarianceParams(4, 4, &aom_highbd_12_variance16x16_sse2, 12),
2857   VarianceParams(4, 3, &aom_highbd_12_variance16x8_sse2, 12),
2858   VarianceParams(3, 4, &aom_highbd_12_variance8x16_sse2, 12),
2859   VarianceParams(3, 3, &aom_highbd_12_variance8x8_sse2, 12),
2860   VarianceParams(7, 7, &aom_highbd_10_variance128x128_sse2, 10),
2861   VarianceParams(7, 6, &aom_highbd_10_variance128x64_sse2, 10),
2862   VarianceParams(6, 7, &aom_highbd_10_variance64x128_sse2, 10),
2863   VarianceParams(6, 6, &aom_highbd_10_variance64x64_sse2, 10),
2864   VarianceParams(6, 5, &aom_highbd_10_variance64x32_sse2, 10),
2865   VarianceParams(5, 6, &aom_highbd_10_variance32x64_sse2, 10),
2866   VarianceParams(5, 5, &aom_highbd_10_variance32x32_sse2, 10),
2867   VarianceParams(5, 4, &aom_highbd_10_variance32x16_sse2, 10),
2868   VarianceParams(4, 5, &aom_highbd_10_variance16x32_sse2, 10),
2869   VarianceParams(4, 4, &aom_highbd_10_variance16x16_sse2, 10),
2870   VarianceParams(4, 3, &aom_highbd_10_variance16x8_sse2, 10),
2871   VarianceParams(3, 4, &aom_highbd_10_variance8x16_sse2, 10),
2872   VarianceParams(3, 3, &aom_highbd_10_variance8x8_sse2, 10),
2873   VarianceParams(7, 7, &aom_highbd_8_variance128x128_sse2, 8),
2874   VarianceParams(7, 6, &aom_highbd_8_variance128x64_sse2, 8),
2875   VarianceParams(6, 7, &aom_highbd_8_variance64x128_sse2, 8),
2876   VarianceParams(6, 6, &aom_highbd_8_variance64x64_sse2, 8),
2877   VarianceParams(6, 5, &aom_highbd_8_variance64x32_sse2, 8),
2878   VarianceParams(5, 6, &aom_highbd_8_variance32x64_sse2, 8),
2879   VarianceParams(5, 5, &aom_highbd_8_variance32x32_sse2, 8),
2880   VarianceParams(5, 4, &aom_highbd_8_variance32x16_sse2, 8),
2881   VarianceParams(4, 5, &aom_highbd_8_variance16x32_sse2, 8),
2882   VarianceParams(4, 4, &aom_highbd_8_variance16x16_sse2, 8),
2883   VarianceParams(4, 3, &aom_highbd_8_variance16x8_sse2, 8),
2884   VarianceParams(3, 4, &aom_highbd_8_variance8x16_sse2, 8),
2885   VarianceParams(3, 3, &aom_highbd_8_variance8x8_sse2, 8),
2886 #if !CONFIG_REALTIME_ONLY
2887   VarianceParams(6, 4, &aom_highbd_12_variance64x16_sse2, 12),
2888   VarianceParams(4, 6, &aom_highbd_12_variance16x64_sse2, 12),
2889   VarianceParams(5, 3, &aom_highbd_12_variance32x8_sse2, 12),
2890   VarianceParams(3, 5, &aom_highbd_12_variance8x32_sse2, 12),
2891   // VarianceParams(4, 2, &aom_highbd_12_variance16x4_sse2, 12),
2892   // VarianceParams(2, 4, &aom_highbd_12_variance4x16_sse2, 12),
2893   VarianceParams(6, 4, &aom_highbd_10_variance64x16_sse2, 10),
2894   VarianceParams(4, 6, &aom_highbd_10_variance16x64_sse2, 10),
2895   VarianceParams(5, 3, &aom_highbd_10_variance32x8_sse2, 10),
2896   VarianceParams(3, 5, &aom_highbd_10_variance8x32_sse2, 10),
2897   // VarianceParams(4, 2, &aom_highbd_10_variance16x4_sse2, 10),
2898   // VarianceParams(2, 4, &aom_highbd_10_variance4x16_sse2, 10),
2899   VarianceParams(6, 4, &aom_highbd_8_variance64x16_sse2, 8),
2900   VarianceParams(4, 6, &aom_highbd_8_variance16x64_sse2, 8),
2901   VarianceParams(5, 3, &aom_highbd_8_variance32x8_sse2, 8),
2902   VarianceParams(3, 5, &aom_highbd_8_variance8x32_sse2, 8),
2903 // VarianceParams(4, 2, &aom_highbd_8_variance16x4_sse2, 8),
2904 // VarianceParams(2, 4, &aom_highbd_8_variance4x16_sse2, 8),
2905 #endif
2906 };
// Registers the table above with AvxHBDVarianceTest under the "SSE2" prefix.
2907 INSTANTIATE_TEST_SUITE_P(SSE2, AvxHBDVarianceTest,
2908                          ::testing::ValuesIn(kArrayHBDVariance_sse2));
2909 
2910 #if HAVE_AVX2
2911 
// AVX2 instantiation of MseHBDWxHTest. All four parameter sets use the same
// kernel, aom_mse_wxh_16bit_highbd_avx2, with a trailing argument of 10; only
// the leading pair varies, covering every combination of 2 and 3.
// NOTE(review): the leading pair presumably encodes log2(width), log2(height)
// and the trailing 10 the bit depth -- confirm against the MseHBDWxHParams
// definition.
2912 INSTANTIATE_TEST_SUITE_P(
2913     AVX2, MseHBDWxHTest,
2914     ::testing::Values(MseHBDWxHParams(3, 3, &aom_mse_wxh_16bit_highbd_avx2, 10),
2915                       MseHBDWxHParams(3, 2, &aom_mse_wxh_16bit_highbd_avx2, 10),
2916                       MseHBDWxHParams(2, 3, &aom_mse_wxh_16bit_highbd_avx2, 10),
2917                       MseHBDWxHParams(2, 2, &aom_mse_wxh_16bit_highbd_avx2,
2918                                       10)));
2919 
// Parameter table for the AVX2 high-bitdepth variance kernels. Only 10-bit
// kernels are listed, from 128x128 down to 8x8. In each entry the leading pair
// equals log2 of the width and height in the kernel name.
// The 64x16, 32x8, 16x64 and 8x32 sizes are only listed when
// CONFIG_REALTIME_ONLY is off.
// Entry order determines the gtest parameter index, so do not reorder.
2920 const VarianceParams kArrayHBDVariance_avx2[] = {
2921   VarianceParams(7, 7, &aom_highbd_10_variance128x128_avx2, 10),
2922   VarianceParams(7, 6, &aom_highbd_10_variance128x64_avx2, 10),
2923   VarianceParams(6, 7, &aom_highbd_10_variance64x128_avx2, 10),
2924   VarianceParams(6, 6, &aom_highbd_10_variance64x64_avx2, 10),
2925   VarianceParams(6, 5, &aom_highbd_10_variance64x32_avx2, 10),
2926   VarianceParams(5, 6, &aom_highbd_10_variance32x64_avx2, 10),
2927   VarianceParams(5, 5, &aom_highbd_10_variance32x32_avx2, 10),
2928   VarianceParams(5, 4, &aom_highbd_10_variance32x16_avx2, 10),
2929   VarianceParams(4, 5, &aom_highbd_10_variance16x32_avx2, 10),
2930   VarianceParams(4, 4, &aom_highbd_10_variance16x16_avx2, 10),
2931   VarianceParams(4, 3, &aom_highbd_10_variance16x8_avx2, 10),
2932   VarianceParams(3, 4, &aom_highbd_10_variance8x16_avx2, 10),
2933   VarianceParams(3, 3, &aom_highbd_10_variance8x8_avx2, 10),
2934 #if !CONFIG_REALTIME_ONLY
2935   VarianceParams(6, 4, &aom_highbd_10_variance64x16_avx2, 10),
2936   VarianceParams(5, 3, &aom_highbd_10_variance32x8_avx2, 10),
2937   VarianceParams(4, 6, &aom_highbd_10_variance16x64_avx2, 10),
2938   VarianceParams(3, 5, &aom_highbd_10_variance8x32_avx2, 10),
2939 #endif
2940 };
2941
// Registers the table above with AvxHBDVarianceTest under the "AVX2" prefix.
2942 INSTANTIATE_TEST_SUITE_P(AVX2, AvxHBDVarianceTest,
2943                          ::testing::ValuesIn(kArrayHBDVariance_avx2));
2944 
// Parameter table for the AVX2 high-bitdepth sub-pixel variance kernels. Only
// 10-bit kernels are listed, from 128x128 down to 8x8, and there is no
// CONFIG_REALTIME_ONLY-gated section. In each entry the leading pair equals
// log2 of the width and height in the kernel name.
// Entry order determines the gtest parameter index, so do not reorder.
2945 const SubpelVarianceParams kArrayHBDSubpelVariance_avx2[] = {
2946   SubpelVarianceParams(7, 7, &aom_highbd_10_sub_pixel_variance128x128_avx2, 10),
2947   SubpelVarianceParams(7, 6, &aom_highbd_10_sub_pixel_variance128x64_avx2, 10),
2948   SubpelVarianceParams(6, 7, &aom_highbd_10_sub_pixel_variance64x128_avx2, 10),
2949   SubpelVarianceParams(6, 6, &aom_highbd_10_sub_pixel_variance64x64_avx2, 10),
2950   SubpelVarianceParams(6, 5, &aom_highbd_10_sub_pixel_variance64x32_avx2, 10),
2951   SubpelVarianceParams(5, 6, &aom_highbd_10_sub_pixel_variance32x64_avx2, 10),
2952   SubpelVarianceParams(5, 5, &aom_highbd_10_sub_pixel_variance32x32_avx2, 10),
2953   SubpelVarianceParams(5, 4, &aom_highbd_10_sub_pixel_variance32x16_avx2, 10),
2954   SubpelVarianceParams(4, 5, &aom_highbd_10_sub_pixel_variance16x32_avx2, 10),
2955   SubpelVarianceParams(4, 4, &aom_highbd_10_sub_pixel_variance16x16_avx2, 10),
2956   SubpelVarianceParams(4, 3, &aom_highbd_10_sub_pixel_variance16x8_avx2, 10),
2957   SubpelVarianceParams(3, 4, &aom_highbd_10_sub_pixel_variance8x16_avx2, 10),
2958   SubpelVarianceParams(3, 3, &aom_highbd_10_sub_pixel_variance8x8_avx2, 10),
2959 };
2960
// Registers the table above with AvxHBDSubpelVarianceTest under the "AVX2"
// prefix.
2961 INSTANTIATE_TEST_SUITE_P(AVX2, AvxHBDSubpelVarianceTest,
2962                          ::testing::ValuesIn(kArrayHBDSubpelVariance_avx2));
2963 #endif  // HAVE_AVX2
2964 
// Parameter table for the SSE2 high-bitdepth sub-pixel variance kernels,
// grouped by bit depth (12, then 10, then 8). In each entry the leading pair
// equals log2 of the width and height in the kernel name, and the trailing
// argument matches the bit depth in the kernel name.
// The main section covers 128x128 down to 8x4; no 4x8 or 4x4 entries are
// listed. The 4:1 / 1:4 sizes are only listed when CONFIG_REALTIME_ONLY is
// off, and within that section the 4x16 entries are commented out.
// NOTE(review): presumably no SSE2 kernel exists for the disabled 4x16 size --
// confirm before re-enabling.
// Entry order determines the gtest parameter index, so do not reorder.
2965 const SubpelVarianceParams kArrayHBDSubpelVariance_sse2[] = {
2966   SubpelVarianceParams(7, 7, &aom_highbd_12_sub_pixel_variance128x128_sse2, 12),
2967   SubpelVarianceParams(7, 6, &aom_highbd_12_sub_pixel_variance128x64_sse2, 12),
2968   SubpelVarianceParams(6, 7, &aom_highbd_12_sub_pixel_variance64x128_sse2, 12),
2969   SubpelVarianceParams(6, 6, &aom_highbd_12_sub_pixel_variance64x64_sse2, 12),
2970   SubpelVarianceParams(6, 5, &aom_highbd_12_sub_pixel_variance64x32_sse2, 12),
2971   SubpelVarianceParams(5, 6, &aom_highbd_12_sub_pixel_variance32x64_sse2, 12),
2972   SubpelVarianceParams(5, 5, &aom_highbd_12_sub_pixel_variance32x32_sse2, 12),
2973   SubpelVarianceParams(5, 4, &aom_highbd_12_sub_pixel_variance32x16_sse2, 12),
2974   SubpelVarianceParams(4, 5, &aom_highbd_12_sub_pixel_variance16x32_sse2, 12),
2975   SubpelVarianceParams(4, 4, &aom_highbd_12_sub_pixel_variance16x16_sse2, 12),
2976   SubpelVarianceParams(4, 3, &aom_highbd_12_sub_pixel_variance16x8_sse2, 12),
2977   SubpelVarianceParams(3, 4, &aom_highbd_12_sub_pixel_variance8x16_sse2, 12),
2978   SubpelVarianceParams(3, 3, &aom_highbd_12_sub_pixel_variance8x8_sse2, 12),
2979   SubpelVarianceParams(3, 2, &aom_highbd_12_sub_pixel_variance8x4_sse2, 12),
2980   SubpelVarianceParams(7, 7, &aom_highbd_10_sub_pixel_variance128x128_sse2, 10),
2981   SubpelVarianceParams(7, 6, &aom_highbd_10_sub_pixel_variance128x64_sse2, 10),
2982   SubpelVarianceParams(6, 7, &aom_highbd_10_sub_pixel_variance64x128_sse2, 10),
2983   SubpelVarianceParams(6, 6, &aom_highbd_10_sub_pixel_variance64x64_sse2, 10),
2984   SubpelVarianceParams(6, 5, &aom_highbd_10_sub_pixel_variance64x32_sse2, 10),
2985   SubpelVarianceParams(5, 6, &aom_highbd_10_sub_pixel_variance32x64_sse2, 10),
2986   SubpelVarianceParams(5, 5, &aom_highbd_10_sub_pixel_variance32x32_sse2, 10),
2987   SubpelVarianceParams(5, 4, &aom_highbd_10_sub_pixel_variance32x16_sse2, 10),
2988   SubpelVarianceParams(4, 5, &aom_highbd_10_sub_pixel_variance16x32_sse2, 10),
2989   SubpelVarianceParams(4, 4, &aom_highbd_10_sub_pixel_variance16x16_sse2, 10),
2990   SubpelVarianceParams(4, 3, &aom_highbd_10_sub_pixel_variance16x8_sse2, 10),
2991   SubpelVarianceParams(3, 4, &aom_highbd_10_sub_pixel_variance8x16_sse2, 10),
2992   SubpelVarianceParams(3, 3, &aom_highbd_10_sub_pixel_variance8x8_sse2, 10),
2993   SubpelVarianceParams(3, 2, &aom_highbd_10_sub_pixel_variance8x4_sse2, 10),
2994   SubpelVarianceParams(7, 7, &aom_highbd_8_sub_pixel_variance128x128_sse2, 8),
2995   SubpelVarianceParams(7, 6, &aom_highbd_8_sub_pixel_variance128x64_sse2, 8),
2996   SubpelVarianceParams(6, 7, &aom_highbd_8_sub_pixel_variance64x128_sse2, 8),
2997   SubpelVarianceParams(6, 6, &aom_highbd_8_sub_pixel_variance64x64_sse2, 8),
2998   SubpelVarianceParams(6, 5, &aom_highbd_8_sub_pixel_variance64x32_sse2, 8),
2999   SubpelVarianceParams(5, 6, &aom_highbd_8_sub_pixel_variance32x64_sse2, 8),
3000   SubpelVarianceParams(5, 5, &aom_highbd_8_sub_pixel_variance32x32_sse2, 8),
3001   SubpelVarianceParams(5, 4, &aom_highbd_8_sub_pixel_variance32x16_sse2, 8),
3002   SubpelVarianceParams(4, 5, &aom_highbd_8_sub_pixel_variance16x32_sse2, 8),
3003   SubpelVarianceParams(4, 4, &aom_highbd_8_sub_pixel_variance16x16_sse2, 8),
3004   SubpelVarianceParams(4, 3, &aom_highbd_8_sub_pixel_variance16x8_sse2, 8),
3005   SubpelVarianceParams(3, 4, &aom_highbd_8_sub_pixel_variance8x16_sse2, 8),
3006   SubpelVarianceParams(3, 3, &aom_highbd_8_sub_pixel_variance8x8_sse2, 8),
3007   SubpelVarianceParams(3, 2, &aom_highbd_8_sub_pixel_variance8x4_sse2, 8),
3008 #if !CONFIG_REALTIME_ONLY
3009   SubpelVarianceParams(6, 4, &aom_highbd_12_sub_pixel_variance64x16_sse2, 12),
3010   SubpelVarianceParams(4, 6, &aom_highbd_12_sub_pixel_variance16x64_sse2, 12),
3011   SubpelVarianceParams(5, 3, &aom_highbd_12_sub_pixel_variance32x8_sse2, 12),
3012   SubpelVarianceParams(3, 5, &aom_highbd_12_sub_pixel_variance8x32_sse2, 12),
3013   SubpelVarianceParams(4, 2, &aom_highbd_12_sub_pixel_variance16x4_sse2, 12),
3014   // SubpelVarianceParams(2, 4, &aom_highbd_12_sub_pixel_variance4x16_sse2, 12),
3015   SubpelVarianceParams(6, 4, &aom_highbd_10_sub_pixel_variance64x16_sse2, 10),
3016   SubpelVarianceParams(4, 6, &aom_highbd_10_sub_pixel_variance16x64_sse2, 10),
3017   SubpelVarianceParams(5, 3, &aom_highbd_10_sub_pixel_variance32x8_sse2, 10),
3018   SubpelVarianceParams(3, 5, &aom_highbd_10_sub_pixel_variance8x32_sse2, 10),
3019   SubpelVarianceParams(4, 2, &aom_highbd_10_sub_pixel_variance16x4_sse2, 10),
3020   // SubpelVarianceParams(2, 4, &aom_highbd_10_sub_pixel_variance4x16_sse2, 10),
3021   SubpelVarianceParams(6, 4, &aom_highbd_8_sub_pixel_variance64x16_sse2, 8),
3022   SubpelVarianceParams(4, 6, &aom_highbd_8_sub_pixel_variance16x64_sse2, 8),
3023   SubpelVarianceParams(5, 3, &aom_highbd_8_sub_pixel_variance32x8_sse2, 8),
3024   SubpelVarianceParams(3, 5, &aom_highbd_8_sub_pixel_variance8x32_sse2, 8),
3025   SubpelVarianceParams(4, 2, &aom_highbd_8_sub_pixel_variance16x4_sse2, 8),
3026 // SubpelVarianceParams(2, 4, &aom_highbd_8_sub_pixel_variance4x16_sse2, 8),
3027 #endif
3028 };
// Registers the table above with AvxHBDSubpelVarianceTest under the "SSE2"
// prefix.
3029 INSTANTIATE_TEST_SUITE_P(SSE2, AvxHBDSubpelVarianceTest,
3030                          ::testing::ValuesIn(kArrayHBDSubpelVariance_sse2));
3031 
// Parameter table for the SSE2 high-bitdepth sub-pixel average variance
// kernels, grouped by bit depth (12, then 10, then 8). In each entry the
// leading pair equals log2 of the width and height in the kernel name, and the
// trailing argument matches the bit depth in the kernel name.
// The main section covers 64x64 down to 8x4; no 128-wide/tall, 4x8 or 4x4
// entries are listed. The 4:1 / 1:4 sizes are only listed when
// CONFIG_REALTIME_ONLY is off, and within that section the 4x16 entries are
// commented out.
// NOTE(review): presumably no SSE2 kernel exists for the disabled 4x16 size --
// confirm before re-enabling.
// Entry order determines the gtest parameter index, so do not reorder.
3032 const SubpelAvgVarianceParams kArrayHBDSubpelAvgVariance_sse2[] = {
3033   SubpelAvgVarianceParams(6, 6, &aom_highbd_12_sub_pixel_avg_variance64x64_sse2,
3034                           12),
3035   SubpelAvgVarianceParams(6, 5, &aom_highbd_12_sub_pixel_avg_variance64x32_sse2,
3036                           12),
3037   SubpelAvgVarianceParams(5, 6, &aom_highbd_12_sub_pixel_avg_variance32x64_sse2,
3038                           12),
3039   SubpelAvgVarianceParams(5, 5, &aom_highbd_12_sub_pixel_avg_variance32x32_sse2,
3040                           12),
3041   SubpelAvgVarianceParams(5, 4, &aom_highbd_12_sub_pixel_avg_variance32x16_sse2,
3042                           12),
3043   SubpelAvgVarianceParams(4, 5, &aom_highbd_12_sub_pixel_avg_variance16x32_sse2,
3044                           12),
3045   SubpelAvgVarianceParams(4, 4, &aom_highbd_12_sub_pixel_avg_variance16x16_sse2,
3046                           12),
3047   SubpelAvgVarianceParams(4, 3, &aom_highbd_12_sub_pixel_avg_variance16x8_sse2,
3048                           12),
3049   SubpelAvgVarianceParams(3, 4, &aom_highbd_12_sub_pixel_avg_variance8x16_sse2,
3050                           12),
3051   SubpelAvgVarianceParams(3, 3, &aom_highbd_12_sub_pixel_avg_variance8x8_sse2,
3052                           12),
3053   SubpelAvgVarianceParams(3, 2, &aom_highbd_12_sub_pixel_avg_variance8x4_sse2,
3054                           12),
3055   SubpelAvgVarianceParams(6, 6, &aom_highbd_10_sub_pixel_avg_variance64x64_sse2,
3056                           10),
3057   SubpelAvgVarianceParams(6, 5, &aom_highbd_10_sub_pixel_avg_variance64x32_sse2,
3058                           10),
3059   SubpelAvgVarianceParams(5, 6, &aom_highbd_10_sub_pixel_avg_variance32x64_sse2,
3060                           10),
3061   SubpelAvgVarianceParams(5, 5, &aom_highbd_10_sub_pixel_avg_variance32x32_sse2,
3062                           10),
3063   SubpelAvgVarianceParams(5, 4, &aom_highbd_10_sub_pixel_avg_variance32x16_sse2,
3064                           10),
3065   SubpelAvgVarianceParams(4, 5, &aom_highbd_10_sub_pixel_avg_variance16x32_sse2,
3066                           10),
3067   SubpelAvgVarianceParams(4, 4, &aom_highbd_10_sub_pixel_avg_variance16x16_sse2,
3068                           10),
3069   SubpelAvgVarianceParams(4, 3, &aom_highbd_10_sub_pixel_avg_variance16x8_sse2,
3070                           10),
3071   SubpelAvgVarianceParams(3, 4, &aom_highbd_10_sub_pixel_avg_variance8x16_sse2,
3072                           10),
3073   SubpelAvgVarianceParams(3, 3, &aom_highbd_10_sub_pixel_avg_variance8x8_sse2,
3074                           10),
3075   SubpelAvgVarianceParams(3, 2, &aom_highbd_10_sub_pixel_avg_variance8x4_sse2,
3076                           10),
3077   SubpelAvgVarianceParams(6, 6, &aom_highbd_8_sub_pixel_avg_variance64x64_sse2,
3078                           8),
3079   SubpelAvgVarianceParams(6, 5, &aom_highbd_8_sub_pixel_avg_variance64x32_sse2,
3080                           8),
3081   SubpelAvgVarianceParams(5, 6, &aom_highbd_8_sub_pixel_avg_variance32x64_sse2,
3082                           8),
3083   SubpelAvgVarianceParams(5, 5, &aom_highbd_8_sub_pixel_avg_variance32x32_sse2,
3084                           8),
3085   SubpelAvgVarianceParams(5, 4, &aom_highbd_8_sub_pixel_avg_variance32x16_sse2,
3086                           8),
3087   SubpelAvgVarianceParams(4, 5, &aom_highbd_8_sub_pixel_avg_variance16x32_sse2,
3088                           8),
3089   SubpelAvgVarianceParams(4, 4, &aom_highbd_8_sub_pixel_avg_variance16x16_sse2,
3090                           8),
3091   SubpelAvgVarianceParams(4, 3, &aom_highbd_8_sub_pixel_avg_variance16x8_sse2,
3092                           8),
3093   SubpelAvgVarianceParams(3, 4, &aom_highbd_8_sub_pixel_avg_variance8x16_sse2,
3094                           8),
3095   SubpelAvgVarianceParams(3, 3, &aom_highbd_8_sub_pixel_avg_variance8x8_sse2,
3096                           8),
3097   SubpelAvgVarianceParams(3, 2, &aom_highbd_8_sub_pixel_avg_variance8x4_sse2,
3098                           8),
3099
3100 #if !CONFIG_REALTIME_ONLY
3101   SubpelAvgVarianceParams(6, 4, &aom_highbd_12_sub_pixel_avg_variance64x16_sse2,
3102                           12),
3103   SubpelAvgVarianceParams(4, 6, &aom_highbd_12_sub_pixel_avg_variance16x64_sse2,
3104                           12),
3105   SubpelAvgVarianceParams(5, 3, &aom_highbd_12_sub_pixel_avg_variance32x8_sse2,
3106                           12),
3107   SubpelAvgVarianceParams(3, 5, &aom_highbd_12_sub_pixel_avg_variance8x32_sse2,
3108                           12),
3109   SubpelAvgVarianceParams(4, 2, &aom_highbd_12_sub_pixel_avg_variance16x4_sse2,
3110                           12),
3111   // SubpelAvgVarianceParams(2, 4,
3112   // &aom_highbd_12_sub_pixel_avg_variance4x16_sse2, 12),
3113   SubpelAvgVarianceParams(6, 4, &aom_highbd_10_sub_pixel_avg_variance64x16_sse2,
3114                           10),
3115   SubpelAvgVarianceParams(4, 6, &aom_highbd_10_sub_pixel_avg_variance16x64_sse2,
3116                           10),
3117   SubpelAvgVarianceParams(5, 3, &aom_highbd_10_sub_pixel_avg_variance32x8_sse2,
3118                           10),
3119   SubpelAvgVarianceParams(3, 5, &aom_highbd_10_sub_pixel_avg_variance8x32_sse2,
3120                           10),
3121   SubpelAvgVarianceParams(4, 2, &aom_highbd_10_sub_pixel_avg_variance16x4_sse2,
3122                           10),
3123   // SubpelAvgVarianceParams(2, 4,
3124   // &aom_highbd_10_sub_pixel_avg_variance4x16_sse2, 10),
3125   SubpelAvgVarianceParams(6, 4, &aom_highbd_8_sub_pixel_avg_variance64x16_sse2,
3126                           8),
3127   SubpelAvgVarianceParams(4, 6, &aom_highbd_8_sub_pixel_avg_variance16x64_sse2,
3128                           8),
3129   SubpelAvgVarianceParams(5, 3, &aom_highbd_8_sub_pixel_avg_variance32x8_sse2,
3130                           8),
3131   SubpelAvgVarianceParams(3, 5, &aom_highbd_8_sub_pixel_avg_variance8x32_sse2,
3132                           8),
3133   SubpelAvgVarianceParams(4, 2, &aom_highbd_8_sub_pixel_avg_variance16x4_sse2,
3134                           8),
3135 // SubpelAvgVarianceParams(2, 4,
3136 // &aom_highbd_8_sub_pixel_avg_variance4x16_sse2, 8),
3137 #endif
3138 };
3139
// Registers the table above with AvxHBDSubpelAvgVarianceTest under the "SSE2"
// prefix.
3140 INSTANTIATE_TEST_SUITE_P(SSE2, AvxHBDSubpelAvgVarianceTest,
3141                          ::testing::ValuesIn(kArrayHBDSubpelAvgVariance_sse2));
3142 #endif  // CONFIG_AV1_HIGHBITDEPTH
3143 #endif  // HAVE_SSE2
3144 
3145 #if HAVE_SSSE3
// Parameter table for the SSSE3 sub-pixel variance kernels, one entry per
// block size from 128x128 down to 4x4. In each entry the leading pair equals
// log2 of the width and height in the kernel name (e.g. 7, 6 <-> 128x64).
// NOTE(review): the trailing 0 presumably selects the non-high-bitdepth path
// -- confirm against the SubpelVarianceParams definition.
// The 4:1 / 1:4 sizes (64x16, 16x64, 32x8, 8x32, 16x4, 4x16) are only listed
// when CONFIG_REALTIME_ONLY is off.
// Entry order determines the gtest parameter index, so do not reorder.
3146 const SubpelVarianceParams kArraySubpelVariance_ssse3[] = {
3147   SubpelVarianceParams(7, 7, &aom_sub_pixel_variance128x128_ssse3, 0),
3148   SubpelVarianceParams(7, 6, &aom_sub_pixel_variance128x64_ssse3, 0),
3149   SubpelVarianceParams(6, 7, &aom_sub_pixel_variance64x128_ssse3, 0),
3150   SubpelVarianceParams(6, 6, &aom_sub_pixel_variance64x64_ssse3, 0),
3151   SubpelVarianceParams(6, 5, &aom_sub_pixel_variance64x32_ssse3, 0),
3152   SubpelVarianceParams(5, 6, &aom_sub_pixel_variance32x64_ssse3, 0),
3153   SubpelVarianceParams(5, 5, &aom_sub_pixel_variance32x32_ssse3, 0),
3154   SubpelVarianceParams(5, 4, &aom_sub_pixel_variance32x16_ssse3, 0),
3155   SubpelVarianceParams(4, 5, &aom_sub_pixel_variance16x32_ssse3, 0),
3156   SubpelVarianceParams(4, 4, &aom_sub_pixel_variance16x16_ssse3, 0),
3157   SubpelVarianceParams(4, 3, &aom_sub_pixel_variance16x8_ssse3, 0),
3158   SubpelVarianceParams(3, 4, &aom_sub_pixel_variance8x16_ssse3, 0),
3159   SubpelVarianceParams(3, 3, &aom_sub_pixel_variance8x8_ssse3, 0),
3160   SubpelVarianceParams(3, 2, &aom_sub_pixel_variance8x4_ssse3, 0),
3161   SubpelVarianceParams(2, 3, &aom_sub_pixel_variance4x8_ssse3, 0),
3162   SubpelVarianceParams(2, 2, &aom_sub_pixel_variance4x4_ssse3, 0),
3163 #if !CONFIG_REALTIME_ONLY
3164   SubpelVarianceParams(6, 4, &aom_sub_pixel_variance64x16_ssse3, 0),
3165   SubpelVarianceParams(4, 6, &aom_sub_pixel_variance16x64_ssse3, 0),
3166   SubpelVarianceParams(5, 3, &aom_sub_pixel_variance32x8_ssse3, 0),
3167   SubpelVarianceParams(3, 5, &aom_sub_pixel_variance8x32_ssse3, 0),
3168   SubpelVarianceParams(4, 2, &aom_sub_pixel_variance16x4_ssse3, 0),
3169   SubpelVarianceParams(2, 4, &aom_sub_pixel_variance4x16_ssse3, 0),
3170 #endif
3171 };
// Registers the table above with AvxSubpelVarianceTest under the "SSSE3"
// prefix.
3172 INSTANTIATE_TEST_SUITE_P(SSSE3, AvxSubpelVarianceTest,
3173                          ::testing::ValuesIn(kArraySubpelVariance_ssse3));
3174 
// Parameter table for the SSSE3 sub-pixel average variance kernels, one entry
// per block size from 128x128 down to 4x4. In each entry the leading pair
// equals log2 of the width and height in the kernel name.
// NOTE(review): the trailing 0 presumably selects the non-high-bitdepth path
// -- confirm against the SubpelAvgVarianceParams definition.
// The 4:1 / 1:4 sizes (64x16, 16x64, 32x8, 8x32, 16x4, 4x16) are only listed
// when CONFIG_REALTIME_ONLY is off.
// Entry order determines the gtest parameter index, so do not reorder.
3175 const SubpelAvgVarianceParams kArraySubpelAvgVariance_ssse3[] = {
3176   SubpelAvgVarianceParams(7, 7, &aom_sub_pixel_avg_variance128x128_ssse3, 0),
3177   SubpelAvgVarianceParams(7, 6, &aom_sub_pixel_avg_variance128x64_ssse3, 0),
3178   SubpelAvgVarianceParams(6, 7, &aom_sub_pixel_avg_variance64x128_ssse3, 0),
3179   SubpelAvgVarianceParams(6, 6, &aom_sub_pixel_avg_variance64x64_ssse3, 0),
3180   SubpelAvgVarianceParams(6, 5, &aom_sub_pixel_avg_variance64x32_ssse3, 0),
3181   SubpelAvgVarianceParams(5, 6, &aom_sub_pixel_avg_variance32x64_ssse3, 0),
3182   SubpelAvgVarianceParams(5, 5, &aom_sub_pixel_avg_variance32x32_ssse3, 0),
3183   SubpelAvgVarianceParams(5, 4, &aom_sub_pixel_avg_variance32x16_ssse3, 0),
3184   SubpelAvgVarianceParams(4, 5, &aom_sub_pixel_avg_variance16x32_ssse3, 0),
3185   SubpelAvgVarianceParams(4, 4, &aom_sub_pixel_avg_variance16x16_ssse3, 0),
3186   SubpelAvgVarianceParams(4, 3, &aom_sub_pixel_avg_variance16x8_ssse3, 0),
3187   SubpelAvgVarianceParams(3, 4, &aom_sub_pixel_avg_variance8x16_ssse3, 0),
3188   SubpelAvgVarianceParams(3, 3, &aom_sub_pixel_avg_variance8x8_ssse3, 0),
3189   SubpelAvgVarianceParams(3, 2, &aom_sub_pixel_avg_variance8x4_ssse3, 0),
3190   SubpelAvgVarianceParams(2, 3, &aom_sub_pixel_avg_variance4x8_ssse3, 0),
3191   SubpelAvgVarianceParams(2, 2, &aom_sub_pixel_avg_variance4x4_ssse3, 0),
3192 #if !CONFIG_REALTIME_ONLY
3193   SubpelAvgVarianceParams(6, 4, &aom_sub_pixel_avg_variance64x16_ssse3, 0),
3194   SubpelAvgVarianceParams(4, 6, &aom_sub_pixel_avg_variance16x64_ssse3, 0),
3195   SubpelAvgVarianceParams(5, 3, &aom_sub_pixel_avg_variance32x8_ssse3, 0),
3196   SubpelAvgVarianceParams(3, 5, &aom_sub_pixel_avg_variance8x32_ssse3, 0),
3197   SubpelAvgVarianceParams(4, 2, &aom_sub_pixel_avg_variance16x4_ssse3, 0),
3198   SubpelAvgVarianceParams(2, 4, &aom_sub_pixel_avg_variance4x16_ssse3, 0),
3199 #endif
3200 };
// Registers the table above with AvxSubpelAvgVarianceTest under the "SSSE3"
// prefix.
3201 INSTANTIATE_TEST_SUITE_P(SSSE3, AvxSubpelAvgVarianceTest,
3202                          ::testing::ValuesIn(kArraySubpelAvgVariance_ssse3));
3203 
// Parameter table for the SSSE3 aom_dist_wtd_sub_pixel_avg_variance* kernels,
// one entry per block size from 128x128 down to 4x4. In each entry the leading
// pair equals log2 of the width and height in the kernel name.
// NOTE(review): the trailing 0 presumably selects the non-high-bitdepth path
// -- confirm against the DistWtdSubpelAvgVarianceParams definition.
// The 4:1 / 1:4 sizes (64x16, 16x64, 32x8, 8x32, 16x4, 4x16) are only listed
// when CONFIG_REALTIME_ONLY is off.
// Entry order determines the gtest parameter index, so do not reorder.
3204 const DistWtdSubpelAvgVarianceParams kArrayDistWtdSubpelAvgVariance_ssse3[] = {
3205   DistWtdSubpelAvgVarianceParams(
3206       7, 7, &aom_dist_wtd_sub_pixel_avg_variance128x128_ssse3, 0),
3207   DistWtdSubpelAvgVarianceParams(
3208       7, 6, &aom_dist_wtd_sub_pixel_avg_variance128x64_ssse3, 0),
3209   DistWtdSubpelAvgVarianceParams(
3210       6, 7, &aom_dist_wtd_sub_pixel_avg_variance64x128_ssse3, 0),
3211   DistWtdSubpelAvgVarianceParams(
3212       6, 6, &aom_dist_wtd_sub_pixel_avg_variance64x64_ssse3, 0),
3213   DistWtdSubpelAvgVarianceParams(
3214       6, 5, &aom_dist_wtd_sub_pixel_avg_variance64x32_ssse3, 0),
3215   DistWtdSubpelAvgVarianceParams(
3216       5, 6, &aom_dist_wtd_sub_pixel_avg_variance32x64_ssse3, 0),
3217   DistWtdSubpelAvgVarianceParams(
3218       5, 5, &aom_dist_wtd_sub_pixel_avg_variance32x32_ssse3, 0),
3219   DistWtdSubpelAvgVarianceParams(
3220       5, 4, &aom_dist_wtd_sub_pixel_avg_variance32x16_ssse3, 0),
3221   DistWtdSubpelAvgVarianceParams(
3222       4, 5, &aom_dist_wtd_sub_pixel_avg_variance16x32_ssse3, 0),
3223   DistWtdSubpelAvgVarianceParams(
3224       4, 4, &aom_dist_wtd_sub_pixel_avg_variance16x16_ssse3, 0),
3225   DistWtdSubpelAvgVarianceParams(
3226       4, 3, &aom_dist_wtd_sub_pixel_avg_variance16x8_ssse3, 0),
3227   DistWtdSubpelAvgVarianceParams(
3228       3, 4, &aom_dist_wtd_sub_pixel_avg_variance8x16_ssse3, 0),
3229   DistWtdSubpelAvgVarianceParams(
3230       3, 3, &aom_dist_wtd_sub_pixel_avg_variance8x8_ssse3, 0),
3231   DistWtdSubpelAvgVarianceParams(
3232       3, 2, &aom_dist_wtd_sub_pixel_avg_variance8x4_ssse3, 0),
3233   DistWtdSubpelAvgVarianceParams(
3234       2, 3, &aom_dist_wtd_sub_pixel_avg_variance4x8_ssse3, 0),
3235   DistWtdSubpelAvgVarianceParams(
3236       2, 2, &aom_dist_wtd_sub_pixel_avg_variance4x4_ssse3, 0),
3237 #if !CONFIG_REALTIME_ONLY
3238   DistWtdSubpelAvgVarianceParams(
3239       6, 4, &aom_dist_wtd_sub_pixel_avg_variance64x16_ssse3, 0),
3240   DistWtdSubpelAvgVarianceParams(
3241       4, 6, &aom_dist_wtd_sub_pixel_avg_variance16x64_ssse3, 0),
3242   DistWtdSubpelAvgVarianceParams(
3243       5, 3, &aom_dist_wtd_sub_pixel_avg_variance32x8_ssse3, 0),
3244   DistWtdSubpelAvgVarianceParams(
3245       3, 5, &aom_dist_wtd_sub_pixel_avg_variance8x32_ssse3, 0),
3246   DistWtdSubpelAvgVarianceParams(
3247       4, 2, &aom_dist_wtd_sub_pixel_avg_variance16x4_ssse3, 0),
3248   DistWtdSubpelAvgVarianceParams(
3249       2, 4, &aom_dist_wtd_sub_pixel_avg_variance4x16_ssse3, 0),
3250 #endif
3251 };
// Registers the table above with AvxDistWtdSubpelAvgVarianceTest under the
// "SSSE3" prefix.
3252 INSTANTIATE_TEST_SUITE_P(
3253     SSSE3, AvxDistWtdSubpelAvgVarianceTest,
3254     ::testing::ValuesIn(kArrayDistWtdSubpelAvgVariance_ssse3));
3255 #endif  // HAVE_SSSE3
3256 
// SSE4.1 section. Its only content is the AvxObmcSubpelVarianceTest
// registration, which is additionally compiled out when CONFIG_REALTIME_ONLY
// is set.
3257 #if HAVE_SSE4_1
3258 #if !CONFIG_REALTIME_ONLY
// The parameters are passed inline through ::testing::Values rather than via
// a named array. Each entry names one aom_obmc_sub_pixel_variance<W>x<H>_sse4_1
// function; the two leading integers equal log2(W) and log2(H) of that size.
// NOTE(review): the trailing 0 is presumably the bit-depth argument - confirm
// against the params type definition.
3259 INSTANTIATE_TEST_SUITE_P(
3260     SSE4_1, AvxObmcSubpelVarianceTest,
3261     ::testing::Values(
3262         ObmcSubpelVarianceParams(7, 7,
3263                                  &aom_obmc_sub_pixel_variance128x128_sse4_1, 0),
3264         ObmcSubpelVarianceParams(7, 6,
3265                                  &aom_obmc_sub_pixel_variance128x64_sse4_1, 0),
3266         ObmcSubpelVarianceParams(6, 7,
3267                                  &aom_obmc_sub_pixel_variance64x128_sse4_1, 0),
3268         ObmcSubpelVarianceParams(6, 6, &aom_obmc_sub_pixel_variance64x64_sse4_1,
3269                                  0),
3270         ObmcSubpelVarianceParams(6, 5, &aom_obmc_sub_pixel_variance64x32_sse4_1,
3271                                  0),
3272         ObmcSubpelVarianceParams(5, 6, &aom_obmc_sub_pixel_variance32x64_sse4_1,
3273                                  0),
3274         ObmcSubpelVarianceParams(5, 5, &aom_obmc_sub_pixel_variance32x32_sse4_1,
3275                                  0),
3276         ObmcSubpelVarianceParams(5, 4, &aom_obmc_sub_pixel_variance32x16_sse4_1,
3277                                  0),
3278         ObmcSubpelVarianceParams(4, 5, &aom_obmc_sub_pixel_variance16x32_sse4_1,
3279                                  0),
3280         ObmcSubpelVarianceParams(4, 4, &aom_obmc_sub_pixel_variance16x16_sse4_1,
3281                                  0),
3282         ObmcSubpelVarianceParams(4, 3, &aom_obmc_sub_pixel_variance16x8_sse4_1,
3283                                  0),
3284         ObmcSubpelVarianceParams(3, 4, &aom_obmc_sub_pixel_variance8x16_sse4_1,
3285                                  0),
3286         ObmcSubpelVarianceParams(3, 3, &aom_obmc_sub_pixel_variance8x8_sse4_1,
3287                                  0),
3288         ObmcSubpelVarianceParams(3, 2, &aom_obmc_sub_pixel_variance8x4_sse4_1,
3289                                  0),
3290         ObmcSubpelVarianceParams(2, 3, &aom_obmc_sub_pixel_variance4x8_sse4_1,
3291                                  0),
3292         ObmcSubpelVarianceParams(2, 2, &aom_obmc_sub_pixel_variance4x4_sse4_1,
3293                                  0),
3294         ObmcSubpelVarianceParams(6, 4, &aom_obmc_sub_pixel_variance64x16_sse4_1,
3295                                  0),
3296         ObmcSubpelVarianceParams(4, 6, &aom_obmc_sub_pixel_variance16x64_sse4_1,
3297                                  0),
3298         ObmcSubpelVarianceParams(5, 3, &aom_obmc_sub_pixel_variance32x8_sse4_1,
3299                                  0),
3300         ObmcSubpelVarianceParams(3, 5, &aom_obmc_sub_pixel_variance8x32_sse4_1,
3301                                  0),
3302         ObmcSubpelVarianceParams(4, 2, &aom_obmc_sub_pixel_variance16x4_sse4_1,
3303                                  0),
3304         ObmcSubpelVarianceParams(2, 4, &aom_obmc_sub_pixel_variance4x16_sse4_1,
3305                                  0)));
// Closes the !CONFIG_REALTIME_ONLY guard opened above.
3306 #endif
3307 #endif  // HAVE_SSE4_1
3308 
3309 #if HAVE_AVX2
3310 
// Registers the "AVX2" instance of MseWxHTest. The same function,
// aom_mse_wxh_16bit_avx2, is exercised with all four combinations of 3 and 2
// for the two leading integers.
// NOTE(review): the leading integers are presumably log2(width)/log2(height)
// (i.e. 8x8, 8x4, 4x8, 4x4) and the trailing 8 the bit depth - confirm against
// the MseWxHParams definition.
3311 INSTANTIATE_TEST_SUITE_P(
3312     AVX2, MseWxHTest,
3313     ::testing::Values(MseWxHParams(3, 3, &aom_mse_wxh_16bit_avx2, 8),
3314                       MseWxHParams(3, 2, &aom_mse_wxh_16bit_avx2, 8),
3315                       MseWxHParams(2, 3, &aom_mse_wxh_16bit_avx2, 8),
3316                       MseWxHParams(2, 2, &aom_mse_wxh_16bit_avx2, 8)));
3317 
// Registers the "AVX2" instance of Mse16xHTest. The same function,
// aom_mse_16xh_16bit_avx2, is exercised with all four combinations of 3 and 2
// for the two leading integers.
// NOTE(review): the leading integers are presumably log2 block dimensions and
// the trailing 8 the bit depth - confirm against the Mse16xHParams definition.
3318 INSTANTIATE_TEST_SUITE_P(
3319     AVX2, Mse16xHTest,
3320     ::testing::Values(Mse16xHParams(3, 3, &aom_mse_16xh_16bit_avx2, 8),
3321                       Mse16xHParams(3, 2, &aom_mse_16xh_16bit_avx2, 8),
3322                       Mse16xHParams(2, 3, &aom_mse_16xh_16bit_avx2, 8),
3323                       Mse16xHParams(2, 2, &aom_mse_16xh_16bit_avx2, 8)));
3324 
// Registers the "AVX2" instance of AvxMseTest with a single parameter set:
// aom_mse16x16_avx2, with leading integers 4, 4 (log2 of the 16x16 size in
// the function name).
3325 INSTANTIATE_TEST_SUITE_P(AVX2, AvxMseTest,
3326                          ::testing::Values(MseParams(4, 4,
3327                                                      &aom_mse16x16_avx2)));
3328 
// Parameter table for the AVX2 instance of AvxVarianceTest. Every entry names
// one aom_variance<W>x<H>_avx2 function; the two leading integers equal
// log2(W) and log2(H) of the size in that function's name. Only sizes with a
// width of 16 or more appear in this table.
3329 const VarianceParams kArrayVariance_avx2[] = {
3330   VarianceParams(7, 7, &aom_variance128x128_avx2),
3331   VarianceParams(7, 6, &aom_variance128x64_avx2),
3332   VarianceParams(6, 7, &aom_variance64x128_avx2),
3333   VarianceParams(6, 6, &aom_variance64x64_avx2),
3334   VarianceParams(6, 5, &aom_variance64x32_avx2),
3335   VarianceParams(5, 6, &aom_variance32x64_avx2),
3336   VarianceParams(5, 5, &aom_variance32x32_avx2),
3337   VarianceParams(5, 4, &aom_variance32x16_avx2),
3338   VarianceParams(4, 5, &aom_variance16x32_avx2),
3339   VarianceParams(4, 4, &aom_variance16x16_avx2),
3340   VarianceParams(4, 3, &aom_variance16x8_avx2),
  // The 4:1 and 1:4 sizes below are only listed when CONFIG_REALTIME_ONLY is 0.
3341 #if !CONFIG_REALTIME_ONLY
3342   VarianceParams(6, 4, &aom_variance64x16_avx2),
3343   VarianceParams(4, 6, &aom_variance16x64_avx2),
3344   VarianceParams(5, 3, &aom_variance32x8_avx2),
3345   VarianceParams(4, 2, &aom_variance16x4_avx2),
3346 #endif
3347 };
// Generates one AvxVarianceTest instance per table entry, under the "AVX2"
// prefix.
3348 INSTANTIATE_TEST_SUITE_P(AVX2, AvxVarianceTest,
3349                          ::testing::ValuesIn(kArrayVariance_avx2));
3350 
// Parameter table for the AVX2 instance of GetSseSum8x8QuadTest. All four
// entries use the same function, aom_get_var_sse_sum_8x8_quad_avx2, and differ
// only in the two leading integers.
// NOTE(review): the leading integers are presumably log2 width/height of the
// region the test covers, and the trailing 0 the bit depth - confirm against
// the GetSseSumParams definition.
3351 const GetSseSumParams kArrayGetSseSum8x8Quad_avx2[] = {
3352   GetSseSumParams(7, 7, &aom_get_var_sse_sum_8x8_quad_avx2, 0),
3353   GetSseSumParams(6, 6, &aom_get_var_sse_sum_8x8_quad_avx2, 0),
3354   GetSseSumParams(5, 5, &aom_get_var_sse_sum_8x8_quad_avx2, 0),
3355   GetSseSumParams(5, 4, &aom_get_var_sse_sum_8x8_quad_avx2, 0)
3356 };
// Generates one GetSseSum8x8QuadTest instance per table entry, under the
// "AVX2" prefix.
3357 INSTANTIATE_TEST_SUITE_P(AVX2, GetSseSum8x8QuadTest,
3358                          ::testing::ValuesIn(kArrayGetSseSum8x8Quad_avx2));
3359 
// Parameter table for the AVX2 instance of GetSseSum16x16DualTest. All four
// entries use the same function, aom_get_var_sse_sum_16x16_dual_avx2, and
// differ only in the two leading integers.
// NOTE(review): the leading integers are presumably log2 width/height of the
// region the test covers, and the trailing 0 the bit depth - confirm against
// the GetSseSumParamsDual definition.
3360 const GetSseSumParamsDual kArrayGetSseSum16x16Dual_avx2[] = {
3361   GetSseSumParamsDual(7, 7, &aom_get_var_sse_sum_16x16_dual_avx2, 0),
3362   GetSseSumParamsDual(6, 6, &aom_get_var_sse_sum_16x16_dual_avx2, 0),
3363   GetSseSumParamsDual(5, 5, &aom_get_var_sse_sum_16x16_dual_avx2, 0),
3364   GetSseSumParamsDual(5, 4, &aom_get_var_sse_sum_16x16_dual_avx2, 0)
3365 };
// Generates one GetSseSum16x16DualTest instance per table entry, under the
// "AVX2" prefix.
3366 INSTANTIATE_TEST_SUITE_P(AVX2, GetSseSum16x16DualTest,
3367                          ::testing::ValuesIn(kArrayGetSseSum16x16Dual_avx2));
3368 
// Parameter table for the AVX2 instance of AvxSubpelVarianceTest. Every entry
// names one aom_sub_pixel_variance<W>x<H>_avx2 function; the two leading
// integers equal log2(W) and log2(H) of the size in that function's name.
// Only sizes with a width of 16 or more appear in this table.
// NOTE(review): the trailing 0 is presumably the bit-depth argument - confirm
// against the params type definition.
3369 const SubpelVarianceParams kArraySubpelVariance_avx2[] = {
3370   SubpelVarianceParams(7, 7, &aom_sub_pixel_variance128x128_avx2, 0),
3371   SubpelVarianceParams(7, 6, &aom_sub_pixel_variance128x64_avx2, 0),
3372   SubpelVarianceParams(6, 7, &aom_sub_pixel_variance64x128_avx2, 0),
3373   SubpelVarianceParams(6, 6, &aom_sub_pixel_variance64x64_avx2, 0),
3374   SubpelVarianceParams(6, 5, &aom_sub_pixel_variance64x32_avx2, 0),
3375   SubpelVarianceParams(5, 6, &aom_sub_pixel_variance32x64_avx2, 0),
3376   SubpelVarianceParams(5, 5, &aom_sub_pixel_variance32x32_avx2, 0),
3377   SubpelVarianceParams(5, 4, &aom_sub_pixel_variance32x16_avx2, 0),
3378 
3379   SubpelVarianceParams(4, 5, &aom_sub_pixel_variance16x32_avx2, 0),
3380   SubpelVarianceParams(4, 4, &aom_sub_pixel_variance16x16_avx2, 0),
3381   SubpelVarianceParams(4, 3, &aom_sub_pixel_variance16x8_avx2, 0),
  // 16x64 and 16x4 are only listed when CONFIG_REALTIME_ONLY is 0.
3382 #if !CONFIG_REALTIME_ONLY
3383   SubpelVarianceParams(4, 6, &aom_sub_pixel_variance16x64_avx2, 0),
3384   SubpelVarianceParams(4, 2, &aom_sub_pixel_variance16x4_avx2, 0),
3385 #endif
3386 };
// Generates one AvxSubpelVarianceTest instance per table entry, under the
// "AVX2" prefix.
3387 INSTANTIATE_TEST_SUITE_P(AVX2, AvxSubpelVarianceTest,
3388                          ::testing::ValuesIn(kArraySubpelVariance_avx2));
3389 
// Registers the "AVX2" instance of AvxSubpelAvgVarianceTest with inline
// parameters. Every entry names one aom_sub_pixel_avg_variance<W>x<H>_avx2
// function; the two leading integers equal log2(W) and log2(H) of that size.
// Only the eight sizes from 128x128 down to 32x16 are listed here.
// NOTE(review): the trailing 0 is presumably the bit-depth argument - confirm
// against the params type definition.
3390 INSTANTIATE_TEST_SUITE_P(
3391     AVX2, AvxSubpelAvgVarianceTest,
3392     ::testing::Values(
3393         SubpelAvgVarianceParams(7, 7, &aom_sub_pixel_avg_variance128x128_avx2,
3394                                 0),
3395         SubpelAvgVarianceParams(7, 6, &aom_sub_pixel_avg_variance128x64_avx2,
3396                                 0),
3397         SubpelAvgVarianceParams(6, 7, &aom_sub_pixel_avg_variance64x128_avx2,
3398                                 0),
3399         SubpelAvgVarianceParams(6, 6, &aom_sub_pixel_avg_variance64x64_avx2, 0),
3400         SubpelAvgVarianceParams(6, 5, &aom_sub_pixel_avg_variance64x32_avx2, 0),
3401         SubpelAvgVarianceParams(5, 6, &aom_sub_pixel_avg_variance32x64_avx2, 0),
3402         SubpelAvgVarianceParams(5, 5, &aom_sub_pixel_avg_variance32x32_avx2, 0),
3403         SubpelAvgVarianceParams(5, 4, &aom_sub_pixel_avg_variance32x16_avx2,
3404                                 0)));
3405 #endif  // HAVE_AVX2
3406 
3407 #if HAVE_NEON
// Registers the "NEON" instance of MseWxHTest. The same function,
// aom_mse_wxh_16bit_neon, is exercised with all four combinations of 3 and 2
// for the two leading integers.
// NOTE(review): the leading integers are presumably log2(width)/log2(height)
// (i.e. 8x8, 8x4, 4x8, 4x4) and the trailing 8 the bit depth - confirm against
// the MseWxHParams definition.
3408 INSTANTIATE_TEST_SUITE_P(
3409     NEON, MseWxHTest,
3410     ::testing::Values(MseWxHParams(3, 3, &aom_mse_wxh_16bit_neon, 8),
3411                       MseWxHParams(3, 2, &aom_mse_wxh_16bit_neon, 8),
3412                       MseWxHParams(2, 3, &aom_mse_wxh_16bit_neon, 8),
3413                       MseWxHParams(2, 2, &aom_mse_wxh_16bit_neon, 8)));
3414 
// Registers the "NEON" instance of Mse16xHTest. The same function,
// aom_mse_16xh_16bit_neon, is exercised with all four combinations of 3 and 2
// for the two leading integers.
// NOTE(review): the leading integers are presumably log2 block dimensions and
// the trailing 8 the bit depth - confirm against the Mse16xHParams definition.
3415 INSTANTIATE_TEST_SUITE_P(
3416     NEON, Mse16xHTest,
3417     ::testing::Values(Mse16xHParams(3, 3, &aom_mse_16xh_16bit_neon, 8),
3418                       Mse16xHParams(3, 2, &aom_mse_16xh_16bit_neon, 8),
3419                       Mse16xHParams(2, 3, &aom_mse_16xh_16bit_neon, 8),
3420                       Mse16xHParams(2, 2, &aom_mse_16xh_16bit_neon, 8)));
3421 
// Registers the "NEON" instance of SumOfSquaresTest. The test parameter is the
// function itself (aom_get_mb_ss_neon), not a params tuple. The registration
// is compiled out when CONFIG_REALTIME_ONLY is set.
3422 #if !CONFIG_REALTIME_ONLY
3423 INSTANTIATE_TEST_SUITE_P(NEON, SumOfSquaresTest,
3424                          ::testing::Values(aom_get_mb_ss_neon));
3425 #endif  // !CONFIG_REALTIME_ONLY
3426 
// Registers the "NEON" instance of AvxMseTest for the four aom_mse<W>x<H>_neon
// functions (8x8, 8x16, 16x16, 16x8). The two leading integers equal log2(W)
// and log2(H) of the size in each function's name.
3427 INSTANTIATE_TEST_SUITE_P(NEON, AvxMseTest,
3428                          ::testing::Values(MseParams(3, 3, &aom_mse8x8_neon),
3429                                            MseParams(3, 4, &aom_mse8x16_neon),
3430                                            MseParams(4, 4, &aom_mse16x16_neon),
3431                                            MseParams(4, 3, &aom_mse16x8_neon)));
3432 
// Parameter table for the NEON instance of AvxVarianceTest. Every entry names
// one aom_variance<W>x<H>_neon function; the two leading integers equal
// log2(W) and log2(H) of the size in that function's name.
// Each block size is listed exactly once: a second, out-of-order copy of the
// 64x64 entry used to follow the 128x128 entry and only made every test case
// run twice for that size.
3433 const VarianceParams kArrayVariance_neon[] = {
3434   VarianceParams(7, 7, &aom_variance128x128_neon),
3436   VarianceParams(7, 6, &aom_variance128x64_neon),
3437   VarianceParams(6, 7, &aom_variance64x128_neon),
3438   VarianceParams(6, 6, &aom_variance64x64_neon),
3439   VarianceParams(6, 5, &aom_variance64x32_neon),
3440   VarianceParams(5, 6, &aom_variance32x64_neon),
3441   VarianceParams(5, 5, &aom_variance32x32_neon),
3442   VarianceParams(5, 4, &aom_variance32x16_neon),
3443   VarianceParams(4, 5, &aom_variance16x32_neon),
3444   VarianceParams(4, 4, &aom_variance16x16_neon),
3445   VarianceParams(4, 3, &aom_variance16x8_neon),
3446   VarianceParams(3, 4, &aom_variance8x16_neon),
3447   VarianceParams(3, 3, &aom_variance8x8_neon),
3448   VarianceParams(3, 2, &aom_variance8x4_neon),
3449   VarianceParams(2, 3, &aom_variance4x8_neon),
3450   VarianceParams(2, 2, &aom_variance4x4_neon),
  // The 4:1 and 1:4 sizes below are only listed when CONFIG_REALTIME_ONLY is 0.
3451 #if !CONFIG_REALTIME_ONLY
3452   VarianceParams(2, 4, &aom_variance4x16_neon),
3453   VarianceParams(4, 2, &aom_variance16x4_neon),
3454   VarianceParams(3, 5, &aom_variance8x32_neon),
3455   VarianceParams(5, 3, &aom_variance32x8_neon),
3456   VarianceParams(4, 6, &aom_variance16x64_neon),
3457   VarianceParams(6, 4, &aom_variance64x16_neon),
3458 #endif
3459 };
3460 
// Generates one AvxVarianceTest instance per table entry, under the "NEON"
// prefix.
3461 INSTANTIATE_TEST_SUITE_P(NEON, AvxVarianceTest,
3462                          ::testing::ValuesIn(kArrayVariance_neon));
3463 
// Parameter table for the NEON instance of AvxSubpelVarianceTest. Every entry
// names one aom_sub_pixel_variance<W>x<H>_neon function; the two leading
// integers equal log2(W) and log2(H) of the size in that function's name.
// NOTE(review): the trailing 0 is presumably the bit-depth argument - confirm
// against the params type definition.
3464 const SubpelVarianceParams kArraySubpelVariance_neon[] = {
3465   SubpelVarianceParams(7, 7, &aom_sub_pixel_variance128x128_neon, 0),
3466   SubpelVarianceParams(7, 6, &aom_sub_pixel_variance128x64_neon, 0),
3467   SubpelVarianceParams(6, 7, &aom_sub_pixel_variance64x128_neon, 0),
3468   SubpelVarianceParams(6, 6, &aom_sub_pixel_variance64x64_neon, 0),
3469   SubpelVarianceParams(6, 5, &aom_sub_pixel_variance64x32_neon, 0),
3470   SubpelVarianceParams(5, 6, &aom_sub_pixel_variance32x64_neon, 0),
3471   SubpelVarianceParams(5, 5, &aom_sub_pixel_variance32x32_neon, 0),
3472   SubpelVarianceParams(5, 4, &aom_sub_pixel_variance32x16_neon, 0),
3473   SubpelVarianceParams(4, 5, &aom_sub_pixel_variance16x32_neon, 0),
3474   SubpelVarianceParams(4, 4, &aom_sub_pixel_variance16x16_neon, 0),
3475   SubpelVarianceParams(4, 3, &aom_sub_pixel_variance16x8_neon, 0),
3476   SubpelVarianceParams(3, 4, &aom_sub_pixel_variance8x16_neon, 0),
3477   SubpelVarianceParams(3, 3, &aom_sub_pixel_variance8x8_neon, 0),
3478   SubpelVarianceParams(3, 2, &aom_sub_pixel_variance8x4_neon, 0),
3479   SubpelVarianceParams(2, 3, &aom_sub_pixel_variance4x8_neon, 0),
3480   SubpelVarianceParams(2, 2, &aom_sub_pixel_variance4x4_neon, 0),
  // The 4:1 and 1:4 sizes below are only listed when CONFIG_REALTIME_ONLY is 0.
3481 #if !CONFIG_REALTIME_ONLY
3482   SubpelVarianceParams(6, 4, &aom_sub_pixel_variance64x16_neon, 0),
3483   SubpelVarianceParams(4, 6, &aom_sub_pixel_variance16x64_neon, 0),
3484   SubpelVarianceParams(5, 3, &aom_sub_pixel_variance32x8_neon, 0),
3485   SubpelVarianceParams(3, 5, &aom_sub_pixel_variance8x32_neon, 0),
3486   SubpelVarianceParams(4, 2, &aom_sub_pixel_variance16x4_neon, 0),
3487   SubpelVarianceParams(2, 4, &aom_sub_pixel_variance4x16_neon, 0),
3488 #endif
3489 };
// Generates one AvxSubpelVarianceTest instance per table entry, under the
// "NEON" prefix.
3490 INSTANTIATE_TEST_SUITE_P(NEON, AvxSubpelVarianceTest,
3491                          ::testing::ValuesIn(kArraySubpelVariance_neon));
3492 
// Parameter table for the NEON instance of AvxSubpelAvgVarianceTest. Every
// entry names one aom_sub_pixel_avg_variance<W>x<H>_neon function; the two
// leading integers equal log2(W) and log2(H) of the size in that function's
// name.
// NOTE(review): the trailing 0 is presumably the bit-depth argument - confirm
// against the params type definition.
3493 const SubpelAvgVarianceParams kArraySubpelAvgVariance_neon[] = {
3494   SubpelAvgVarianceParams(7, 7, &aom_sub_pixel_avg_variance128x128_neon, 0),
3495   SubpelAvgVarianceParams(7, 6, &aom_sub_pixel_avg_variance128x64_neon, 0),
3496   SubpelAvgVarianceParams(6, 7, &aom_sub_pixel_avg_variance64x128_neon, 0),
3497   SubpelAvgVarianceParams(6, 6, &aom_sub_pixel_avg_variance64x64_neon, 0),
3498   SubpelAvgVarianceParams(6, 5, &aom_sub_pixel_avg_variance64x32_neon, 0),
3499   SubpelAvgVarianceParams(5, 6, &aom_sub_pixel_avg_variance32x64_neon, 0),
3500   SubpelAvgVarianceParams(5, 5, &aom_sub_pixel_avg_variance32x32_neon, 0),
3501   SubpelAvgVarianceParams(5, 4, &aom_sub_pixel_avg_variance32x16_neon, 0),
3502   SubpelAvgVarianceParams(4, 5, &aom_sub_pixel_avg_variance16x32_neon, 0),
3503   SubpelAvgVarianceParams(4, 4, &aom_sub_pixel_avg_variance16x16_neon, 0),
3504   SubpelAvgVarianceParams(4, 3, &aom_sub_pixel_avg_variance16x8_neon, 0),
3505   SubpelAvgVarianceParams(3, 4, &aom_sub_pixel_avg_variance8x16_neon, 0),
3506   SubpelAvgVarianceParams(3, 3, &aom_sub_pixel_avg_variance8x8_neon, 0),
3507   SubpelAvgVarianceParams(3, 2, &aom_sub_pixel_avg_variance8x4_neon, 0),
3508   SubpelAvgVarianceParams(2, 3, &aom_sub_pixel_avg_variance4x8_neon, 0),
3509   SubpelAvgVarianceParams(2, 2, &aom_sub_pixel_avg_variance4x4_neon, 0),
  // The 4:1 and 1:4 sizes below are only listed when CONFIG_REALTIME_ONLY is 0.
3510 #if !CONFIG_REALTIME_ONLY
3511   SubpelAvgVarianceParams(6, 4, &aom_sub_pixel_avg_variance64x16_neon, 0),
3512   SubpelAvgVarianceParams(4, 6, &aom_sub_pixel_avg_variance16x64_neon, 0),
3513   SubpelAvgVarianceParams(5, 3, &aom_sub_pixel_avg_variance32x8_neon, 0),
3514   SubpelAvgVarianceParams(3, 5, &aom_sub_pixel_avg_variance8x32_neon, 0),
3515   SubpelAvgVarianceParams(4, 2, &aom_sub_pixel_avg_variance16x4_neon, 0),
3516   SubpelAvgVarianceParams(2, 4, &aom_sub_pixel_avg_variance4x16_neon, 0),
3517 #endif
3518 };
// Generates one AvxSubpelAvgVarianceTest instance per table entry, under the
// "NEON" prefix.
3519 INSTANTIATE_TEST_SUITE_P(NEON, AvxSubpelAvgVarianceTest,
3520                          ::testing::ValuesIn(kArraySubpelAvgVariance_neon));
3521 
// Parameter table for the NEON instance of AvxDistWtdSubpelAvgVarianceTest.
// Every entry names one aom_dist_wtd_sub_pixel_avg_variance<W>x<H>_neon
// function; the two leading integers equal log2(W) and log2(H) of the size in
// that function's name. The largest size listed here is 64x64; no 128-wide or
// 128-tall entries appear in this table.
// NOTE(review): the trailing 0 is presumably the bit-depth argument - confirm
// against the params type definition.
3522 const DistWtdSubpelAvgVarianceParams kArrayDistWtdSubpelAvgVariance_neon[] = {
3523   DistWtdSubpelAvgVarianceParams(
3524       6, 6, &aom_dist_wtd_sub_pixel_avg_variance64x64_neon, 0),
3525   DistWtdSubpelAvgVarianceParams(
3526       6, 5, &aom_dist_wtd_sub_pixel_avg_variance64x32_neon, 0),
3527   DistWtdSubpelAvgVarianceParams(
3528       5, 6, &aom_dist_wtd_sub_pixel_avg_variance32x64_neon, 0),
3529   DistWtdSubpelAvgVarianceParams(
3530       5, 5, &aom_dist_wtd_sub_pixel_avg_variance32x32_neon, 0),
3531   DistWtdSubpelAvgVarianceParams(
3532       5, 4, &aom_dist_wtd_sub_pixel_avg_variance32x16_neon, 0),
3533   DistWtdSubpelAvgVarianceParams(
3534       4, 5, &aom_dist_wtd_sub_pixel_avg_variance16x32_neon, 0),
3535   DistWtdSubpelAvgVarianceParams(
3536       4, 4, &aom_dist_wtd_sub_pixel_avg_variance16x16_neon, 0),
3537   DistWtdSubpelAvgVarianceParams(
3538       4, 3, &aom_dist_wtd_sub_pixel_avg_variance16x8_neon, 0),
3539   DistWtdSubpelAvgVarianceParams(
3540       3, 4, &aom_dist_wtd_sub_pixel_avg_variance8x16_neon, 0),
3541   DistWtdSubpelAvgVarianceParams(
3542       3, 3, &aom_dist_wtd_sub_pixel_avg_variance8x8_neon, 0),
3543   DistWtdSubpelAvgVarianceParams(
3544       3, 2, &aom_dist_wtd_sub_pixel_avg_variance8x4_neon, 0),
3545   DistWtdSubpelAvgVarianceParams(
3546       2, 3, &aom_dist_wtd_sub_pixel_avg_variance4x8_neon, 0),
3547   DistWtdSubpelAvgVarianceParams(
3548       2, 2, &aom_dist_wtd_sub_pixel_avg_variance4x4_neon, 0),
  // The 4:1 and 1:4 sizes below are only listed when CONFIG_REALTIME_ONLY is 0.
3549 #if !CONFIG_REALTIME_ONLY
3550   DistWtdSubpelAvgVarianceParams(
3551       6, 4, &aom_dist_wtd_sub_pixel_avg_variance64x16_neon, 0),
3552   DistWtdSubpelAvgVarianceParams(
3553       4, 6, &aom_dist_wtd_sub_pixel_avg_variance16x64_neon, 0),
3554   DistWtdSubpelAvgVarianceParams(
3555       5, 3, &aom_dist_wtd_sub_pixel_avg_variance32x8_neon, 0),
3556   DistWtdSubpelAvgVarianceParams(
3557       3, 5, &aom_dist_wtd_sub_pixel_avg_variance8x32_neon, 0),
3558   DistWtdSubpelAvgVarianceParams(
3559       4, 2, &aom_dist_wtd_sub_pixel_avg_variance16x4_neon, 0),
3560   DistWtdSubpelAvgVarianceParams(
3561       2, 4, &aom_dist_wtd_sub_pixel_avg_variance4x16_neon, 0),
3562 #endif  // !CONFIG_REALTIME_ONLY
3563 };
// Generates one AvxDistWtdSubpelAvgVarianceTest instance per table entry,
// under the "NEON" prefix.
3564 INSTANTIATE_TEST_SUITE_P(
3565     NEON, AvxDistWtdSubpelAvgVarianceTest,
3566     ::testing::ValuesIn(kArrayDistWtdSubpelAvgVariance_neon));
3567 
// NEON parameters and registration for AvxObmcSubpelVarianceTest; the whole
// block is compiled out when CONFIG_REALTIME_ONLY is set.
// Every entry names one aom_obmc_sub_pixel_variance<W>x<H>_neon function; the
// two leading integers equal log2(W) and log2(H) of the size in that
// function's name.
// NOTE(review): the trailing 0 is presumably the bit-depth argument - confirm
// against the params type definition.
3568 #if !CONFIG_REALTIME_ONLY
3569 const ObmcSubpelVarianceParams kArrayObmcSubpelVariance_neon[] = {
3570   ObmcSubpelVarianceParams(7, 7, &aom_obmc_sub_pixel_variance128x128_neon, 0),
3571   ObmcSubpelVarianceParams(7, 6, &aom_obmc_sub_pixel_variance128x64_neon, 0),
3572   ObmcSubpelVarianceParams(6, 7, &aom_obmc_sub_pixel_variance64x128_neon, 0),
3573   ObmcSubpelVarianceParams(6, 6, &aom_obmc_sub_pixel_variance64x64_neon, 0),
3574   ObmcSubpelVarianceParams(6, 5, &aom_obmc_sub_pixel_variance64x32_neon, 0),
3575   ObmcSubpelVarianceParams(5, 6, &aom_obmc_sub_pixel_variance32x64_neon, 0),
3576   ObmcSubpelVarianceParams(5, 5, &aom_obmc_sub_pixel_variance32x32_neon, 0),
3577   ObmcSubpelVarianceParams(5, 4, &aom_obmc_sub_pixel_variance32x16_neon, 0),
3578   ObmcSubpelVarianceParams(4, 5, &aom_obmc_sub_pixel_variance16x32_neon, 0),
3579   ObmcSubpelVarianceParams(4, 4, &aom_obmc_sub_pixel_variance16x16_neon, 0),
3580   ObmcSubpelVarianceParams(4, 3, &aom_obmc_sub_pixel_variance16x8_neon, 0),
3581   ObmcSubpelVarianceParams(3, 4, &aom_obmc_sub_pixel_variance8x16_neon, 0),
3582   ObmcSubpelVarianceParams(3, 3, &aom_obmc_sub_pixel_variance8x8_neon, 0),
3583   ObmcSubpelVarianceParams(3, 2, &aom_obmc_sub_pixel_variance8x4_neon, 0),
3584   ObmcSubpelVarianceParams(2, 3, &aom_obmc_sub_pixel_variance4x8_neon, 0),
3585   ObmcSubpelVarianceParams(2, 2, &aom_obmc_sub_pixel_variance4x4_neon, 0),
3586   ObmcSubpelVarianceParams(6, 4, &aom_obmc_sub_pixel_variance64x16_neon, 0),
3587   ObmcSubpelVarianceParams(4, 6, &aom_obmc_sub_pixel_variance16x64_neon, 0),
3588   ObmcSubpelVarianceParams(5, 3, &aom_obmc_sub_pixel_variance32x8_neon, 0),
3589   ObmcSubpelVarianceParams(3, 5, &aom_obmc_sub_pixel_variance8x32_neon, 0),
3590   ObmcSubpelVarianceParams(4, 2, &aom_obmc_sub_pixel_variance16x4_neon, 0),
3591   ObmcSubpelVarianceParams(2, 4, &aom_obmc_sub_pixel_variance4x16_neon, 0),
3592 };
// Generates one AvxObmcSubpelVarianceTest instance per table entry, under the
// "NEON" prefix.
3593 INSTANTIATE_TEST_SUITE_P(NEON, AvxObmcSubpelVarianceTest,
3594                          ::testing::ValuesIn(kArrayObmcSubpelVariance_neon));
3595 #endif
3596 
// Parameter table for the NEON instance of GetSseSum8x8QuadTest. All four
// entries use the same function, aom_get_var_sse_sum_8x8_quad_neon, and differ
// only in the two leading integers.
// NOTE(review): the leading integers are presumably log2 width/height of the
// region the test covers, and the trailing 0 the bit depth - confirm against
// the GetSseSumParams definition.
3597 const GetSseSumParams kArrayGetSseSum8x8Quad_neon[] = {
3598   GetSseSumParams(7, 7, &aom_get_var_sse_sum_8x8_quad_neon, 0),
3599   GetSseSumParams(6, 6, &aom_get_var_sse_sum_8x8_quad_neon, 0),
3600   GetSseSumParams(5, 5, &aom_get_var_sse_sum_8x8_quad_neon, 0),
3601   GetSseSumParams(5, 4, &aom_get_var_sse_sum_8x8_quad_neon, 0)
3602 };
// Generates one GetSseSum8x8QuadTest instance per table entry, under the
// "NEON" prefix.
3603 INSTANTIATE_TEST_SUITE_P(NEON, GetSseSum8x8QuadTest,
3604                          ::testing::ValuesIn(kArrayGetSseSum8x8Quad_neon));
3605 
// Parameter table for the NEON instance of GetSseSum16x16DualTest. All four
// entries use the same function, aom_get_var_sse_sum_16x16_dual_neon, and
// differ only in the two leading integers.
// NOTE(review): the leading integers are presumably log2 width/height of the
// region the test covers, and the trailing 0 the bit depth - confirm against
// the GetSseSumParamsDual definition.
3606 const GetSseSumParamsDual kArrayGetSseSum16x16Dual_neon[] = {
3607   GetSseSumParamsDual(7, 7, &aom_get_var_sse_sum_16x16_dual_neon, 0),
3608   GetSseSumParamsDual(6, 6, &aom_get_var_sse_sum_16x16_dual_neon, 0),
3609   GetSseSumParamsDual(5, 5, &aom_get_var_sse_sum_16x16_dual_neon, 0),
3610   GetSseSumParamsDual(5, 4, &aom_get_var_sse_sum_16x16_dual_neon, 0)
3611 };
// Generates one GetSseSum16x16DualTest instance per table entry, under the
// "NEON" prefix.
3612 INSTANTIATE_TEST_SUITE_P(NEON, GetSseSum16x16DualTest,
3613                          ::testing::ValuesIn(kArrayGetSseSum16x16Dual_neon));
3614 
3615 #if CONFIG_AV1_HIGHBITDEPTH
// Parameter table for the NEON instance of AvxHBDVarianceTest. Every entry
// names one aom_highbd_<B>_variance<W>x<H>_neon function: the two leading
// integers equal log2(W) and log2(H) of the size in the name, and the trailing
// integer equals the <B> in the name (12, 10 or 8).
// The table is grouped by <B>: all 12-bit sizes, then 10-bit, then 8-bit.
3616 const VarianceParams kArrayHBDVariance_neon[] = {
3617   VarianceParams(7, 7, &aom_highbd_12_variance128x128_neon, 12),
3618   VarianceParams(7, 6, &aom_highbd_12_variance128x64_neon, 12),
3619   VarianceParams(6, 7, &aom_highbd_12_variance64x128_neon, 12),
3620   VarianceParams(6, 6, &aom_highbd_12_variance64x64_neon, 12),
3621   VarianceParams(6, 5, &aom_highbd_12_variance64x32_neon, 12),
3622   VarianceParams(5, 6, &aom_highbd_12_variance32x64_neon, 12),
3623   VarianceParams(5, 5, &aom_highbd_12_variance32x32_neon, 12),
3624   VarianceParams(5, 4, &aom_highbd_12_variance32x16_neon, 12),
3625   VarianceParams(4, 5, &aom_highbd_12_variance16x32_neon, 12),
3626   VarianceParams(4, 4, &aom_highbd_12_variance16x16_neon, 12),
3627   VarianceParams(4, 3, &aom_highbd_12_variance16x8_neon, 12),
3628   VarianceParams(3, 4, &aom_highbd_12_variance8x16_neon, 12),
3629   VarianceParams(3, 3, &aom_highbd_12_variance8x8_neon, 12),
3630   VarianceParams(3, 2, &aom_highbd_12_variance8x4_neon, 12),
3631   VarianceParams(2, 3, &aom_highbd_12_variance4x8_neon, 12),
3632   VarianceParams(2, 2, &aom_highbd_12_variance4x4_neon, 12),
3633   VarianceParams(7, 7, &aom_highbd_10_variance128x128_neon, 10),
3634   VarianceParams(7, 6, &aom_highbd_10_variance128x64_neon, 10),
3635   VarianceParams(6, 7, &aom_highbd_10_variance64x128_neon, 10),
3636   VarianceParams(6, 6, &aom_highbd_10_variance64x64_neon, 10),
3637   VarianceParams(6, 5, &aom_highbd_10_variance64x32_neon, 10),
3638   VarianceParams(5, 6, &aom_highbd_10_variance32x64_neon, 10),
3639   VarianceParams(5, 5, &aom_highbd_10_variance32x32_neon, 10),
3640   VarianceParams(5, 4, &aom_highbd_10_variance32x16_neon, 10),
3641   VarianceParams(4, 5, &aom_highbd_10_variance16x32_neon, 10),
3642   VarianceParams(4, 4, &aom_highbd_10_variance16x16_neon, 10),
3643   VarianceParams(4, 3, &aom_highbd_10_variance16x8_neon, 10),
3644   VarianceParams(3, 4, &aom_highbd_10_variance8x16_neon, 10),
3645   VarianceParams(3, 3, &aom_highbd_10_variance8x8_neon, 10),
3646   VarianceParams(3, 2, &aom_highbd_10_variance8x4_neon, 10),
3647   VarianceParams(2, 3, &aom_highbd_10_variance4x8_neon, 10),
3648   VarianceParams(2, 2, &aom_highbd_10_variance4x4_neon, 10),
3649   VarianceParams(7, 7, &aom_highbd_8_variance128x128_neon, 8),
3650   VarianceParams(7, 6, &aom_highbd_8_variance128x64_neon, 8),
3651   VarianceParams(6, 7, &aom_highbd_8_variance64x128_neon, 8),
3652   VarianceParams(6, 6, &aom_highbd_8_variance64x64_neon, 8),
3653   VarianceParams(6, 5, &aom_highbd_8_variance64x32_neon, 8),
3654   VarianceParams(5, 6, &aom_highbd_8_variance32x64_neon, 8),
3655   VarianceParams(5, 5, &aom_highbd_8_variance32x32_neon, 8),
3656   VarianceParams(5, 4, &aom_highbd_8_variance32x16_neon, 8),
3657   VarianceParams(4, 5, &aom_highbd_8_variance16x32_neon, 8),
3658   VarianceParams(4, 4, &aom_highbd_8_variance16x16_neon, 8),
3659   VarianceParams(4, 3, &aom_highbd_8_variance16x8_neon, 8),
3660   VarianceParams(3, 4, &aom_highbd_8_variance8x16_neon, 8),
3661   VarianceParams(3, 3, &aom_highbd_8_variance8x8_neon, 8),
3662   VarianceParams(3, 2, &aom_highbd_8_variance8x4_neon, 8),
3663   VarianceParams(2, 3, &aom_highbd_8_variance4x8_neon, 8),
3664   VarianceParams(2, 2, &aom_highbd_8_variance4x4_neon, 8),
  // The 4:1 and 1:4 sizes below (again grouped 12-, 10-, then 8-bit) are only
  // listed when CONFIG_REALTIME_ONLY is 0.
3665 #if !CONFIG_REALTIME_ONLY
3666   VarianceParams(6, 4, &aom_highbd_12_variance64x16_neon, 12),
3667   VarianceParams(4, 6, &aom_highbd_12_variance16x64_neon, 12),
3668   VarianceParams(5, 3, &aom_highbd_12_variance32x8_neon, 12),
3669   VarianceParams(3, 5, &aom_highbd_12_variance8x32_neon, 12),
3670   VarianceParams(4, 2, &aom_highbd_12_variance16x4_neon, 12),
3671   VarianceParams(2, 4, &aom_highbd_12_variance4x16_neon, 12),
3672   VarianceParams(6, 4, &aom_highbd_10_variance64x16_neon, 10),
3673   VarianceParams(4, 6, &aom_highbd_10_variance16x64_neon, 10),
3674   VarianceParams(5, 3, &aom_highbd_10_variance32x8_neon, 10),
3675   VarianceParams(3, 5, &aom_highbd_10_variance8x32_neon, 10),
3676   VarianceParams(4, 2, &aom_highbd_10_variance16x4_neon, 10),
3677   VarianceParams(2, 4, &aom_highbd_10_variance4x16_neon, 10),
3678   VarianceParams(6, 4, &aom_highbd_8_variance64x16_neon, 8),
3679   VarianceParams(4, 6, &aom_highbd_8_variance16x64_neon, 8),
3680   VarianceParams(5, 3, &aom_highbd_8_variance32x8_neon, 8),
3681   VarianceParams(3, 5, &aom_highbd_8_variance8x32_neon, 8),
3682   VarianceParams(4, 2, &aom_highbd_8_variance16x4_neon, 8),
3683   VarianceParams(2, 4, &aom_highbd_8_variance4x16_neon, 8),
3684 #endif
3685 };
3686 
// Generates one AvxHBDVarianceTest instance per table entry, under the "NEON"
// prefix.
3687 INSTANTIATE_TEST_SUITE_P(NEON, AvxHBDVarianceTest,
3688                          ::testing::ValuesIn(kArrayHBDVariance_neon));
3689 
// Parameter table for the NEON instance of AvxHBDSubpelVarianceTest. Every
// entry names one aom_highbd_<B>_sub_pixel_variance<W>x<H>_neon function: the
// two leading integers equal log2(W) and log2(H) of the size in the name, and
// the trailing integer equals the <B> in the name (12, 10 or 8).
// The largest size listed here is 64x64; no 128-wide or 128-tall entries
// appear in this table. The unconditional part is grouped 12-, 10-, then
// 8-bit.
3690 const SubpelVarianceParams kArrayHBDSubpelVariance_neon[] = {
3691   SubpelVarianceParams(6, 6, &aom_highbd_12_sub_pixel_variance64x64_neon, 12),
3692   SubpelVarianceParams(6, 5, &aom_highbd_12_sub_pixel_variance64x32_neon, 12),
3693   SubpelVarianceParams(5, 6, &aom_highbd_12_sub_pixel_variance32x64_neon, 12),
3694   SubpelVarianceParams(5, 5, &aom_highbd_12_sub_pixel_variance32x32_neon, 12),
3695   SubpelVarianceParams(5, 4, &aom_highbd_12_sub_pixel_variance32x16_neon, 12),
3696   SubpelVarianceParams(4, 5, &aom_highbd_12_sub_pixel_variance16x32_neon, 12),
3697   SubpelVarianceParams(4, 4, &aom_highbd_12_sub_pixel_variance16x16_neon, 12),
3698   SubpelVarianceParams(4, 3, &aom_highbd_12_sub_pixel_variance16x8_neon, 12),
3699   SubpelVarianceParams(3, 4, &aom_highbd_12_sub_pixel_variance8x16_neon, 12),
3700   SubpelVarianceParams(3, 3, &aom_highbd_12_sub_pixel_variance8x8_neon, 12),
3701   SubpelVarianceParams(3, 2, &aom_highbd_12_sub_pixel_variance8x4_neon, 12),
3702   SubpelVarianceParams(2, 3, &aom_highbd_12_sub_pixel_variance4x8_neon, 12),
3703   SubpelVarianceParams(2, 2, &aom_highbd_12_sub_pixel_variance4x4_neon, 12),
3704   SubpelVarianceParams(6, 6, &aom_highbd_10_sub_pixel_variance64x64_neon, 10),
3705   SubpelVarianceParams(6, 5, &aom_highbd_10_sub_pixel_variance64x32_neon, 10),
3706   SubpelVarianceParams(5, 6, &aom_highbd_10_sub_pixel_variance32x64_neon, 10),
3707   SubpelVarianceParams(5, 5, &aom_highbd_10_sub_pixel_variance32x32_neon, 10),
3708   SubpelVarianceParams(5, 4, &aom_highbd_10_sub_pixel_variance32x16_neon, 10),
3709   SubpelVarianceParams(4, 5, &aom_highbd_10_sub_pixel_variance16x32_neon, 10),
3710   SubpelVarianceParams(4, 4, &aom_highbd_10_sub_pixel_variance16x16_neon, 10),
3711   SubpelVarianceParams(4, 3, &aom_highbd_10_sub_pixel_variance16x8_neon, 10),
3712   SubpelVarianceParams(3, 4, &aom_highbd_10_sub_pixel_variance8x16_neon, 10),
3713   SubpelVarianceParams(3, 3, &aom_highbd_10_sub_pixel_variance8x8_neon, 10),
3714   SubpelVarianceParams(3, 2, &aom_highbd_10_sub_pixel_variance8x4_neon, 10),
3715   SubpelVarianceParams(2, 3, &aom_highbd_10_sub_pixel_variance4x8_neon, 10),
3716   SubpelVarianceParams(2, 2, &aom_highbd_10_sub_pixel_variance4x4_neon, 10),
3717   SubpelVarianceParams(6, 6, &aom_highbd_8_sub_pixel_variance64x64_neon, 8),
3718   SubpelVarianceParams(6, 5, &aom_highbd_8_sub_pixel_variance64x32_neon, 8),
3719   SubpelVarianceParams(5, 6, &aom_highbd_8_sub_pixel_variance32x64_neon, 8),
3720   SubpelVarianceParams(5, 5, &aom_highbd_8_sub_pixel_variance32x32_neon, 8),
3721   SubpelVarianceParams(5, 4, &aom_highbd_8_sub_pixel_variance32x16_neon, 8),
3722   SubpelVarianceParams(4, 5, &aom_highbd_8_sub_pixel_variance16x32_neon, 8),
3723   SubpelVarianceParams(4, 4, &aom_highbd_8_sub_pixel_variance16x16_neon, 8),
3724   SubpelVarianceParams(4, 3, &aom_highbd_8_sub_pixel_variance16x8_neon, 8),
3725   SubpelVarianceParams(3, 4, &aom_highbd_8_sub_pixel_variance8x16_neon, 8),
3726   SubpelVarianceParams(3, 3, &aom_highbd_8_sub_pixel_variance8x8_neon, 8),
3727   SubpelVarianceParams(3, 2, &aom_highbd_8_sub_pixel_variance8x4_neon, 8),
3728   SubpelVarianceParams(2, 3, &aom_highbd_8_sub_pixel_variance4x8_neon, 8),
3729   SubpelVarianceParams(2, 2, &aom_highbd_8_sub_pixel_variance4x4_neon, 8),
  // The 4:1 and 1:4 sizes below are only listed when CONFIG_REALTIME_ONLY is 0.
  // Note the group order here is 8-, 10-, then 12-bit, the reverse of the
  // unconditional entries above.
3730 #if !CONFIG_REALTIME_ONLY
3731   SubpelVarianceParams(6, 4, &aom_highbd_8_sub_pixel_variance64x16_neon, 8),
3732   SubpelVarianceParams(4, 6, &aom_highbd_8_sub_pixel_variance16x64_neon, 8),
3733   SubpelVarianceParams(5, 3, &aom_highbd_8_sub_pixel_variance32x8_neon, 8),
3734   SubpelVarianceParams(3, 5, &aom_highbd_8_sub_pixel_variance8x32_neon, 8),
3735   SubpelVarianceParams(4, 2, &aom_highbd_8_sub_pixel_variance16x4_neon, 8),
3736   SubpelVarianceParams(2, 4, &aom_highbd_8_sub_pixel_variance4x16_neon, 8),
3737   SubpelVarianceParams(6, 4, &aom_highbd_10_sub_pixel_variance64x16_neon, 10),
3738   SubpelVarianceParams(4, 6, &aom_highbd_10_sub_pixel_variance16x64_neon, 10),
3739   SubpelVarianceParams(5, 3, &aom_highbd_10_sub_pixel_variance32x8_neon, 10),
3740   SubpelVarianceParams(3, 5, &aom_highbd_10_sub_pixel_variance8x32_neon, 10),
3741   SubpelVarianceParams(4, 2, &aom_highbd_10_sub_pixel_variance16x4_neon, 10),
3742   SubpelVarianceParams(2, 4, &aom_highbd_10_sub_pixel_variance4x16_neon, 10),
3743   SubpelVarianceParams(6, 4, &aom_highbd_12_sub_pixel_variance64x16_neon, 12),
3744   SubpelVarianceParams(4, 6, &aom_highbd_12_sub_pixel_variance16x64_neon, 12),
3745   SubpelVarianceParams(5, 3, &aom_highbd_12_sub_pixel_variance32x8_neon, 12),
3746   SubpelVarianceParams(3, 5, &aom_highbd_12_sub_pixel_variance8x32_neon, 12),
3747   SubpelVarianceParams(4, 2, &aom_highbd_12_sub_pixel_variance16x4_neon, 12),
3748   SubpelVarianceParams(2, 4, &aom_highbd_12_sub_pixel_variance4x16_neon, 12),
3749 #endif  // !CONFIG_REALTIME_ONLY
3750 };
3751 
// Generates one AvxHBDSubpelVarianceTest instance per table entry, under the
// "NEON" prefix.
3752 INSTANTIATE_TEST_SUITE_P(NEON, AvxHBDSubpelVarianceTest,
3753                          ::testing::ValuesIn(kArrayHBDSubpelVariance_neon));
3754 
// Parameter table for the NEON high-bitdepth sub-pixel *average* variance
// kernels (aom_highbd_{8,10,12}_sub_pixel_avg_variance<W>x<H>_neon).
//
// In every entry the first two arguments equal log2(W) and log2(H) of the
// block size spelled out in the function name (e.g. 7, 6 for 128x64), and the
// last argument equals the bit depth in the function name (8, 10 or 12).
// Entry order fixes the index gtest assigns to each parameter, so keep new
// entries grouped with their bit depth rather than re-sorting the table.
3755 const SubpelAvgVarianceParams kArrayHBDSubpelAvgVariance_neon[] = {
3756   SubpelAvgVarianceParams(7, 7,
3757                           &aom_highbd_8_sub_pixel_avg_variance128x128_neon, 8),
3758   SubpelAvgVarianceParams(7, 6, &aom_highbd_8_sub_pixel_avg_variance128x64_neon,
3759                           8),
3760   SubpelAvgVarianceParams(6, 7, &aom_highbd_8_sub_pixel_avg_variance64x128_neon,
3761                           8),
3762   SubpelAvgVarianceParams(6, 6, &aom_highbd_8_sub_pixel_avg_variance64x64_neon,
3763                           8),
3764   SubpelAvgVarianceParams(6, 5, &aom_highbd_8_sub_pixel_avg_variance64x32_neon,
3765                           8),
3766   SubpelAvgVarianceParams(5, 6, &aom_highbd_8_sub_pixel_avg_variance32x64_neon,
3767                           8),
3768   SubpelAvgVarianceParams(5, 5, &aom_highbd_8_sub_pixel_avg_variance32x32_neon,
3769                           8),
3770   SubpelAvgVarianceParams(5, 4, &aom_highbd_8_sub_pixel_avg_variance32x16_neon,
3771                           8),
3772   SubpelAvgVarianceParams(4, 5, &aom_highbd_8_sub_pixel_avg_variance16x32_neon,
3773                           8),
3774   SubpelAvgVarianceParams(4, 4, &aom_highbd_8_sub_pixel_avg_variance16x16_neon,
3775                           8),
3776   SubpelAvgVarianceParams(4, 3, &aom_highbd_8_sub_pixel_avg_variance16x8_neon,
3777                           8),
3778   SubpelAvgVarianceParams(3, 4, &aom_highbd_8_sub_pixel_avg_variance8x16_neon,
3779                           8),
3780   SubpelAvgVarianceParams(3, 3, &aom_highbd_8_sub_pixel_avg_variance8x8_neon,
3781                           8),
3782   SubpelAvgVarianceParams(3, 2, &aom_highbd_8_sub_pixel_avg_variance8x4_neon,
3783                           8),
3784   SubpelAvgVarianceParams(2, 3, &aom_highbd_8_sub_pixel_avg_variance4x8_neon,
3785                           8),
3786   SubpelAvgVarianceParams(2, 2, &aom_highbd_8_sub_pixel_avg_variance4x4_neon,
3787                           8),
3788   SubpelAvgVarianceParams(
3789       7, 7, &aom_highbd_10_sub_pixel_avg_variance128x128_neon, 10),
3790   SubpelAvgVarianceParams(7, 6,
3791                           &aom_highbd_10_sub_pixel_avg_variance128x64_neon, 10),
3792   SubpelAvgVarianceParams(6, 7,
3793                           &aom_highbd_10_sub_pixel_avg_variance64x128_neon, 10),
3794   SubpelAvgVarianceParams(6, 6, &aom_highbd_10_sub_pixel_avg_variance64x64_neon,
3795                           10),
3796   SubpelAvgVarianceParams(6, 5, &aom_highbd_10_sub_pixel_avg_variance64x32_neon,
3797                           10),
3798   SubpelAvgVarianceParams(5, 6, &aom_highbd_10_sub_pixel_avg_variance32x64_neon,
3799                           10),
3800   SubpelAvgVarianceParams(5, 5, &aom_highbd_10_sub_pixel_avg_variance32x32_neon,
3801                           10),
3802   SubpelAvgVarianceParams(5, 4, &aom_highbd_10_sub_pixel_avg_variance32x16_neon,
3803                           10),
3804   SubpelAvgVarianceParams(4, 5, &aom_highbd_10_sub_pixel_avg_variance16x32_neon,
3805                           10),
3806   SubpelAvgVarianceParams(4, 4, &aom_highbd_10_sub_pixel_avg_variance16x16_neon,
3807                           10),
3808   SubpelAvgVarianceParams(4, 3, &aom_highbd_10_sub_pixel_avg_variance16x8_neon,
3809                           10),
3810   SubpelAvgVarianceParams(3, 4, &aom_highbd_10_sub_pixel_avg_variance8x16_neon,
3811                           10),
3812   SubpelAvgVarianceParams(3, 3, &aom_highbd_10_sub_pixel_avg_variance8x8_neon,
3813                           10),
3814   SubpelAvgVarianceParams(3, 2, &aom_highbd_10_sub_pixel_avg_variance8x4_neon,
3815                           10),
3816   SubpelAvgVarianceParams(2, 3, &aom_highbd_10_sub_pixel_avg_variance4x8_neon,
3817                           10),
3818   SubpelAvgVarianceParams(2, 2, &aom_highbd_10_sub_pixel_avg_variance4x4_neon,
3819                           10),
3820   SubpelAvgVarianceParams(
3821       7, 7, &aom_highbd_12_sub_pixel_avg_variance128x128_neon, 12),
3822   SubpelAvgVarianceParams(7, 6,
3823                           &aom_highbd_12_sub_pixel_avg_variance128x64_neon, 12),
3824   SubpelAvgVarianceParams(6, 7,
3825                           &aom_highbd_12_sub_pixel_avg_variance64x128_neon, 12),
3826   SubpelAvgVarianceParams(6, 6, &aom_highbd_12_sub_pixel_avg_variance64x64_neon,
3827                           12),
3828   SubpelAvgVarianceParams(6, 5, &aom_highbd_12_sub_pixel_avg_variance64x32_neon,
3829                           12),
3830   SubpelAvgVarianceParams(5, 6, &aom_highbd_12_sub_pixel_avg_variance32x64_neon,
3831                           12),
3832   SubpelAvgVarianceParams(5, 5, &aom_highbd_12_sub_pixel_avg_variance32x32_neon,
3833                           12),
3834   SubpelAvgVarianceParams(5, 4, &aom_highbd_12_sub_pixel_avg_variance32x16_neon,
3835                           12),
3836   SubpelAvgVarianceParams(4, 5, &aom_highbd_12_sub_pixel_avg_variance16x32_neon,
3837                           12),
3838   SubpelAvgVarianceParams(4, 4, &aom_highbd_12_sub_pixel_avg_variance16x16_neon,
3839                           12),
3840   SubpelAvgVarianceParams(4, 3, &aom_highbd_12_sub_pixel_avg_variance16x8_neon,
3841                           12),
3842   SubpelAvgVarianceParams(3, 4, &aom_highbd_12_sub_pixel_avg_variance8x16_neon,
3843                           12),
3844   SubpelAvgVarianceParams(3, 3, &aom_highbd_12_sub_pixel_avg_variance8x8_neon,
3845                           12),
3846   SubpelAvgVarianceParams(3, 2, &aom_highbd_12_sub_pixel_avg_variance8x4_neon,
3847                           12),
3848   SubpelAvgVarianceParams(2, 3, &aom_highbd_12_sub_pixel_avg_variance4x8_neon,
3849                           12),
3850   SubpelAvgVarianceParams(2, 2, &aom_highbd_12_sub_pixel_avg_variance4x4_neon,
3851                           12),
3852 
// The 4:1 / 1:4 aspect-ratio sizes below (64x16, 16x64, 32x8, 8x32, 16x4,
// 4x16) are only listed when CONFIG_REALTIME_ONLY is 0.
3853 #if !CONFIG_REALTIME_ONLY
3854   SubpelAvgVarianceParams(6, 4, &aom_highbd_8_sub_pixel_avg_variance64x16_neon,
3855                           8),
3856   SubpelAvgVarianceParams(4, 6, &aom_highbd_8_sub_pixel_avg_variance16x64_neon,
3857                           8),
3858   SubpelAvgVarianceParams(5, 3, &aom_highbd_8_sub_pixel_avg_variance32x8_neon,
3859                           8),
3860   SubpelAvgVarianceParams(3, 5, &aom_highbd_8_sub_pixel_avg_variance8x32_neon,
3861                           8),
3862   SubpelAvgVarianceParams(4, 2, &aom_highbd_8_sub_pixel_avg_variance16x4_neon,
3863                           8),
3864   SubpelAvgVarianceParams(2, 4, &aom_highbd_8_sub_pixel_avg_variance4x16_neon,
3865                           8),
3866   SubpelAvgVarianceParams(6, 4, &aom_highbd_10_sub_pixel_avg_variance64x16_neon,
3867                           10),
3868   SubpelAvgVarianceParams(4, 6, &aom_highbd_10_sub_pixel_avg_variance16x64_neon,
3869                           10),
3870   SubpelAvgVarianceParams(5, 3, &aom_highbd_10_sub_pixel_avg_variance32x8_neon,
3871                           10),
3872   SubpelAvgVarianceParams(3, 5, &aom_highbd_10_sub_pixel_avg_variance8x32_neon,
3873                           10),
3874   SubpelAvgVarianceParams(4, 2, &aom_highbd_10_sub_pixel_avg_variance16x4_neon,
3875                           10),
3876   SubpelAvgVarianceParams(2, 4, &aom_highbd_10_sub_pixel_avg_variance4x16_neon,
3877                           10),
3878   SubpelAvgVarianceParams(6, 4, &aom_highbd_12_sub_pixel_avg_variance64x16_neon,
3879                           12),
3880   SubpelAvgVarianceParams(4, 6, &aom_highbd_12_sub_pixel_avg_variance16x64_neon,
3881                           12),
3882   SubpelAvgVarianceParams(5, 3, &aom_highbd_12_sub_pixel_avg_variance32x8_neon,
3883                           12),
3884   SubpelAvgVarianceParams(3, 5, &aom_highbd_12_sub_pixel_avg_variance8x32_neon,
3885                           12),
3886   SubpelAvgVarianceParams(4, 2, &aom_highbd_12_sub_pixel_avg_variance16x4_neon,
3887                           12),
3888   SubpelAvgVarianceParams(2, 4, &aom_highbd_12_sub_pixel_avg_variance4x16_neon,
3889                           12),
3890 #endif  // !CONFIG_REALTIME_ONLY
3891 };
3892 
// Instantiate AvxHBDSubpelAvgVarianceTest once per table entry, using "NEON"
// as the instantiation prefix.
3893 INSTANTIATE_TEST_SUITE_P(NEON, AvxHBDSubpelAvgVarianceTest,
3894                          ::testing::ValuesIn(kArrayHBDSubpelAvgVariance_neon));
3895 
// Parameter table for the NEON high-bitdepth distance-weighted sub-pixel
// average variance kernels
// (aom_highbd_{8,10,12}_dist_wtd_sub_pixel_avg_variance<W>x<H>_neon).
//
// In every entry the first two arguments equal log2(W) and log2(H) of the
// block size in the function name, and the last argument equals the bit depth
// in the function name. Entries are grouped by bit depth (8, 10, 12); entry
// order fixes the index gtest assigns to each parameter.
3896 const DistWtdSubpelAvgVarianceParams
3897     kArrayHBDDistWtdSubpelAvgVariance_neon[] = {
3898       DistWtdSubpelAvgVarianceParams(
3899           7, 7, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance128x128_neon, 8),
3900       DistWtdSubpelAvgVarianceParams(
3901           7, 6, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance128x64_neon, 8),
3902       DistWtdSubpelAvgVarianceParams(
3903           6, 7, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance64x128_neon, 8),
3904       DistWtdSubpelAvgVarianceParams(
3905           6, 6, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance64x64_neon, 8),
3906       DistWtdSubpelAvgVarianceParams(
3907           6, 5, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance64x32_neon, 8),
3908       DistWtdSubpelAvgVarianceParams(
3909           5, 6, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance32x64_neon, 8),
3910       DistWtdSubpelAvgVarianceParams(
3911           5, 5, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance32x32_neon, 8),
3912       DistWtdSubpelAvgVarianceParams(
3913           5, 4, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance32x16_neon, 8),
3914       DistWtdSubpelAvgVarianceParams(
3915           4, 5, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance16x32_neon, 8),
3916       DistWtdSubpelAvgVarianceParams(
3917           4, 4, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance16x16_neon, 8),
3918       DistWtdSubpelAvgVarianceParams(
3919           4, 3, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance16x8_neon, 8),
3920       DistWtdSubpelAvgVarianceParams(
3921           3, 4, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance8x16_neon, 8),
3922       DistWtdSubpelAvgVarianceParams(
3923           3, 3, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance8x8_neon, 8),
3924       DistWtdSubpelAvgVarianceParams(
3925           3, 2, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance8x4_neon, 8),
3926       DistWtdSubpelAvgVarianceParams(
3927           2, 3, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance4x8_neon, 8),
3928       DistWtdSubpelAvgVarianceParams(
3929           2, 2, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance4x4_neon, 8),
3930       DistWtdSubpelAvgVarianceParams(
3931           7, 7, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance128x128_neon, 10),
3932       DistWtdSubpelAvgVarianceParams(
3933           7, 6, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance128x64_neon, 10),
3934       DistWtdSubpelAvgVarianceParams(
3935           6, 7, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance64x128_neon, 10),
3936       DistWtdSubpelAvgVarianceParams(
3937           6, 6, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance64x64_neon, 10),
3938       DistWtdSubpelAvgVarianceParams(
3939           6, 5, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance64x32_neon, 10),
3940       DistWtdSubpelAvgVarianceParams(
3941           5, 6, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance32x64_neon, 10),
3942       DistWtdSubpelAvgVarianceParams(
3943           5, 5, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance32x32_neon, 10),
3944       DistWtdSubpelAvgVarianceParams(
3945           5, 4, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance32x16_neon, 10),
3946       DistWtdSubpelAvgVarianceParams(
3947           4, 5, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance16x32_neon, 10),
3948       DistWtdSubpelAvgVarianceParams(
3949           4, 4, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance16x16_neon, 10),
3950       DistWtdSubpelAvgVarianceParams(
3951           4, 3, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance16x8_neon, 10),
3952       DistWtdSubpelAvgVarianceParams(
3953           3, 4, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance8x16_neon, 10),
3954       DistWtdSubpelAvgVarianceParams(
3955           3, 3, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance8x8_neon, 10),
3956       DistWtdSubpelAvgVarianceParams(
3957           3, 2, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance8x4_neon, 10),
3958       DistWtdSubpelAvgVarianceParams(
3959           2, 3, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance4x8_neon, 10),
3960       DistWtdSubpelAvgVarianceParams(
3961           2, 2, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance4x4_neon, 10),
3962       DistWtdSubpelAvgVarianceParams(
3963           7, 7, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance128x128_neon, 12),
3964       DistWtdSubpelAvgVarianceParams(
3965           7, 6, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance128x64_neon, 12),
3966       DistWtdSubpelAvgVarianceParams(
3967           6, 7, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance64x128_neon, 12),
3968       DistWtdSubpelAvgVarianceParams(
3969           6, 6, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance64x64_neon, 12),
3970       DistWtdSubpelAvgVarianceParams(
3971           6, 5, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance64x32_neon, 12),
3972       DistWtdSubpelAvgVarianceParams(
3973           5, 6, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance32x64_neon, 12),
3974       DistWtdSubpelAvgVarianceParams(
3975           5, 5, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance32x32_neon, 12),
3976       DistWtdSubpelAvgVarianceParams(
3977           5, 4, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance32x16_neon, 12),
3978       DistWtdSubpelAvgVarianceParams(
3979           4, 5, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance16x32_neon, 12),
3980       DistWtdSubpelAvgVarianceParams(
3981           4, 4, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance16x16_neon, 12),
3982       DistWtdSubpelAvgVarianceParams(
3983           4, 3, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance16x8_neon, 12),
3984       DistWtdSubpelAvgVarianceParams(
3985           3, 4, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance8x16_neon, 12),
3986       DistWtdSubpelAvgVarianceParams(
3987           3, 3, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance8x8_neon, 12),
3988       DistWtdSubpelAvgVarianceParams(
3989           3, 2, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance8x4_neon, 12),
3990       DistWtdSubpelAvgVarianceParams(
3991           2, 3, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance4x8_neon, 12),
3992       DistWtdSubpelAvgVarianceParams(
3993           2, 2, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance4x4_neon, 12),
3994 
// The 4:1 / 1:4 aspect-ratio sizes below are only listed when
// CONFIG_REALTIME_ONLY is 0.
3995 #if !CONFIG_REALTIME_ONLY
3996       DistWtdSubpelAvgVarianceParams(
3997           6, 4, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance64x16_neon, 8),
3998       DistWtdSubpelAvgVarianceParams(
3999           4, 6, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance16x64_neon, 8),
4000       DistWtdSubpelAvgVarianceParams(
4001           5, 3, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance32x8_neon, 8),
4002       DistWtdSubpelAvgVarianceParams(
4003           3, 5, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance8x32_neon, 8),
4004       DistWtdSubpelAvgVarianceParams(
4005           4, 2, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance16x4_neon, 8),
4006       DistWtdSubpelAvgVarianceParams(
4007           2, 4, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance4x16_neon, 8),
4008       DistWtdSubpelAvgVarianceParams(
4009           6, 4, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance64x16_neon, 10),
4010       DistWtdSubpelAvgVarianceParams(
4011           4, 6, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance16x64_neon, 10),
4012       DistWtdSubpelAvgVarianceParams(
4013           5, 3, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance32x8_neon, 10),
4014       DistWtdSubpelAvgVarianceParams(
4015           3, 5, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance8x32_neon, 10),
4016       DistWtdSubpelAvgVarianceParams(
4017           4, 2, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance16x4_neon, 10),
4018       DistWtdSubpelAvgVarianceParams(
4019           2, 4, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance4x16_neon, 10),
4020       DistWtdSubpelAvgVarianceParams(
4021           6, 4, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance64x16_neon, 12),
4022       DistWtdSubpelAvgVarianceParams(
4023           4, 6, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance16x64_neon, 12),
4024       DistWtdSubpelAvgVarianceParams(
4025           5, 3, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance32x8_neon, 12),
4026       DistWtdSubpelAvgVarianceParams(
4027           3, 5, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance8x32_neon, 12),
4028       DistWtdSubpelAvgVarianceParams(
4029           4, 2, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance16x4_neon, 12),
4030       DistWtdSubpelAvgVarianceParams(
4031           2, 4, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance4x16_neon, 12),
4032 #endif  // !CONFIG_REALTIME_ONLY
4033     };
// Instantiate AvxHBDDistWtdSubpelAvgVarianceTest once per table entry, using
// "NEON" as the instantiation prefix.
4034 INSTANTIATE_TEST_SUITE_P(
4035     NEON, AvxHBDDistWtdSubpelAvgVarianceTest,
4036     ::testing::ValuesIn(kArrayHBDDistWtdSubpelAvgVariance_neon));
4037 
// The whole OBMC sub-pixel variance table and its instantiation are compiled
// only when CONFIG_REALTIME_ONLY is 0.
4038 #if !CONFIG_REALTIME_ONLY
// Parameter table for the NEON high-bitdepth OBMC sub-pixel variance kernels
// (aom_highbd_{12,10,8}_obmc_sub_pixel_variance<W>x<H>_neon).
//
// In every entry the first two arguments equal log2(W) and log2(H) of the
// block size in the function name, and the last argument equals the bit depth
// in the function name. Unlike the neighbouring tables, entries are grouped in
// the order 12-bit, 10-bit, 8-bit, and each group already includes the
// 4:1 / 1:4 sizes (64x16 ... 4x16). Entry order fixes the index gtest assigns
// to each parameter.
4039 const ObmcSubpelVarianceParams kArrayHBDObmcSubpelVariance_neon[] = {
4040   ObmcSubpelVarianceParams(
4041       7, 7, &aom_highbd_12_obmc_sub_pixel_variance128x128_neon, 12),
4042   ObmcSubpelVarianceParams(
4043       7, 6, &aom_highbd_12_obmc_sub_pixel_variance128x64_neon, 12),
4044   ObmcSubpelVarianceParams(
4045       6, 7, &aom_highbd_12_obmc_sub_pixel_variance64x128_neon, 12),
4046   ObmcSubpelVarianceParams(
4047       6, 6, &aom_highbd_12_obmc_sub_pixel_variance64x64_neon, 12),
4048   ObmcSubpelVarianceParams(
4049       6, 5, &aom_highbd_12_obmc_sub_pixel_variance64x32_neon, 12),
4050   ObmcSubpelVarianceParams(
4051       5, 6, &aom_highbd_12_obmc_sub_pixel_variance32x64_neon, 12),
4052   ObmcSubpelVarianceParams(
4053       5, 5, &aom_highbd_12_obmc_sub_pixel_variance32x32_neon, 12),
4054   ObmcSubpelVarianceParams(
4055       5, 4, &aom_highbd_12_obmc_sub_pixel_variance32x16_neon, 12),
4056   ObmcSubpelVarianceParams(
4057       4, 5, &aom_highbd_12_obmc_sub_pixel_variance16x32_neon, 12),
4058   ObmcSubpelVarianceParams(
4059       4, 4, &aom_highbd_12_obmc_sub_pixel_variance16x16_neon, 12),
4060   ObmcSubpelVarianceParams(4, 3,
4061                            &aom_highbd_12_obmc_sub_pixel_variance16x8_neon, 12),
4062   ObmcSubpelVarianceParams(3, 4,
4063                            &aom_highbd_12_obmc_sub_pixel_variance8x16_neon, 12),
4064   ObmcSubpelVarianceParams(3, 3, &aom_highbd_12_obmc_sub_pixel_variance8x8_neon,
4065                            12),
4066   ObmcSubpelVarianceParams(3, 2, &aom_highbd_12_obmc_sub_pixel_variance8x4_neon,
4067                            12),
4068   ObmcSubpelVarianceParams(2, 3, &aom_highbd_12_obmc_sub_pixel_variance4x8_neon,
4069                            12),
4070   ObmcSubpelVarianceParams(2, 2, &aom_highbd_12_obmc_sub_pixel_variance4x4_neon,
4071                            12),
4072   ObmcSubpelVarianceParams(
4073       6, 4, &aom_highbd_12_obmc_sub_pixel_variance64x16_neon, 12),
4074   ObmcSubpelVarianceParams(
4075       4, 6, &aom_highbd_12_obmc_sub_pixel_variance16x64_neon, 12),
4076   ObmcSubpelVarianceParams(5, 3,
4077                            &aom_highbd_12_obmc_sub_pixel_variance32x8_neon, 12),
4078   ObmcSubpelVarianceParams(3, 5,
4079                            &aom_highbd_12_obmc_sub_pixel_variance8x32_neon, 12),
4080   ObmcSubpelVarianceParams(4, 2,
4081                            &aom_highbd_12_obmc_sub_pixel_variance16x4_neon, 12),
4082   ObmcSubpelVarianceParams(2, 4,
4083                            &aom_highbd_12_obmc_sub_pixel_variance4x16_neon, 12),
4084   ObmcSubpelVarianceParams(
4085       7, 7, &aom_highbd_10_obmc_sub_pixel_variance128x128_neon, 10),
4086   ObmcSubpelVarianceParams(
4087       7, 6, &aom_highbd_10_obmc_sub_pixel_variance128x64_neon, 10),
4088   ObmcSubpelVarianceParams(
4089       6, 7, &aom_highbd_10_obmc_sub_pixel_variance64x128_neon, 10),
4090   ObmcSubpelVarianceParams(
4091       6, 6, &aom_highbd_10_obmc_sub_pixel_variance64x64_neon, 10),
4092   ObmcSubpelVarianceParams(
4093       6, 5, &aom_highbd_10_obmc_sub_pixel_variance64x32_neon, 10),
4094   ObmcSubpelVarianceParams(
4095       5, 6, &aom_highbd_10_obmc_sub_pixel_variance32x64_neon, 10),
4096   ObmcSubpelVarianceParams(
4097       5, 5, &aom_highbd_10_obmc_sub_pixel_variance32x32_neon, 10),
4098   ObmcSubpelVarianceParams(
4099       5, 4, &aom_highbd_10_obmc_sub_pixel_variance32x16_neon, 10),
4100   ObmcSubpelVarianceParams(
4101       4, 5, &aom_highbd_10_obmc_sub_pixel_variance16x32_neon, 10),
4102   ObmcSubpelVarianceParams(
4103       4, 4, &aom_highbd_10_obmc_sub_pixel_variance16x16_neon, 10),
4104   ObmcSubpelVarianceParams(4, 3,
4105                            &aom_highbd_10_obmc_sub_pixel_variance16x8_neon, 10),
4106   ObmcSubpelVarianceParams(3, 4,
4107                            &aom_highbd_10_obmc_sub_pixel_variance8x16_neon, 10),
4108   ObmcSubpelVarianceParams(3, 3, &aom_highbd_10_obmc_sub_pixel_variance8x8_neon,
4109                            10),
4110   ObmcSubpelVarianceParams(3, 2, &aom_highbd_10_obmc_sub_pixel_variance8x4_neon,
4111                            10),
4112   ObmcSubpelVarianceParams(2, 3, &aom_highbd_10_obmc_sub_pixel_variance4x8_neon,
4113                            10),
4114   ObmcSubpelVarianceParams(2, 2, &aom_highbd_10_obmc_sub_pixel_variance4x4_neon,
4115                            10),
4116   ObmcSubpelVarianceParams(
4117       6, 4, &aom_highbd_10_obmc_sub_pixel_variance64x16_neon, 10),
4118   ObmcSubpelVarianceParams(
4119       4, 6, &aom_highbd_10_obmc_sub_pixel_variance16x64_neon, 10),
4120   ObmcSubpelVarianceParams(5, 3,
4121                            &aom_highbd_10_obmc_sub_pixel_variance32x8_neon, 10),
4122   ObmcSubpelVarianceParams(3, 5,
4123                            &aom_highbd_10_obmc_sub_pixel_variance8x32_neon, 10),
4124   ObmcSubpelVarianceParams(4, 2,
4125                            &aom_highbd_10_obmc_sub_pixel_variance16x4_neon, 10),
4126   ObmcSubpelVarianceParams(2, 4,
4127                            &aom_highbd_10_obmc_sub_pixel_variance4x16_neon, 10),
4128   ObmcSubpelVarianceParams(
4129       7, 7, &aom_highbd_8_obmc_sub_pixel_variance128x128_neon, 8),
4130   ObmcSubpelVarianceParams(7, 6,
4131                            &aom_highbd_8_obmc_sub_pixel_variance128x64_neon, 8),
4132   ObmcSubpelVarianceParams(6, 7,
4133                            &aom_highbd_8_obmc_sub_pixel_variance64x128_neon, 8),
4134   ObmcSubpelVarianceParams(6, 6,
4135                            &aom_highbd_8_obmc_sub_pixel_variance64x64_neon, 8),
4136   ObmcSubpelVarianceParams(6, 5,
4137                            &aom_highbd_8_obmc_sub_pixel_variance64x32_neon, 8),
4138   ObmcSubpelVarianceParams(5, 6,
4139                            &aom_highbd_8_obmc_sub_pixel_variance32x64_neon, 8),
4140   ObmcSubpelVarianceParams(5, 5,
4141                            &aom_highbd_8_obmc_sub_pixel_variance32x32_neon, 8),
4142   ObmcSubpelVarianceParams(5, 4,
4143                            &aom_highbd_8_obmc_sub_pixel_variance32x16_neon, 8),
4144   ObmcSubpelVarianceParams(4, 5,
4145                            &aom_highbd_8_obmc_sub_pixel_variance16x32_neon, 8),
4146   ObmcSubpelVarianceParams(4, 4,
4147                            &aom_highbd_8_obmc_sub_pixel_variance16x16_neon, 8),
4148   ObmcSubpelVarianceParams(4, 3, &aom_highbd_8_obmc_sub_pixel_variance16x8_neon,
4149                            8),
4150   ObmcSubpelVarianceParams(3, 4, &aom_highbd_8_obmc_sub_pixel_variance8x16_neon,
4151                            8),
4152   ObmcSubpelVarianceParams(3, 3, &aom_highbd_8_obmc_sub_pixel_variance8x8_neon,
4153                            8),
4154   ObmcSubpelVarianceParams(3, 2, &aom_highbd_8_obmc_sub_pixel_variance8x4_neon,
4155                            8),
4156   ObmcSubpelVarianceParams(2, 3, &aom_highbd_8_obmc_sub_pixel_variance4x8_neon,
4157                            8),
4158   ObmcSubpelVarianceParams(2, 2, &aom_highbd_8_obmc_sub_pixel_variance4x4_neon,
4159                            8),
4160   ObmcSubpelVarianceParams(6, 4,
4161                            &aom_highbd_8_obmc_sub_pixel_variance64x16_neon, 8),
4162   ObmcSubpelVarianceParams(4, 6,
4163                            &aom_highbd_8_obmc_sub_pixel_variance16x64_neon, 8),
4164   ObmcSubpelVarianceParams(5, 3, &aom_highbd_8_obmc_sub_pixel_variance32x8_neon,
4165                            8),
4166   ObmcSubpelVarianceParams(3, 5, &aom_highbd_8_obmc_sub_pixel_variance8x32_neon,
4167                            8),
4168   ObmcSubpelVarianceParams(4, 2, &aom_highbd_8_obmc_sub_pixel_variance16x4_neon,
4169                            8),
4170   ObmcSubpelVarianceParams(2, 4, &aom_highbd_8_obmc_sub_pixel_variance4x16_neon,
4171                            8),
4172 };
4173 
// Instantiate AvxHBDObmcSubpelVarianceTest once per table entry, using "NEON"
// as the instantiation prefix.
4174 INSTANTIATE_TEST_SUITE_P(NEON, AvxHBDObmcSubpelVarianceTest,
4175                          ::testing::ValuesIn(kArrayHBDObmcSubpelVariance_neon));
4176 #endif  // !CONFIG_REALTIME_ONLY
4177 
4178 #endif  // CONFIG_AV1_HIGHBITDEPTH
4179 
4180 #endif  // HAVE_NEON
4181 
4182 #if HAVE_NEON_DOTPROD
4183 
// Parameter table for the NEON dot-product variance kernels
// (aom_variance<W>x<H>_neon_dotprod).
//
// In every entry the first two arguments equal log2(W) and log2(H) of the
// block size in the function name (e.g. 7, 6 for 128x64). Each kernel is
// listed exactly once; a second, out-of-order 64x64 entry that used to follow
// 128x128 was dropped because it only re-ran the same test. Entry order fixes
// the index gtest assigns to each parameter, so entries after 128x128 now sit
// one index lower than before.
4184 const VarianceParams kArrayVariance_neon_dotprod[] = {
4185   VarianceParams(7, 7, &aom_variance128x128_neon_dotprod),
4187   VarianceParams(7, 6, &aom_variance128x64_neon_dotprod),
4188   VarianceParams(6, 7, &aom_variance64x128_neon_dotprod),
4189   VarianceParams(6, 6, &aom_variance64x64_neon_dotprod),
4190   VarianceParams(6, 5, &aom_variance64x32_neon_dotprod),
4191   VarianceParams(5, 6, &aom_variance32x64_neon_dotprod),
4192   VarianceParams(5, 5, &aom_variance32x32_neon_dotprod),
4193   VarianceParams(5, 4, &aom_variance32x16_neon_dotprod),
4194   VarianceParams(4, 5, &aom_variance16x32_neon_dotprod),
4195   VarianceParams(4, 4, &aom_variance16x16_neon_dotprod),
4196   VarianceParams(4, 3, &aom_variance16x8_neon_dotprod),
4197   VarianceParams(3, 4, &aom_variance8x16_neon_dotprod),
4198   VarianceParams(3, 3, &aom_variance8x8_neon_dotprod),
4199   VarianceParams(3, 2, &aom_variance8x4_neon_dotprod),
4200   VarianceParams(2, 3, &aom_variance4x8_neon_dotprod),
4201   VarianceParams(2, 2, &aom_variance4x4_neon_dotprod),
// The 4:1 / 1:4 aspect-ratio sizes below are only listed when
// CONFIG_REALTIME_ONLY is 0.
4202 #if !CONFIG_REALTIME_ONLY
4203   VarianceParams(2, 4, &aom_variance4x16_neon_dotprod),
4204   VarianceParams(4, 2, &aom_variance16x4_neon_dotprod),
4205   VarianceParams(3, 5, &aom_variance8x32_neon_dotprod),
4206   VarianceParams(5, 3, &aom_variance32x8_neon_dotprod),
4207   VarianceParams(4, 6, &aom_variance16x64_neon_dotprod),
4208   VarianceParams(6, 4, &aom_variance64x16_neon_dotprod),
4209 #endif  // !CONFIG_REALTIME_ONLY
4210 };
4211 
// Instantiate AvxVarianceTest once per table entry, using "NEON_DOTPROD" as
// the instantiation prefix.
4212 INSTANTIATE_TEST_SUITE_P(NEON_DOTPROD, AvxVarianceTest,
4213                          ::testing::ValuesIn(kArrayVariance_neon_dotprod));
4214 
// Parameter table for aom_get_var_sse_sum_8x8_quad_neon_dotprod. Every entry
// points at the same kernel; only the first two arguments differ
// (7,7 / 6,6 / 5,5 / 5,4).
// NOTE(review): presumably these are log2 width/height of the region the test
// covers, as in the variance tables, and the trailing 0 is the bit-depth
// slot - confirm against the GetSseSumParams definition.
4215 const GetSseSumParams kArrayGetSseSum8x8Quad_neon_dotprod[] = {
4216   GetSseSumParams(7, 7, &aom_get_var_sse_sum_8x8_quad_neon_dotprod, 0),
4217   GetSseSumParams(6, 6, &aom_get_var_sse_sum_8x8_quad_neon_dotprod, 0),
4218   GetSseSumParams(5, 5, &aom_get_var_sse_sum_8x8_quad_neon_dotprod, 0),
4219   GetSseSumParams(5, 4, &aom_get_var_sse_sum_8x8_quad_neon_dotprod, 0)
4220 };
// Instantiate GetSseSum8x8QuadTest once per table entry, using "NEON_DOTPROD"
// as the instantiation prefix.
4221 INSTANTIATE_TEST_SUITE_P(
4222     NEON_DOTPROD, GetSseSum8x8QuadTest,
4223     ::testing::ValuesIn(kArrayGetSseSum8x8Quad_neon_dotprod));
4224 
// Parameter table for aom_get_var_sse_sum_16x16_dual_neon_dotprod. Every entry
// points at the same kernel; only the first two arguments differ
// (7,7 / 6,6 / 5,5 / 5,4).
// NOTE(review): presumably these are log2 width/height of the region the test
// covers, as in the variance tables, and the trailing 0 is the bit-depth
// slot - confirm against the GetSseSumParamsDual definition.
4225 const GetSseSumParamsDual kArrayGetSseSum16x16Dual_neon_dotprod[] = {
4226   GetSseSumParamsDual(7, 7, &aom_get_var_sse_sum_16x16_dual_neon_dotprod, 0),
4227   GetSseSumParamsDual(6, 6, &aom_get_var_sse_sum_16x16_dual_neon_dotprod, 0),
4228   GetSseSumParamsDual(5, 5, &aom_get_var_sse_sum_16x16_dual_neon_dotprod, 0),
4229   GetSseSumParamsDual(5, 4, &aom_get_var_sse_sum_16x16_dual_neon_dotprod, 0)
4230 };
// Instantiate GetSseSum16x16DualTest once per table entry, using
// "NEON_DOTPROD" as the instantiation prefix.
4231 INSTANTIATE_TEST_SUITE_P(
4232     NEON_DOTPROD, GetSseSum16x16DualTest,
4233     ::testing::ValuesIn(kArrayGetSseSum16x16Dual_neon_dotprod));
4234 
// Instantiate AvxMseTest for the four NEON dot-product MSE kernels, using
// "NEON_DOTPROD" as the instantiation prefix. The first two MseParams
// arguments equal log2(W) and log2(H) of the block size in the function name
// (8x8, 8x16, 16x16, 16x8).
4235 INSTANTIATE_TEST_SUITE_P(
4236     NEON_DOTPROD, AvxMseTest,
4237     ::testing::Values(MseParams(3, 3, &aom_mse8x8_neon_dotprod),
4238                       MseParams(3, 4, &aom_mse8x16_neon_dotprod),
4239                       MseParams(4, 4, &aom_mse16x16_neon_dotprod),
4240                       MseParams(4, 3, &aom_mse16x8_neon_dotprod)));
4241 
4242 #endif  // HAVE_NEON_DOTPROD
4243 
4244 #if HAVE_SVE
4245 
4246 #if CONFIG_AV1_HIGHBITDEPTH
// Parameter table for the SVE high-bitdepth variance kernels
// (aom_highbd_{12,10,8}_variance<W>x<H>_sve).
//
// In every entry the first two arguments equal log2(W) and log2(H) of the
// block size in the function name, and the last argument equals the bit depth
// in the function name. Entries are grouped in the order 12-bit, 10-bit,
// 8-bit; entry order fixes the index gtest assigns to each parameter.
4247 const VarianceParams kArrayHBDVariance_sve[] = {
4248   VarianceParams(7, 7, &aom_highbd_12_variance128x128_sve, 12),
4249   VarianceParams(7, 6, &aom_highbd_12_variance128x64_sve, 12),
4250   VarianceParams(6, 7, &aom_highbd_12_variance64x128_sve, 12),
4251   VarianceParams(6, 6, &aom_highbd_12_variance64x64_sve, 12),
4252   VarianceParams(6, 5, &aom_highbd_12_variance64x32_sve, 12),
4253   VarianceParams(5, 6, &aom_highbd_12_variance32x64_sve, 12),
4254   VarianceParams(5, 5, &aom_highbd_12_variance32x32_sve, 12),
4255   VarianceParams(5, 4, &aom_highbd_12_variance32x16_sve, 12),
4256   VarianceParams(4, 5, &aom_highbd_12_variance16x32_sve, 12),
4257   VarianceParams(4, 4, &aom_highbd_12_variance16x16_sve, 12),
4258   VarianceParams(4, 3, &aom_highbd_12_variance16x8_sve, 12),
4259   VarianceParams(3, 4, &aom_highbd_12_variance8x16_sve, 12),
4260   VarianceParams(3, 3, &aom_highbd_12_variance8x8_sve, 12),
4261   VarianceParams(3, 2, &aom_highbd_12_variance8x4_sve, 12),
4262   VarianceParams(2, 3, &aom_highbd_12_variance4x8_sve, 12),
4263   VarianceParams(2, 2, &aom_highbd_12_variance4x4_sve, 12),
4264   VarianceParams(7, 7, &aom_highbd_10_variance128x128_sve, 10),
4265   VarianceParams(7, 6, &aom_highbd_10_variance128x64_sve, 10),
4266   VarianceParams(6, 7, &aom_highbd_10_variance64x128_sve, 10),
4267   VarianceParams(6, 6, &aom_highbd_10_variance64x64_sve, 10),
4268   VarianceParams(6, 5, &aom_highbd_10_variance64x32_sve, 10),
4269   VarianceParams(5, 6, &aom_highbd_10_variance32x64_sve, 10),
4270   VarianceParams(5, 5, &aom_highbd_10_variance32x32_sve, 10),
4271   VarianceParams(5, 4, &aom_highbd_10_variance32x16_sve, 10),
4272   VarianceParams(4, 5, &aom_highbd_10_variance16x32_sve, 10),
4273   VarianceParams(4, 4, &aom_highbd_10_variance16x16_sve, 10),
4274   VarianceParams(4, 3, &aom_highbd_10_variance16x8_sve, 10),
4275   VarianceParams(3, 4, &aom_highbd_10_variance8x16_sve, 10),
4276   VarianceParams(3, 3, &aom_highbd_10_variance8x8_sve, 10),
4277   VarianceParams(3, 2, &aom_highbd_10_variance8x4_sve, 10),
4278   VarianceParams(2, 3, &aom_highbd_10_variance4x8_sve, 10),
4279   VarianceParams(2, 2, &aom_highbd_10_variance4x4_sve, 10),
4280   VarianceParams(7, 7, &aom_highbd_8_variance128x128_sve, 8),
4281   VarianceParams(7, 6, &aom_highbd_8_variance128x64_sve, 8),
4282   VarianceParams(6, 7, &aom_highbd_8_variance64x128_sve, 8),
4283   VarianceParams(6, 6, &aom_highbd_8_variance64x64_sve, 8),
4284   VarianceParams(6, 5, &aom_highbd_8_variance64x32_sve, 8),
4285   VarianceParams(5, 6, &aom_highbd_8_variance32x64_sve, 8),
4286   VarianceParams(5, 5, &aom_highbd_8_variance32x32_sve, 8),
4287   VarianceParams(5, 4, &aom_highbd_8_variance32x16_sve, 8),
4288   VarianceParams(4, 5, &aom_highbd_8_variance16x32_sve, 8),
4289   VarianceParams(4, 4, &aom_highbd_8_variance16x16_sve, 8),
4290   VarianceParams(4, 3, &aom_highbd_8_variance16x8_sve, 8),
4291   VarianceParams(3, 4, &aom_highbd_8_variance8x16_sve, 8),
4292   VarianceParams(3, 3, &aom_highbd_8_variance8x8_sve, 8),
4293   VarianceParams(3, 2, &aom_highbd_8_variance8x4_sve, 8),
4294   VarianceParams(2, 3, &aom_highbd_8_variance4x8_sve, 8),
4295   VarianceParams(2, 2, &aom_highbd_8_variance4x4_sve, 8),
// The 4:1 / 1:4 aspect-ratio sizes below are only listed when
// CONFIG_REALTIME_ONLY is 0.
4296 #if !CONFIG_REALTIME_ONLY
4297   VarianceParams(6, 4, &aom_highbd_12_variance64x16_sve, 12),
4298   VarianceParams(4, 6, &aom_highbd_12_variance16x64_sve, 12),
4299   VarianceParams(5, 3, &aom_highbd_12_variance32x8_sve, 12),
4300   VarianceParams(3, 5, &aom_highbd_12_variance8x32_sve, 12),
4301   VarianceParams(4, 2, &aom_highbd_12_variance16x4_sve, 12),
4302   VarianceParams(2, 4, &aom_highbd_12_variance4x16_sve, 12),
4303   VarianceParams(6, 4, &aom_highbd_10_variance64x16_sve, 10),
4304   VarianceParams(4, 6, &aom_highbd_10_variance16x64_sve, 10),
4305   VarianceParams(5, 3, &aom_highbd_10_variance32x8_sve, 10),
4306   VarianceParams(3, 5, &aom_highbd_10_variance8x32_sve, 10),
4307   VarianceParams(4, 2, &aom_highbd_10_variance16x4_sve, 10),
4308   VarianceParams(2, 4, &aom_highbd_10_variance4x16_sve, 10),
4309   VarianceParams(6, 4, &aom_highbd_8_variance64x16_sve, 8),
4310   VarianceParams(4, 6, &aom_highbd_8_variance16x64_sve, 8),
4311   VarianceParams(5, 3, &aom_highbd_8_variance32x8_sve, 8),
4312   VarianceParams(3, 5, &aom_highbd_8_variance8x32_sve, 8),
4313   VarianceParams(4, 2, &aom_highbd_8_variance16x4_sve, 8),
4314   VarianceParams(2, 4, &aom_highbd_8_variance4x16_sve, 8),
4315 #endif  // !CONFIG_REALTIME_ONLY
4316 };
4317 
// Instantiate AvxHBDVarianceTest once per table entry, using "SVE" as the
// instantiation prefix.
4318 INSTANTIATE_TEST_SUITE_P(SVE, AvxHBDVarianceTest,
4319                          ::testing::ValuesIn(kArrayHBDVariance_sve));
4320 
4321 #endif  // CONFIG_AV1_HIGHBITDEPTH
4322 #endif  // HAVE_SVE
4323 
4324 }  // namespace
4325