xref: /aosp_15_r20/external/libaom/test/cfl_test.cc (revision 77c1e3ccc04c968bd2bc212e87364f250e820521)
1 /*
2  * Copyright (c) 2017, Alliance for Open Media. All rights reserved.
3  *
4  * This source code is subject to the terms of the BSD 2 Clause License and
5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6  * was not distributed with this source code in the LICENSE file, you can
7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8  * Media Patent License 1.0 was not distributed with this source code in the
9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10  */
11 
12 #include <tuple>
13 
14 #include "gtest/gtest.h"
15 
16 #include "config/av1_rtcd.h"
17 
18 #include "aom_ports/aom_timer.h"
19 #include "test/util.h"
20 #include "test/acm_random.h"
21 
22 using std::make_tuple;
23 
24 using libaom_test::ACMRandom;
25 
// Rounds of fresh random input exercised by each correctness test.
#define NUM_ITERATIONS (100)
// Back-to-back calls timed per implementation in the DISABLED_*SpeedTest cases.
#define NUM_ITERATIONS_SPEED (INT16_MAX)
28 
// Expands to a comma-separated list of (TX_SIZE, &function) tuples, one per
// transform size listed below, suitable as the initializer of a parameter
// array. `function` is the getter whose address is stored in every tuple.
#define ALL_CFL_TX_SIZES(function)                           \
  make_tuple(static_cast<TX_SIZE>(TX_4X4), &function),       \
      make_tuple(static_cast<TX_SIZE>(TX_4X8), &function),   \
      make_tuple(static_cast<TX_SIZE>(TX_4X16), &function),  \
      make_tuple(static_cast<TX_SIZE>(TX_8X4), &function),   \
      make_tuple(static_cast<TX_SIZE>(TX_8X8), &function),   \
      make_tuple(static_cast<TX_SIZE>(TX_8X16), &function),  \
      make_tuple(static_cast<TX_SIZE>(TX_8X32), &function),  \
      make_tuple(static_cast<TX_SIZE>(TX_16X4), &function),  \
      make_tuple(static_cast<TX_SIZE>(TX_16X8), &function),  \
      make_tuple(static_cast<TX_SIZE>(TX_16X16), &function), \
      make_tuple(static_cast<TX_SIZE>(TX_16X32), &function), \
      make_tuple(static_cast<TX_SIZE>(TX_32X8), &function),  \
      make_tuple(static_cast<TX_SIZE>(TX_32X16), &function), \
      make_tuple(static_cast<TX_SIZE>(TX_32X32), &function)
44 
// Same transform-size list as ALL_CFL_TX_SIZES, but every tuple carries three
// getter addresses (fun420, fun422, fun444 in that order) after the TX_SIZE.
#define ALL_CFL_TX_SIZES_SUBSAMPLE(fun420, fun422, fun444)                   \
  make_tuple(static_cast<TX_SIZE>(TX_4X4), &fun420, &fun422, &fun444),       \
      make_tuple(static_cast<TX_SIZE>(TX_4X8), &fun420, &fun422, &fun444),   \
      make_tuple(static_cast<TX_SIZE>(TX_4X16), &fun420, &fun422, &fun444),  \
      make_tuple(static_cast<TX_SIZE>(TX_8X4), &fun420, &fun422, &fun444),   \
      make_tuple(static_cast<TX_SIZE>(TX_8X8), &fun420, &fun422, &fun444),   \
      make_tuple(static_cast<TX_SIZE>(TX_8X16), &fun420, &fun422, &fun444),  \
      make_tuple(static_cast<TX_SIZE>(TX_8X32), &fun420, &fun422, &fun444),  \
      make_tuple(static_cast<TX_SIZE>(TX_16X4), &fun420, &fun422, &fun444),  \
      make_tuple(static_cast<TX_SIZE>(TX_16X8), &fun420, &fun422, &fun444),  \
      make_tuple(static_cast<TX_SIZE>(TX_16X16), &fun420, &fun422, &fun444), \
      make_tuple(static_cast<TX_SIZE>(TX_16X32), &fun420, &fun422, &fun444), \
      make_tuple(static_cast<TX_SIZE>(TX_32X8), &fun420, &fun422, &fun444),  \
      make_tuple(static_cast<TX_SIZE>(TX_32X16), &fun420, &fun422, &fun444), \
      make_tuple(static_cast<TX_SIZE>(TX_32X32), &fun420, &fun422, &fun444)
60 
61 namespace {
62 
// Compares the top-left width x height region of two buffers whose rows are
// CFL_BUF_LINE elements apart. ASSERT_EQ is used, so the first mismatch
// records a fatal failure and returns from this helper only; the calling test
// body keeps running.
template <typename A>
static void assert_eq(const A *a, const A *b, int width, int height) {
  for (int j = 0; j < height; j++) {
    for (int i = 0; i < width; i++) {
      ASSERT_EQ(a[j * CFL_BUF_LINE + i], b[j * CFL_BUF_LINE + i]);
    }
  }
}
71 
// Records a non-fatal failure (EXPECT_GT) unless ref_elapsed_time is strictly
// greater than elapsed_time; both values are printed with a "us" suffix.
// NOTE(review): the message always names "CFLSubtractSpeedTest", although the
// subsample and predict speed tests call this helper too.
static void assertFaster(int ref_elapsed_time, int elapsed_time) {
  EXPECT_GT(ref_elapsed_time, elapsed_time)
      << "Error: CFLSubtractSpeedTest, SIMD slower than C." << std::endl
      << "C time: " << ref_elapsed_time << " us" << std::endl
      << "SIMD time: " << elapsed_time << " us" << std::endl;
}
78 
// Prints one timing line to std::cout in the form
//   "[          ] <width>x<height>: C time = <ref> us, SIMD time = <t> us (~<ratio>x) "
// where <ratio> is ref_elapsed_time / elapsed_time shown with two significant
// digits. The stream's previous precision is restored before returning so the
// formatting choice made here does not affect later output.
static void printSpeed(int ref_elapsed_time, int elapsed_time, int width,
                       int height) {
  // precision(n) returns the setting that was active before the call.
  const auto saved_precision = std::cout.precision(2);
  std::cout << "[          ] " << width << "x" << height
            << ": C time = " << ref_elapsed_time
            << " us, SIMD time = " << elapsed_time << " us"
            << " (~" << ref_elapsed_time / static_cast<double>(elapsed_time)
            << "x) " << std::endl;
  std::cout.precision(saved_precision);
}
88 
89 class CFLTest {
90  public:
91   virtual ~CFLTest() = default;
init(TX_SIZE tx)92   void init(TX_SIZE tx) {
93     tx_size = tx;
94     width = tx_size_wide[tx_size];
95     height = tx_size_high[tx_size];
96     rnd.Reset(ACMRandom::DeterministicSeed());
97   }
98 
99  protected:
100   TX_SIZE tx_size;
101   int width;
102   int height;
103   ACMRandom rnd;
104 };
105 
106 template <typename I>
107 class CFLTestWithData : public CFLTest {
108  public:
109   ~CFLTestWithData() override = default;
110 
111  protected:
112   I data[CFL_BUF_SQUARE];
113   I data_ref[CFL_BUF_SQUARE];
randData(I (ACMRandom::* random)())114   void randData(I (ACMRandom::*random)()) {
115     for (int j = 0; j < this->height; j++) {
116       for (int i = 0; i < this->width; i++) {
117         const I d = (this->rnd.*random)();
118         data[j * CFL_BUF_LINE + i] = d;
119         data_ref[j * CFL_BUF_LINE + i] = d;
120       }
121     }
122   }
123 };
124 
125 template <typename I>
126 class CFLTestWithAlignedData : public CFLTest {
127  public:
~CFLTestWithAlignedData()128   ~CFLTestWithAlignedData() override {
129     aom_free(chroma_pels_ref);
130     aom_free(sub_luma_pels_ref);
131     aom_free(chroma_pels);
132     aom_free(sub_luma_pels);
133   }
134 
135  protected:
init()136   void init() {
137     chroma_pels_ref =
138         reinterpret_cast<I *>(aom_memalign(32, sizeof(I) * CFL_BUF_SQUARE));
139     ASSERT_NE(chroma_pels_ref, nullptr);
140     chroma_pels =
141         reinterpret_cast<I *>(aom_memalign(32, sizeof(I) * CFL_BUF_SQUARE));
142     ASSERT_NE(chroma_pels, nullptr);
143     sub_luma_pels_ref = reinterpret_cast<int16_t *>(
144         aom_memalign(32, sizeof(int16_t) * CFL_BUF_SQUARE));
145     ASSERT_NE(sub_luma_pels_ref, nullptr);
146     sub_luma_pels = reinterpret_cast<int16_t *>(
147         aom_memalign(32, sizeof(int16_t) * CFL_BUF_SQUARE));
148     ASSERT_NE(sub_luma_pels, nullptr);
149     memset(chroma_pels_ref, 0, sizeof(I) * CFL_BUF_SQUARE);
150     memset(chroma_pels, 0, sizeof(I) * CFL_BUF_SQUARE);
151     memset(sub_luma_pels_ref, 0, sizeof(int16_t) * CFL_BUF_SQUARE);
152     memset(sub_luma_pels, 0, sizeof(int16_t) * CFL_BUF_SQUARE);
153   }
154 
155   I *chroma_pels_ref;
156   I *chroma_pels;
157   int16_t *sub_luma_pels_ref;
158   int16_t *sub_luma_pels;
159   int alpha_q3;
160   I dc;
randData(int bd)161   void randData(int bd) {
162     alpha_q3 = this->rnd(33) - 16;
163     dc = this->rnd(1 << bd);
164     for (int j = 0; j < this->height; j++) {
165       for (int i = 0; i < this->width; i++) {
166         chroma_pels[j * CFL_BUF_LINE + i] = dc;
167         chroma_pels_ref[j * CFL_BUF_LINE + i] = dc;
168         sub_luma_pels_ref[j * CFL_BUF_LINE + i] =
169             sub_luma_pels[j * CFL_BUF_LINE + i] = this->rnd(1 << (bd + 3));
170       }
171     }
172   }
173 };
174 
175 typedef cfl_subtract_average_fn (*sub_avg_fn)(TX_SIZE tx_size);
176 typedef std::tuple<TX_SIZE, sub_avg_fn> sub_avg_param;
177 class CFLSubAvgTest : public ::testing::TestWithParam<sub_avg_param>,
178                       public CFLTestWithData<int16_t> {
179  public:
SetUp()180   void SetUp() override {
181     CFLTest::init(std::get<0>(this->GetParam()));
182     sub_avg = std::get<1>(this->GetParam())(tx_size);
183     sub_avg_ref = cfl_get_subtract_average_fn_c(tx_size);
184   }
185   ~CFLSubAvgTest() override = default;
186 
187  protected:
188   cfl_subtract_average_fn sub_avg;
189   cfl_subtract_average_fn sub_avg_ref;
190 };
191 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(CFLSubAvgTest);
192 
// Each round fills data/data_ref with the same Rand15 values, runs the tested
// and the reference function in place (source and destination are the same
// buffer), and requires identical results over the width x height region.
TEST_P(CFLSubAvgTest, SubAvgTest) {
  for (int round = 0; round < NUM_ITERATIONS; round++) {
    randData(&ACMRandom::Rand15);
    sub_avg(reinterpret_cast<uint16_t *>(data), data);
    sub_avg_ref(reinterpret_cast<uint16_t *>(data_ref), data_ref);
    assert_eq<int16_t>(data, data_ref, width, height);
  }
}
201 
// Times NUM_ITERATIONS_SPEED in-place calls of the reference, then of the
// implementation under test, prints both timings and expects the tested
// implementation to be strictly faster.
TEST_P(CFLSubAvgTest, DISABLED_SubAvgSpeedTest) {
  aom_usec_timer ref_timer;
  aom_usec_timer timer;
  randData(&ACMRandom::Rand15);
  // Reference pass.
  aom_usec_timer_start(&ref_timer);
  for (int k = 0; k < NUM_ITERATIONS_SPEED; k++) {
    sub_avg_ref((uint16_t *)data_ref, data_ref);
  }
  aom_usec_timer_mark(&ref_timer);
  int ref_elapsed_time = (int)aom_usec_timer_elapsed(&ref_timer);
  // Pass for the implementation under test.
  aom_usec_timer_start(&timer);
  for (int k = 0; k < NUM_ITERATIONS_SPEED; k++) {
    sub_avg((uint16_t *)data, data);
  }
  aom_usec_timer_mark(&timer);
  int elapsed_time = (int)aom_usec_timer_elapsed(&timer);
  printSpeed(ref_elapsed_time, elapsed_time, width, height);
  assertFaster(ref_elapsed_time, elapsed_time);
}
221 
222 template <typename S, typename T, typename I>
223 class CFLSubsampleTest : public ::testing::TestWithParam<S>,
224                          public CFLTestWithData<I> {
225  public:
SetUp()226   void SetUp() override {
227     CFLTest::init(std::get<0>(this->GetParam()));
228     fun_420 = std::get<1>(this->GetParam())(this->tx_size);
229     fun_422 = std::get<2>(this->GetParam())(this->tx_size);
230     fun_444 = std::get<3>(this->GetParam())(this->tx_size);
231   }
232 
233  protected:
234   T fun_420;
235   T fun_422;
236   T fun_444;
237   T fun_420_ref;
238   T fun_422_ref;
239   T fun_444_ref;
240 
subsampleTest(T fun,T fun_ref,int sub_width,int sub_height,I (ACMRandom::* random)())241   void subsampleTest(T fun, T fun_ref, int sub_width, int sub_height,
242                      I (ACMRandom::*random)()) {
243     uint16_t sub_luma_pels[CFL_BUF_SQUARE];
244     uint16_t sub_luma_pels_ref[CFL_BUF_SQUARE];
245 
246     for (int it = 0; it < NUM_ITERATIONS; it++) {
247       CFLTestWithData<I>::randData(random);
248       fun(this->data, CFL_BUF_LINE, sub_luma_pels);
249       fun_ref(this->data_ref, CFL_BUF_LINE, sub_luma_pels_ref);
250       assert_eq<uint16_t>(sub_luma_pels, sub_luma_pels_ref, sub_width,
251                           sub_height);
252     }
253   }
254 
subsampleSpeedTest(T fun,T fun_ref,I (ACMRandom::* random)())255   void subsampleSpeedTest(T fun, T fun_ref, I (ACMRandom::*random)()) {
256     uint16_t sub_luma_pels[CFL_BUF_SQUARE];
257     uint16_t sub_luma_pels_ref[CFL_BUF_SQUARE];
258     aom_usec_timer ref_timer;
259     aom_usec_timer timer;
260 
261     CFLTestWithData<I>::randData(random);
262     aom_usec_timer_start(&ref_timer);
263     for (int k = 0; k < NUM_ITERATIONS_SPEED; k++) {
264       fun_ref(this->data_ref, CFL_BUF_LINE, sub_luma_pels);
265     }
266     aom_usec_timer_mark(&ref_timer);
267     int ref_elapsed_time = (int)aom_usec_timer_elapsed(&ref_timer);
268     aom_usec_timer_start(&timer);
269     for (int k = 0; k < NUM_ITERATIONS_SPEED; k++) {
270       fun(this->data, CFL_BUF_LINE, sub_luma_pels_ref);
271     }
272     aom_usec_timer_mark(&timer);
273     int elapsed_time = (int)aom_usec_timer_elapsed(&timer);
274     printSpeed(ref_elapsed_time, elapsed_time, this->width, this->height);
275     assertFaster(ref_elapsed_time, elapsed_time);
276   }
277 };
278 
// Getter that maps a transform size to a cfl_subsample_lbd_fn.
typedef cfl_subsample_lbd_fn (*get_subsample_lbd_fn)(TX_SIZE tx_size);
// Test parameter: transform size plus the 420, 422 and 444 getters, in the
// order CFLSubsampleTest::SetUp() reads them.
typedef std::tuple<TX_SIZE, get_subsample_lbd_fn, get_subsample_lbd_fn,
                   get_subsample_lbd_fn>
    subsample_lbd_param;
// Subsampling fixture with uint8_t input; supplies the C reference functions
// that the base fixture leaves unset.
class CFLSubsampleLBDTest
    : public CFLSubsampleTest<subsample_lbd_param, cfl_subsample_lbd_fn,
                              uint8_t> {
 public:
  ~CFLSubsampleLBDTest() override = default;
  void SetUp() override {
    // Base SetUp() runs first: it initializes tx_size, which is used below.
    CFLSubsampleTest::SetUp();
    fun_420_ref = cfl_get_luma_subsampling_420_lbd_c(tx_size);
    fun_422_ref = cfl_get_luma_subsampling_422_lbd_c(tx_size);
    fun_444_ref = cfl_get_luma_subsampling_444_lbd_c(tx_size);
  }
};
GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(CFLSubsampleLBDTest);
296 
// 420 variant: output is compared over (width / 2) x (height / 2) samples.
TEST_P(CFLSubsampleLBDTest, SubsampleLBD420Test) {
  subsampleTest(fun_420, fun_420_ref, width >> 1, height >> 1,
                &ACMRandom::Rand8);
}
301 
// Timing comparison of the 420 variant against its C reference.
TEST_P(CFLSubsampleLBDTest, DISABLED_SubsampleLBD420SpeedTest) {
  subsampleSpeedTest(fun_420, fun_420_ref, &ACMRandom::Rand8);
}
305 
// 422 variant: output is compared over (width / 2) x height samples.
TEST_P(CFLSubsampleLBDTest, SubsampleLBD422Test) {
  subsampleTest(fun_422, fun_422_ref, width >> 1, height, &ACMRandom::Rand8);
}
309 
// Timing comparison of the 422 variant against its C reference.
TEST_P(CFLSubsampleLBDTest, DISABLED_SubsampleLBD422SpeedTest) {
  subsampleSpeedTest(fun_422, fun_422_ref, &ACMRandom::Rand8);
}
313 
// 444 variant: output is compared over the full width x height region.
TEST_P(CFLSubsampleLBDTest, SubsampleLBD444Test) {
  subsampleTest(fun_444, fun_444_ref, width, height, &ACMRandom::Rand8);
}
317 
// Timing comparison of the 444 variant against its C reference.
TEST_P(CFLSubsampleLBDTest, DISABLED_SubsampleLBD444SpeedTest) {
  subsampleSpeedTest(fun_444, fun_444_ref, &ACMRandom::Rand8);
}
321 
322 #if CONFIG_AV1_HIGHBITDEPTH
// Getter that maps a transform size to a cfl_subsample_hbd_fn.
typedef cfl_subsample_hbd_fn (*get_subsample_hbd_fn)(TX_SIZE tx_size);
// Test parameter: transform size plus the 420, 422 and 444 getters, in the
// order CFLSubsampleTest::SetUp() reads them.
typedef std::tuple<TX_SIZE, get_subsample_hbd_fn, get_subsample_hbd_fn,
                   get_subsample_hbd_fn>
    subsample_hbd_param;
// Subsampling fixture with uint16_t input; supplies the C reference functions
// that the base fixture leaves unset.
class CFLSubsampleHBDTest
    : public CFLSubsampleTest<subsample_hbd_param, cfl_subsample_hbd_fn,
                              uint16_t> {
 public:
  ~CFLSubsampleHBDTest() override = default;
  void SetUp() override {
    // Base SetUp() runs first: it initializes tx_size, which is used below.
    CFLSubsampleTest::SetUp();
    fun_420_ref = cfl_get_luma_subsampling_420_hbd_c(tx_size);
    fun_422_ref = cfl_get_luma_subsampling_422_hbd_c(tx_size);
    fun_444_ref = cfl_get_luma_subsampling_444_hbd_c(tx_size);
  }
};
GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(CFLSubsampleHBDTest);
340 
// 420 variant: output is compared over (width / 2) x (height / 2) samples.
TEST_P(CFLSubsampleHBDTest, SubsampleHBD420Test) {
  subsampleTest(fun_420, fun_420_ref, width >> 1, height >> 1,
                &ACMRandom::Rand12);
}
345 
// Timing comparison of the 420 variant against its C reference.
TEST_P(CFLSubsampleHBDTest, DISABLED_SubsampleHBD420SpeedTest) {
  subsampleSpeedTest(fun_420, fun_420_ref, &ACMRandom::Rand12);
}
349 
// 422 variant: output is compared over (width / 2) x height samples.
TEST_P(CFLSubsampleHBDTest, SubsampleHBD422Test) {
  subsampleTest(fun_422, fun_422_ref, width >> 1, height, &ACMRandom::Rand12);
}
353 
// Timing comparison of the 422 variant against its C reference.
TEST_P(CFLSubsampleHBDTest, DISABLED_SubsampleHBD422SpeedTest) {
  subsampleSpeedTest(fun_422, fun_422_ref, &ACMRandom::Rand12);
}
357 
// 444 variant: output is compared over the full width x height region.
TEST_P(CFLSubsampleHBDTest, SubsampleHBD444Test) {
  subsampleTest(fun_444, fun_444_ref, width, height, &ACMRandom::Rand12);
}
361 
// Timing comparison of the 444 variant against its C reference.
TEST_P(CFLSubsampleHBDTest, DISABLED_SubsampleHBD444SpeedTest) {
  subsampleSpeedTest(fun_444, fun_444_ref, &ACMRandom::Rand12);
}
365 #endif  // CONFIG_AV1_HIGHBITDEPTH
366 
367 typedef cfl_predict_lbd_fn (*get_predict_fn)(TX_SIZE tx_size);
368 typedef std::tuple<TX_SIZE, get_predict_fn> predict_param;
369 class CFLPredictTest : public ::testing::TestWithParam<predict_param>,
370                        public CFLTestWithAlignedData<uint8_t> {
371  public:
SetUp()372   void SetUp() override {
373     CFLTest::init(std::get<0>(this->GetParam()));
374     CFLTestWithAlignedData::init();
375     predict = std::get<1>(this->GetParam())(tx_size);
376     predict_ref = cfl_get_predict_lbd_fn_c(tx_size);
377   }
378   ~CFLPredictTest() override = default;
379 
380  protected:
381   cfl_predict_lbd_fn predict;
382   cfl_predict_lbd_fn predict_ref;
383 };
384 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(CFLPredictTest);
385 
// Each round regenerates alpha_q3, dc and the luma input via randData(8),
// runs both predictors on their own buffers, and requires identical chroma
// output over the width x height region.
TEST_P(CFLPredictTest, PredictTest) {
  for (int round = 0; round < NUM_ITERATIONS; round++) {
    randData(8);
    predict(sub_luma_pels, chroma_pels, CFL_BUF_LINE, alpha_q3);
    predict_ref(sub_luma_pels_ref, chroma_pels_ref, CFL_BUF_LINE, alpha_q3);
    assert_eq<uint8_t>(chroma_pels, chroma_pels_ref, width, height);
  }
}
// Times NUM_ITERATIONS_SPEED calls of the reference predictor, then of the
// predictor under test, prints both timings and expects the tested one to be
// strictly faster.
TEST_P(CFLPredictTest, DISABLED_PredictSpeedTest) {
  aom_usec_timer ref_timer;
  aom_usec_timer timer;
  randData(8);
  // Reference pass.
  aom_usec_timer_start(&ref_timer);
  for (int k = 0; k < NUM_ITERATIONS_SPEED; k++) {
    predict_ref(sub_luma_pels_ref, chroma_pels_ref, CFL_BUF_LINE, alpha_q3);
  }
  aom_usec_timer_mark(&ref_timer);
  int ref_elapsed_time = (int)aom_usec_timer_elapsed(&ref_timer);

  // Pass for the implementation under test.
  aom_usec_timer_start(&timer);
  for (int k = 0; k < NUM_ITERATIONS_SPEED; k++) {
    predict(sub_luma_pels, chroma_pels, CFL_BUF_LINE, alpha_q3);
  }
  aom_usec_timer_mark(&timer);
  int elapsed_time = (int)aom_usec_timer_elapsed(&timer);
  printSpeed(ref_elapsed_time, elapsed_time, width, height);
  assertFaster(ref_elapsed_time, elapsed_time);
}
414 
415 #if CONFIG_AV1_HIGHBITDEPTH
416 typedef cfl_predict_hbd_fn (*get_predict_fn_hbd)(TX_SIZE tx_size);
417 typedef std::tuple<TX_SIZE, get_predict_fn_hbd> predict_param_hbd;
418 class CFLPredictHBDTest : public ::testing::TestWithParam<predict_param_hbd>,
419                           public CFLTestWithAlignedData<uint16_t> {
420  public:
SetUp()421   void SetUp() override {
422     CFLTest::init(std::get<0>(this->GetParam()));
423     CFLTestWithAlignedData::init();
424     predict = std::get<1>(this->GetParam())(tx_size);
425     predict_ref = cfl_get_predict_hbd_fn_c(tx_size);
426   }
427   ~CFLPredictHBDTest() override = default;
428 
429  protected:
430   cfl_predict_hbd_fn predict;
431   cfl_predict_hbd_fn predict_ref;
432 };
433 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(CFLPredictHBDTest);
434 
// Each round regenerates the inputs via randData(bd) with bd fixed at 12,
// runs both predictors on their own buffers, and requires identical chroma
// output over the width x height region.
TEST_P(CFLPredictHBDTest, PredictHBDTest) {
  const int bd = 12;
  for (int round = 0; round < NUM_ITERATIONS; round++) {
    randData(bd);
    predict(sub_luma_pels, chroma_pels, CFL_BUF_LINE, alpha_q3, bd);
    predict_ref(sub_luma_pels_ref, chroma_pels_ref, CFL_BUF_LINE, alpha_q3, bd);
    assert_eq<uint16_t>(chroma_pels, chroma_pels_ref, width, height);
  }
}
// Times NUM_ITERATIONS_SPEED calls of the reference predictor, then of the
// predictor under test (bd fixed at 12), prints both timings and expects the
// tested one to be strictly faster.
TEST_P(CFLPredictHBDTest, DISABLED_PredictHBDSpeedTest) {
  aom_usec_timer ref_timer;
  aom_usec_timer timer;
  const int bd = 12;
  randData(bd);
  // Reference pass.
  aom_usec_timer_start(&ref_timer);
  for (int k = 0; k < NUM_ITERATIONS_SPEED; k++) {
    predict_ref(sub_luma_pels_ref, chroma_pels_ref, CFL_BUF_LINE, alpha_q3, bd);
  }
  aom_usec_timer_mark(&ref_timer);
  int ref_elapsed_time = (int)aom_usec_timer_elapsed(&ref_timer);

  // Pass for the implementation under test.
  aom_usec_timer_start(&timer);
  for (int k = 0; k < NUM_ITERATIONS_SPEED; k++) {
    predict(sub_luma_pels, chroma_pels, CFL_BUF_LINE, alpha_q3, bd);
  }
  aom_usec_timer_mark(&timer);
  int elapsed_time = (int)aom_usec_timer_elapsed(&timer);
  printSpeed(ref_elapsed_time, elapsed_time, width, height);
  assertFaster(ref_elapsed_time, elapsed_time);
}
465 #endif  // CONFIG_AV1_HIGHBITDEPTH
466 
467 #if HAVE_SSE2
// SSE2: run CFLSubAvgTest for every listed transform size against the SSE2
// subtract-average getter.
const sub_avg_param sub_avg_sizes_sse2[] = { ALL_CFL_TX_SIZES(
    cfl_get_subtract_average_fn_sse2) };

INSTANTIATE_TEST_SUITE_P(SSE2, CFLSubAvgTest,
                         ::testing::ValuesIn(sub_avg_sizes_sse2));
473 
474 #endif
475 
476 #if HAVE_SSSE3
// SSSE3: parameter tables and instantiations for the uint8_t subsampling and
// prediction fixtures.
const subsample_lbd_param subsample_lbd_sizes_ssse3[] = {
  ALL_CFL_TX_SIZES_SUBSAMPLE(cfl_get_luma_subsampling_420_lbd_ssse3,
                             cfl_get_luma_subsampling_422_lbd_ssse3,
                             cfl_get_luma_subsampling_444_lbd_ssse3)
};

const predict_param predict_sizes_ssse3[] = { ALL_CFL_TX_SIZES(
    cfl_get_predict_lbd_fn_ssse3) };

INSTANTIATE_TEST_SUITE_P(SSSE3, CFLSubsampleLBDTest,
                         ::testing::ValuesIn(subsample_lbd_sizes_ssse3));

INSTANTIATE_TEST_SUITE_P(SSSE3, CFLPredictTest,
                         ::testing::ValuesIn(predict_sizes_ssse3));
491 
492 #if CONFIG_AV1_HIGHBITDEPTH
// SSSE3: parameter tables and instantiations for the uint16_t subsampling and
// prediction fixtures.
const subsample_hbd_param subsample_hbd_sizes_ssse3[] = {
  ALL_CFL_TX_SIZES_SUBSAMPLE(cfl_get_luma_subsampling_420_hbd_ssse3,
                             cfl_get_luma_subsampling_422_hbd_ssse3,
                             cfl_get_luma_subsampling_444_hbd_ssse3)
};

const predict_param_hbd predict_sizes_hbd_ssse3[] = { ALL_CFL_TX_SIZES(
    cfl_get_predict_hbd_fn_ssse3) };

INSTANTIATE_TEST_SUITE_P(SSSE3, CFLSubsampleHBDTest,
                         ::testing::ValuesIn(subsample_hbd_sizes_ssse3));

INSTANTIATE_TEST_SUITE_P(SSSE3, CFLPredictHBDTest,
                         ::testing::ValuesIn(predict_sizes_hbd_ssse3));
507 #endif  // CONFIG_AV1_HIGHBITDEPTH
508 #endif  // HAVE_SSSE3
509 
510 #if HAVE_AVX2
// AVX2: parameter tables and instantiations for the subtract-average fixture
// and the uint8_t subsampling and prediction fixtures.
const sub_avg_param sub_avg_sizes_avx2[] = { ALL_CFL_TX_SIZES(
    cfl_get_subtract_average_fn_avx2) };

const subsample_lbd_param subsample_lbd_sizes_avx2[] = {
  ALL_CFL_TX_SIZES_SUBSAMPLE(cfl_get_luma_subsampling_420_lbd_avx2,
                             cfl_get_luma_subsampling_422_lbd_avx2,
                             cfl_get_luma_subsampling_444_lbd_avx2)
};

const predict_param predict_sizes_avx2[] = { ALL_CFL_TX_SIZES(
    cfl_get_predict_lbd_fn_avx2) };

INSTANTIATE_TEST_SUITE_P(AVX2, CFLSubAvgTest,
                         ::testing::ValuesIn(sub_avg_sizes_avx2));

INSTANTIATE_TEST_SUITE_P(AVX2, CFLSubsampleLBDTest,
                         ::testing::ValuesIn(subsample_lbd_sizes_avx2));

INSTANTIATE_TEST_SUITE_P(AVX2, CFLPredictTest,
                         ::testing::ValuesIn(predict_sizes_avx2));
531 
532 #if CONFIG_AV1_HIGHBITDEPTH
// AVX2: parameter tables and instantiations for the uint16_t subsampling and
// prediction fixtures.
const subsample_hbd_param subsample_hbd_sizes_avx2[] = {
  ALL_CFL_TX_SIZES_SUBSAMPLE(cfl_get_luma_subsampling_420_hbd_avx2,
                             cfl_get_luma_subsampling_422_hbd_avx2,
                             cfl_get_luma_subsampling_444_hbd_avx2)
};

const predict_param_hbd predict_sizes_hbd_avx2[] = { ALL_CFL_TX_SIZES(
    cfl_get_predict_hbd_fn_avx2) };

INSTANTIATE_TEST_SUITE_P(AVX2, CFLSubsampleHBDTest,
                         ::testing::ValuesIn(subsample_hbd_sizes_avx2));

INSTANTIATE_TEST_SUITE_P(AVX2, CFLPredictHBDTest,
                         ::testing::ValuesIn(predict_sizes_hbd_avx2));
547 #endif  // CONFIG_AV1_HIGHBITDEPTH
548 #endif  // HAVE_AVX2
549 
550 #if HAVE_NEON
// NEON: parameter tables and instantiations for the subtract-average fixture
// and the uint8_t prediction and subsampling fixtures.
const sub_avg_param sub_avg_sizes_neon[] = { ALL_CFL_TX_SIZES(
    cfl_get_subtract_average_fn_neon) };

const predict_param predict_sizes_neon[] = { ALL_CFL_TX_SIZES(
    cfl_get_predict_lbd_fn_neon) };

const subsample_lbd_param subsample_lbd_sizes_neon[] = {
  ALL_CFL_TX_SIZES_SUBSAMPLE(cfl_get_luma_subsampling_420_lbd_neon,
                             cfl_get_luma_subsampling_422_lbd_neon,
                             cfl_get_luma_subsampling_444_lbd_neon)
};

INSTANTIATE_TEST_SUITE_P(NEON, CFLSubAvgTest,
                         ::testing::ValuesIn(sub_avg_sizes_neon));

INSTANTIATE_TEST_SUITE_P(NEON, CFLSubsampleLBDTest,
                         ::testing::ValuesIn(subsample_lbd_sizes_neon));

INSTANTIATE_TEST_SUITE_P(NEON, CFLPredictTest,
                         ::testing::ValuesIn(predict_sizes_neon));
571 
572 #if CONFIG_AV1_HIGHBITDEPTH
// NEON: parameter tables and instantiations for the uint16_t subsampling and
// prediction fixtures.
const subsample_hbd_param subsample_hbd_sizes_neon[] = {
  ALL_CFL_TX_SIZES_SUBSAMPLE(cfl_get_luma_subsampling_420_hbd_neon,
                             cfl_get_luma_subsampling_422_hbd_neon,
                             cfl_get_luma_subsampling_444_hbd_neon)
};

const predict_param_hbd predict_sizes_hbd_neon[] = { ALL_CFL_TX_SIZES(
    cfl_get_predict_hbd_fn_neon) };

INSTANTIATE_TEST_SUITE_P(NEON, CFLSubsampleHBDTest,
                         ::testing::ValuesIn(subsample_hbd_sizes_neon));

INSTANTIATE_TEST_SUITE_P(NEON, CFLPredictHBDTest,
                         ::testing::ValuesIn(predict_sizes_hbd_neon));
587 #endif  // CONFIG_AV1_HIGHBITDEPTH
588 #endif  // HAVE_NEON
589 
590 #if HAVE_VSX
// VSX: run CFLSubAvgTest for every listed transform size against the VSX
// subtract-average getter.
const sub_avg_param sub_avg_sizes_vsx[] = { ALL_CFL_TX_SIZES(
    cfl_get_subtract_average_fn_vsx) };

INSTANTIATE_TEST_SUITE_P(VSX, CFLSubAvgTest,
                         ::testing::ValuesIn(sub_avg_sizes_vsx));
596 #endif
597 }  // namespace
598