/*
 * Copyright (c) 2017, Alliance for Open Media. All rights reserved.
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */
11
12 #include <tuple>
13
14 #include "gtest/gtest.h"
15
16 #include "config/av1_rtcd.h"
17
18 #include "aom_ports/aom_timer.h"
19 #include "test/util.h"
20 #include "test/acm_random.h"
21
22 using std::make_tuple;
23
24 using libaom_test::ACMRandom;
25
// Number of randomized rounds each correctness test runs.
#define NUM_ITERATIONS 100
// Number of back-to-back calls timed by each speed test.
#define NUM_ITERATIONS_SPEED INT16_MAX
28
// Builds a single (transform size, getter) test parameter.
#define CFL_TX_SIZE_PARAM(tx, function) \
  make_tuple(static_cast<TX_SIZE>(tx), &function)

// Expands to a comma-separated list of parameters that pairs `function` with
// each of the 14 transform sizes enumerated below. Intended for use inside an
// array initializer.
#define ALL_CFL_TX_SIZES(function)        \
  CFL_TX_SIZE_PARAM(TX_4X4, function),    \
  CFL_TX_SIZE_PARAM(TX_4X8, function),    \
  CFL_TX_SIZE_PARAM(TX_4X16, function),   \
  CFL_TX_SIZE_PARAM(TX_8X4, function),    \
  CFL_TX_SIZE_PARAM(TX_8X8, function),    \
  CFL_TX_SIZE_PARAM(TX_8X16, function),   \
  CFL_TX_SIZE_PARAM(TX_8X32, function),   \
  CFL_TX_SIZE_PARAM(TX_16X4, function),   \
  CFL_TX_SIZE_PARAM(TX_16X8, function),   \
  CFL_TX_SIZE_PARAM(TX_16X16, function),  \
  CFL_TX_SIZE_PARAM(TX_16X32, function),  \
  CFL_TX_SIZE_PARAM(TX_32X8, function),   \
  CFL_TX_SIZE_PARAM(TX_32X16, function),  \
  CFL_TX_SIZE_PARAM(TX_32X32, function)
44
// Builds a single (transform size, 420 getter, 422 getter, 444 getter) test
// parameter.
#define CFL_TX_SIZE_SUBSAMPLE_PARAM(tx, fun420, fun422, fun444) \
  make_tuple(static_cast<TX_SIZE>(tx), &fun420, &fun422, &fun444)

// Expands to a comma-separated list of parameters that combines the three
// subsampling getters with each of the 14 transform sizes enumerated below.
// Intended for use inside an array initializer.
#define ALL_CFL_TX_SIZES_SUBSAMPLE(fun420, fun422, fun444)            \
  CFL_TX_SIZE_SUBSAMPLE_PARAM(TX_4X4, fun420, fun422, fun444),        \
  CFL_TX_SIZE_SUBSAMPLE_PARAM(TX_4X8, fun420, fun422, fun444),        \
  CFL_TX_SIZE_SUBSAMPLE_PARAM(TX_4X16, fun420, fun422, fun444),       \
  CFL_TX_SIZE_SUBSAMPLE_PARAM(TX_8X4, fun420, fun422, fun444),        \
  CFL_TX_SIZE_SUBSAMPLE_PARAM(TX_8X8, fun420, fun422, fun444),        \
  CFL_TX_SIZE_SUBSAMPLE_PARAM(TX_8X16, fun420, fun422, fun444),       \
  CFL_TX_SIZE_SUBSAMPLE_PARAM(TX_8X32, fun420, fun422, fun444),       \
  CFL_TX_SIZE_SUBSAMPLE_PARAM(TX_16X4, fun420, fun422, fun444),       \
  CFL_TX_SIZE_SUBSAMPLE_PARAM(TX_16X8, fun420, fun422, fun444),       \
  CFL_TX_SIZE_SUBSAMPLE_PARAM(TX_16X16, fun420, fun422, fun444),      \
  CFL_TX_SIZE_SUBSAMPLE_PARAM(TX_16X32, fun420, fun422, fun444),      \
  CFL_TX_SIZE_SUBSAMPLE_PARAM(TX_32X8, fun420, fun422, fun444),       \
  CFL_TX_SIZE_SUBSAMPLE_PARAM(TX_32X16, fun420, fun422, fun444),      \
  CFL_TX_SIZE_SUBSAMPLE_PARAM(TX_32X32, fun420, fun422, fun444)
60
61 namespace {
62
63 template <typename A>
assert_eq(const A * a,const A * b,int width,int height)64 static void assert_eq(const A *a, const A *b, int width, int height) {
65 for (int j = 0; j < height; j++) {
66 for (int i = 0; i < width; i++) {
67 ASSERT_EQ(a[j * CFL_BUF_LINE + i], b[j * CFL_BUF_LINE + i]);
68 }
69 }
70 }
71
assertFaster(int ref_elapsed_time,int elapsed_time)72 static void assertFaster(int ref_elapsed_time, int elapsed_time) {
73 EXPECT_GT(ref_elapsed_time, elapsed_time)
74 << "Error: CFLSubtractSpeedTest, SIMD slower than C." << std::endl
75 << "C time: " << ref_elapsed_time << " us" << std::endl
76 << "SIMD time: " << elapsed_time << " us" << std::endl;
77 }
78
// Prints one timing line to std::cout: the block dimensions, the reference
// and optimized timings (in microseconds) and their ratio with two
// significant digits.
//
// std::cout is shared by the whole test binary, so the previous precision is
// saved and restored rather than leaving the 2-digit setting in place for
// every later floating-point print.
static void printSpeed(int ref_elapsed_time, int elapsed_time, int width,
                       int height) {
  // precision(n) installs the new value and returns the one it replaced.
  const auto saved_precision = std::cout.precision(2);
  const double speedup = ref_elapsed_time / static_cast<double>(elapsed_time);
  std::cout << "[ ] " << width << "x" << height
            << ": C time = " << ref_elapsed_time
            << " us, SIMD time = " << elapsed_time << " us"
            << " (~" << speedup << "x) " << std::endl;
  std::cout.precision(saved_precision);
}
88
89 class CFLTest {
90 public:
91 virtual ~CFLTest() = default;
init(TX_SIZE tx)92 void init(TX_SIZE tx) {
93 tx_size = tx;
94 width = tx_size_wide[tx_size];
95 height = tx_size_high[tx_size];
96 rnd.Reset(ACMRandom::DeterministicSeed());
97 }
98
99 protected:
100 TX_SIZE tx_size;
101 int width;
102 int height;
103 ACMRandom rnd;
104 };
105
106 template <typename I>
107 class CFLTestWithData : public CFLTest {
108 public:
109 ~CFLTestWithData() override = default;
110
111 protected:
112 I data[CFL_BUF_SQUARE];
113 I data_ref[CFL_BUF_SQUARE];
randData(I (ACMRandom::* random)())114 void randData(I (ACMRandom::*random)()) {
115 for (int j = 0; j < this->height; j++) {
116 for (int i = 0; i < this->width; i++) {
117 const I d = (this->rnd.*random)();
118 data[j * CFL_BUF_LINE + i] = d;
119 data_ref[j * CFL_BUF_LINE + i] = d;
120 }
121 }
122 }
123 };
124
125 template <typename I>
126 class CFLTestWithAlignedData : public CFLTest {
127 public:
~CFLTestWithAlignedData()128 ~CFLTestWithAlignedData() override {
129 aom_free(chroma_pels_ref);
130 aom_free(sub_luma_pels_ref);
131 aom_free(chroma_pels);
132 aom_free(sub_luma_pels);
133 }
134
135 protected:
init()136 void init() {
137 chroma_pels_ref =
138 reinterpret_cast<I *>(aom_memalign(32, sizeof(I) * CFL_BUF_SQUARE));
139 ASSERT_NE(chroma_pels_ref, nullptr);
140 chroma_pels =
141 reinterpret_cast<I *>(aom_memalign(32, sizeof(I) * CFL_BUF_SQUARE));
142 ASSERT_NE(chroma_pels, nullptr);
143 sub_luma_pels_ref = reinterpret_cast<int16_t *>(
144 aom_memalign(32, sizeof(int16_t) * CFL_BUF_SQUARE));
145 ASSERT_NE(sub_luma_pels_ref, nullptr);
146 sub_luma_pels = reinterpret_cast<int16_t *>(
147 aom_memalign(32, sizeof(int16_t) * CFL_BUF_SQUARE));
148 ASSERT_NE(sub_luma_pels, nullptr);
149 memset(chroma_pels_ref, 0, sizeof(I) * CFL_BUF_SQUARE);
150 memset(chroma_pels, 0, sizeof(I) * CFL_BUF_SQUARE);
151 memset(sub_luma_pels_ref, 0, sizeof(int16_t) * CFL_BUF_SQUARE);
152 memset(sub_luma_pels, 0, sizeof(int16_t) * CFL_BUF_SQUARE);
153 }
154
155 I *chroma_pels_ref;
156 I *chroma_pels;
157 int16_t *sub_luma_pels_ref;
158 int16_t *sub_luma_pels;
159 int alpha_q3;
160 I dc;
randData(int bd)161 void randData(int bd) {
162 alpha_q3 = this->rnd(33) - 16;
163 dc = this->rnd(1 << bd);
164 for (int j = 0; j < this->height; j++) {
165 for (int i = 0; i < this->width; i++) {
166 chroma_pels[j * CFL_BUF_LINE + i] = dc;
167 chroma_pels_ref[j * CFL_BUF_LINE + i] = dc;
168 sub_luma_pels_ref[j * CFL_BUF_LINE + i] =
169 sub_luma_pels[j * CFL_BUF_LINE + i] = this->rnd(1 << (bd + 3));
170 }
171 }
172 }
173 };
174
175 typedef cfl_subtract_average_fn (*sub_avg_fn)(TX_SIZE tx_size);
176 typedef std::tuple<TX_SIZE, sub_avg_fn> sub_avg_param;
177 class CFLSubAvgTest : public ::testing::TestWithParam<sub_avg_param>,
178 public CFLTestWithData<int16_t> {
179 public:
SetUp()180 void SetUp() override {
181 CFLTest::init(std::get<0>(this->GetParam()));
182 sub_avg = std::get<1>(this->GetParam())(tx_size);
183 sub_avg_ref = cfl_get_subtract_average_fn_c(tx_size);
184 }
185 ~CFLSubAvgTest() override = default;
186
187 protected:
188 cfl_subtract_average_fn sub_avg;
189 cfl_subtract_average_fn sub_avg_ref;
190 };
191 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(CFLSubAvgTest);
192
// Runs the tested and reference subtract-average functions in place on
// identical random 15-bit data and checks that the results match.
TEST_P(CFLSubAvgTest, SubAvgTest) {
  for (int round = 0; round < NUM_ITERATIONS; ++round) {
    randData(&ACMRandom::Rand15);
    // Each buffer serves as both input (viewed as uint16_t) and output.
    sub_avg(reinterpret_cast<uint16_t *>(data), data);
    sub_avg_ref(reinterpret_cast<uint16_t *>(data_ref), data_ref);
    assert_eq<int16_t>(data, data_ref, width, height);
  }
}
201
// Times NUM_ITERATIONS_SPEED calls of the reference and of the tested
// subtract-average function, prints both timings and expects the tested one
// to be faster. Disabled by default through the DISABLED_ name prefix.
TEST_P(CFLSubAvgTest, DISABLED_SubAvgSpeedTest) {
  randData(&ACMRandom::Rand15);

  aom_usec_timer ref_timer;
  aom_usec_timer_start(&ref_timer);
  for (int run = 0; run < NUM_ITERATIONS_SPEED; ++run) {
    sub_avg_ref(reinterpret_cast<uint16_t *>(data_ref), data_ref);
  }
  aom_usec_timer_mark(&ref_timer);
  const int ref_elapsed_time =
      static_cast<int>(aom_usec_timer_elapsed(&ref_timer));

  aom_usec_timer timer;
  aom_usec_timer_start(&timer);
  for (int run = 0; run < NUM_ITERATIONS_SPEED; ++run) {
    sub_avg(reinterpret_cast<uint16_t *>(data), data);
  }
  aom_usec_timer_mark(&timer);
  const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));

  printSpeed(ref_elapsed_time, elapsed_time, width, height);
  assertFaster(ref_elapsed_time, elapsed_time);
}
221
222 template <typename S, typename T, typename I>
223 class CFLSubsampleTest : public ::testing::TestWithParam<S>,
224 public CFLTestWithData<I> {
225 public:
SetUp()226 void SetUp() override {
227 CFLTest::init(std::get<0>(this->GetParam()));
228 fun_420 = std::get<1>(this->GetParam())(this->tx_size);
229 fun_422 = std::get<2>(this->GetParam())(this->tx_size);
230 fun_444 = std::get<3>(this->GetParam())(this->tx_size);
231 }
232
233 protected:
234 T fun_420;
235 T fun_422;
236 T fun_444;
237 T fun_420_ref;
238 T fun_422_ref;
239 T fun_444_ref;
240
subsampleTest(T fun,T fun_ref,int sub_width,int sub_height,I (ACMRandom::* random)())241 void subsampleTest(T fun, T fun_ref, int sub_width, int sub_height,
242 I (ACMRandom::*random)()) {
243 uint16_t sub_luma_pels[CFL_BUF_SQUARE];
244 uint16_t sub_luma_pels_ref[CFL_BUF_SQUARE];
245
246 for (int it = 0; it < NUM_ITERATIONS; it++) {
247 CFLTestWithData<I>::randData(random);
248 fun(this->data, CFL_BUF_LINE, sub_luma_pels);
249 fun_ref(this->data_ref, CFL_BUF_LINE, sub_luma_pels_ref);
250 assert_eq<uint16_t>(sub_luma_pels, sub_luma_pels_ref, sub_width,
251 sub_height);
252 }
253 }
254
subsampleSpeedTest(T fun,T fun_ref,I (ACMRandom::* random)())255 void subsampleSpeedTest(T fun, T fun_ref, I (ACMRandom::*random)()) {
256 uint16_t sub_luma_pels[CFL_BUF_SQUARE];
257 uint16_t sub_luma_pels_ref[CFL_BUF_SQUARE];
258 aom_usec_timer ref_timer;
259 aom_usec_timer timer;
260
261 CFLTestWithData<I>::randData(random);
262 aom_usec_timer_start(&ref_timer);
263 for (int k = 0; k < NUM_ITERATIONS_SPEED; k++) {
264 fun_ref(this->data_ref, CFL_BUF_LINE, sub_luma_pels);
265 }
266 aom_usec_timer_mark(&ref_timer);
267 int ref_elapsed_time = (int)aom_usec_timer_elapsed(&ref_timer);
268 aom_usec_timer_start(&timer);
269 for (int k = 0; k < NUM_ITERATIONS_SPEED; k++) {
270 fun(this->data, CFL_BUF_LINE, sub_luma_pels_ref);
271 }
272 aom_usec_timer_mark(&timer);
273 int elapsed_time = (int)aom_usec_timer_elapsed(&timer);
274 printSpeed(ref_elapsed_time, elapsed_time, this->width, this->height);
275 assertFaster(ref_elapsed_time, elapsed_time);
276 }
277 };
278
279 typedef cfl_subsample_lbd_fn (*get_subsample_lbd_fn)(TX_SIZE tx_size);
280 typedef std::tuple<TX_SIZE, get_subsample_lbd_fn, get_subsample_lbd_fn,
281 get_subsample_lbd_fn>
282 subsample_lbd_param;
283 class CFLSubsampleLBDTest
284 : public CFLSubsampleTest<subsample_lbd_param, cfl_subsample_lbd_fn,
285 uint8_t> {
286 public:
287 ~CFLSubsampleLBDTest() override = default;
SetUp()288 void SetUp() override {
289 CFLSubsampleTest::SetUp();
290 fun_420_ref = cfl_get_luma_subsampling_420_lbd_c(tx_size);
291 fun_422_ref = cfl_get_luma_subsampling_422_lbd_c(tx_size);
292 fun_444_ref = cfl_get_luma_subsampling_444_lbd_c(tx_size);
293 }
294 };
295 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(CFLSubsampleLBDTest);
296
// 420 variant: the compared output region is half the block size in both
// dimensions.
TEST_P(CFLSubsampleLBDTest, SubsampleLBD420Test) {
  const int sub_width = width >> 1;
  const int sub_height = height >> 1;
  subsampleTest(fun_420, fun_420_ref, sub_width, sub_height,
                &ACMRandom::Rand8);
}
301
// Speed comparison for the 420 variant on random 8-bit input.
TEST_P(CFLSubsampleLBDTest, DISABLED_SubsampleLBD420SpeedTest) {
  uint8_t (ACMRandom::*const random_pixel)() = &ACMRandom::Rand8;
  subsampleSpeedTest(fun_420, fun_420_ref, random_pixel);
}
305
// 422 variant: the compared output region is half the block width and the
// full block height.
TEST_P(CFLSubsampleLBDTest, SubsampleLBD422Test) {
  const int sub_width = width >> 1;
  subsampleTest(fun_422, fun_422_ref, sub_width, height, &ACMRandom::Rand8);
}
309
// Speed comparison for the 422 variant on random 8-bit input.
TEST_P(CFLSubsampleLBDTest, DISABLED_SubsampleLBD422SpeedTest) {
  uint8_t (ACMRandom::*const random_pixel)() = &ACMRandom::Rand8;
  subsampleSpeedTest(fun_422, fun_422_ref, random_pixel);
}
313
// 444 variant: the compared output region is the full block.
TEST_P(CFLSubsampleLBDTest, SubsampleLBD444Test) {
  uint8_t (ACMRandom::*const random_pixel)() = &ACMRandom::Rand8;
  subsampleTest(fun_444, fun_444_ref, width, height, random_pixel);
}
317
// Speed comparison for the 444 variant on random 8-bit input.
TEST_P(CFLSubsampleLBDTest, DISABLED_SubsampleLBD444SpeedTest) {
  uint8_t (ACMRandom::*const random_pixel)() = &ACMRandom::Rand8;
  subsampleSpeedTest(fun_444, fun_444_ref, random_pixel);
}
321
322 #if CONFIG_AV1_HIGHBITDEPTH
323 typedef cfl_subsample_hbd_fn (*get_subsample_hbd_fn)(TX_SIZE tx_size);
324 typedef std::tuple<TX_SIZE, get_subsample_hbd_fn, get_subsample_hbd_fn,
325 get_subsample_hbd_fn>
326 subsample_hbd_param;
327 class CFLSubsampleHBDTest
328 : public CFLSubsampleTest<subsample_hbd_param, cfl_subsample_hbd_fn,
329 uint16_t> {
330 public:
331 ~CFLSubsampleHBDTest() override = default;
SetUp()332 void SetUp() override {
333 CFLSubsampleTest::SetUp();
334 fun_420_ref = cfl_get_luma_subsampling_420_hbd_c(tx_size);
335 fun_422_ref = cfl_get_luma_subsampling_422_hbd_c(tx_size);
336 fun_444_ref = cfl_get_luma_subsampling_444_hbd_c(tx_size);
337 }
338 };
339 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(CFLSubsampleHBDTest);
340
// 420 variant: the compared output region is half the block size in both
// dimensions.
TEST_P(CFLSubsampleHBDTest, SubsampleHBD420Test) {
  const int sub_width = width >> 1;
  const int sub_height = height >> 1;
  subsampleTest(fun_420, fun_420_ref, sub_width, sub_height,
                &ACMRandom::Rand12);
}
345
// Speed comparison for the 420 variant on random 12-bit input.
TEST_P(CFLSubsampleHBDTest, DISABLED_SubsampleHBD420SpeedTest) {
  uint16_t (ACMRandom::*const random_pixel)() = &ACMRandom::Rand12;
  subsampleSpeedTest(fun_420, fun_420_ref, random_pixel);
}
349
// 422 variant: the compared output region is half the block width and the
// full block height.
TEST_P(CFLSubsampleHBDTest, SubsampleHBD422Test) {
  const int sub_width = width >> 1;
  subsampleTest(fun_422, fun_422_ref, sub_width, height, &ACMRandom::Rand12);
}
353
// Speed comparison for the 422 variant on random 12-bit input.
TEST_P(CFLSubsampleHBDTest, DISABLED_SubsampleHBD422SpeedTest) {
  uint16_t (ACMRandom::*const random_pixel)() = &ACMRandom::Rand12;
  subsampleSpeedTest(fun_422, fun_422_ref, random_pixel);
}
357
// 444 variant: the compared output region is the full block.
TEST_P(CFLSubsampleHBDTest, SubsampleHBD444Test) {
  uint16_t (ACMRandom::*const random_pixel)() = &ACMRandom::Rand12;
  subsampleTest(fun_444, fun_444_ref, width, height, random_pixel);
}
361
// Speed comparison for the 444 variant on random 12-bit input.
TEST_P(CFLSubsampleHBDTest, DISABLED_SubsampleHBD444SpeedTest) {
  uint16_t (ACMRandom::*const random_pixel)() = &ACMRandom::Rand12;
  subsampleSpeedTest(fun_444, fun_444_ref, random_pixel);
}
365 #endif // CONFIG_AV1_HIGHBITDEPTH
366
367 typedef cfl_predict_lbd_fn (*get_predict_fn)(TX_SIZE tx_size);
368 typedef std::tuple<TX_SIZE, get_predict_fn> predict_param;
369 class CFLPredictTest : public ::testing::TestWithParam<predict_param>,
370 public CFLTestWithAlignedData<uint8_t> {
371 public:
SetUp()372 void SetUp() override {
373 CFLTest::init(std::get<0>(this->GetParam()));
374 CFLTestWithAlignedData::init();
375 predict = std::get<1>(this->GetParam())(tx_size);
376 predict_ref = cfl_get_predict_lbd_fn_c(tx_size);
377 }
378 ~CFLPredictTest() override = default;
379
380 protected:
381 cfl_predict_lbd_fn predict;
382 cfl_predict_lbd_fn predict_ref;
383 };
384 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(CFLPredictTest);
385
// Runs the tested and reference prediction functions on identical random
// 8-bit inputs and checks that the chroma outputs match.
TEST_P(CFLPredictTest, PredictTest) {
  const int bit_depth = 8;
  for (int round = 0; round < NUM_ITERATIONS; ++round) {
    randData(bit_depth);
    predict(sub_luma_pels, chroma_pels, CFL_BUF_LINE, alpha_q3);
    predict_ref(sub_luma_pels_ref, chroma_pels_ref, CFL_BUF_LINE, alpha_q3);
    assert_eq<uint8_t>(chroma_pels, chroma_pels_ref, width, height);
  }
}
// Times NUM_ITERATIONS_SPEED calls of the reference and of the tested
// prediction function, prints both timings and expects the tested one to be
// faster. Disabled by default through the DISABLED_ name prefix.
TEST_P(CFLPredictTest, DISABLED_PredictSpeedTest) {
  randData(8);

  aom_usec_timer ref_timer;
  aom_usec_timer_start(&ref_timer);
  for (int run = 0; run < NUM_ITERATIONS_SPEED; ++run) {
    predict_ref(sub_luma_pels_ref, chroma_pels_ref, CFL_BUF_LINE, alpha_q3);
  }
  aom_usec_timer_mark(&ref_timer);
  const int ref_elapsed_time =
      static_cast<int>(aom_usec_timer_elapsed(&ref_timer));

  aom_usec_timer timer;
  aom_usec_timer_start(&timer);
  for (int run = 0; run < NUM_ITERATIONS_SPEED; ++run) {
    predict(sub_luma_pels, chroma_pels, CFL_BUF_LINE, alpha_q3);
  }
  aom_usec_timer_mark(&timer);
  const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));

  printSpeed(ref_elapsed_time, elapsed_time, width, height);
  assertFaster(ref_elapsed_time, elapsed_time);
}
414
415 #if CONFIG_AV1_HIGHBITDEPTH
416 typedef cfl_predict_hbd_fn (*get_predict_fn_hbd)(TX_SIZE tx_size);
417 typedef std::tuple<TX_SIZE, get_predict_fn_hbd> predict_param_hbd;
418 class CFLPredictHBDTest : public ::testing::TestWithParam<predict_param_hbd>,
419 public CFLTestWithAlignedData<uint16_t> {
420 public:
SetUp()421 void SetUp() override {
422 CFLTest::init(std::get<0>(this->GetParam()));
423 CFLTestWithAlignedData::init();
424 predict = std::get<1>(this->GetParam())(tx_size);
425 predict_ref = cfl_get_predict_hbd_fn_c(tx_size);
426 }
427 ~CFLPredictHBDTest() override = default;
428
429 protected:
430 cfl_predict_hbd_fn predict;
431 cfl_predict_hbd_fn predict_ref;
432 };
433 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(CFLPredictHBDTest);
434
// Runs the tested and reference prediction functions on identical random
// 12-bit inputs and checks that the chroma outputs match.
TEST_P(CFLPredictHBDTest, PredictHBDTest) {
  const int bd = 12;
  for (int round = 0; round < NUM_ITERATIONS; ++round) {
    randData(bd);
    predict(sub_luma_pels, chroma_pels, CFL_BUF_LINE, alpha_q3, bd);
    predict_ref(sub_luma_pels_ref, chroma_pels_ref, CFL_BUF_LINE, alpha_q3,
                bd);
    assert_eq<uint16_t>(chroma_pels, chroma_pels_ref, width, height);
  }
}
// Times NUM_ITERATIONS_SPEED calls of the reference and of the tested 12-bit
// prediction function, prints both timings and expects the tested one to be
// faster. Disabled by default through the DISABLED_ name prefix.
TEST_P(CFLPredictHBDTest, DISABLED_PredictHBDSpeedTest) {
  const int bd = 12;
  randData(bd);

  aom_usec_timer ref_timer;
  aom_usec_timer_start(&ref_timer);
  for (int run = 0; run < NUM_ITERATIONS_SPEED; ++run) {
    predict_ref(sub_luma_pels_ref, chroma_pels_ref, CFL_BUF_LINE, alpha_q3,
                bd);
  }
  aom_usec_timer_mark(&ref_timer);
  const int ref_elapsed_time =
      static_cast<int>(aom_usec_timer_elapsed(&ref_timer));

  aom_usec_timer timer;
  aom_usec_timer_start(&timer);
  for (int run = 0; run < NUM_ITERATIONS_SPEED; ++run) {
    predict(sub_luma_pels, chroma_pels, CFL_BUF_LINE, alpha_q3, bd);
  }
  aom_usec_timer_mark(&timer);
  const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));

  printSpeed(ref_elapsed_time, elapsed_time, width, height);
  assertFaster(ref_elapsed_time, elapsed_time);
}
465 #endif // CONFIG_AV1_HIGHBITDEPTH
466
#if HAVE_SSE2
// Only the subtract-average suite is instantiated for SSE2 in this file.
const sub_avg_param sub_avg_sizes_sse2[] = {
  ALL_CFL_TX_SIZES(cfl_get_subtract_average_fn_sse2)
};
INSTANTIATE_TEST_SUITE_P(SSE2, CFLSubAvgTest,
                         ::testing::ValuesIn(sub_avg_sizes_sse2));
#endif  // HAVE_SSE2
475
#if HAVE_SSSE3
// Each parameter table is followed directly by the suite it feeds.
const subsample_lbd_param subsample_lbd_sizes_ssse3[] = {
  ALL_CFL_TX_SIZES_SUBSAMPLE(cfl_get_luma_subsampling_420_lbd_ssse3,
                             cfl_get_luma_subsampling_422_lbd_ssse3,
                             cfl_get_luma_subsampling_444_lbd_ssse3)
};
INSTANTIATE_TEST_SUITE_P(SSSE3, CFLSubsampleLBDTest,
                         ::testing::ValuesIn(subsample_lbd_sizes_ssse3));

const predict_param predict_sizes_ssse3[] = {
  ALL_CFL_TX_SIZES(cfl_get_predict_lbd_fn_ssse3)
};
INSTANTIATE_TEST_SUITE_P(SSSE3, CFLPredictTest,
                         ::testing::ValuesIn(predict_sizes_ssse3));

#if CONFIG_AV1_HIGHBITDEPTH
const subsample_hbd_param subsample_hbd_sizes_ssse3[] = {
  ALL_CFL_TX_SIZES_SUBSAMPLE(cfl_get_luma_subsampling_420_hbd_ssse3,
                             cfl_get_luma_subsampling_422_hbd_ssse3,
                             cfl_get_luma_subsampling_444_hbd_ssse3)
};
INSTANTIATE_TEST_SUITE_P(SSSE3, CFLSubsampleHBDTest,
                         ::testing::ValuesIn(subsample_hbd_sizes_ssse3));

const predict_param_hbd predict_sizes_hbd_ssse3[] = {
  ALL_CFL_TX_SIZES(cfl_get_predict_hbd_fn_ssse3)
};
INSTANTIATE_TEST_SUITE_P(SSSE3, CFLPredictHBDTest,
                         ::testing::ValuesIn(predict_sizes_hbd_ssse3));
#endif  // CONFIG_AV1_HIGHBITDEPTH
#endif  // HAVE_SSSE3
509
#if HAVE_AVX2
// Each parameter table is followed directly by the suite it feeds.
const sub_avg_param sub_avg_sizes_avx2[] = {
  ALL_CFL_TX_SIZES(cfl_get_subtract_average_fn_avx2)
};
INSTANTIATE_TEST_SUITE_P(AVX2, CFLSubAvgTest,
                         ::testing::ValuesIn(sub_avg_sizes_avx2));

const subsample_lbd_param subsample_lbd_sizes_avx2[] = {
  ALL_CFL_TX_SIZES_SUBSAMPLE(cfl_get_luma_subsampling_420_lbd_avx2,
                             cfl_get_luma_subsampling_422_lbd_avx2,
                             cfl_get_luma_subsampling_444_lbd_avx2)
};
INSTANTIATE_TEST_SUITE_P(AVX2, CFLSubsampleLBDTest,
                         ::testing::ValuesIn(subsample_lbd_sizes_avx2));

const predict_param predict_sizes_avx2[] = {
  ALL_CFL_TX_SIZES(cfl_get_predict_lbd_fn_avx2)
};
INSTANTIATE_TEST_SUITE_P(AVX2, CFLPredictTest,
                         ::testing::ValuesIn(predict_sizes_avx2));

#if CONFIG_AV1_HIGHBITDEPTH
const subsample_hbd_param subsample_hbd_sizes_avx2[] = {
  ALL_CFL_TX_SIZES_SUBSAMPLE(cfl_get_luma_subsampling_420_hbd_avx2,
                             cfl_get_luma_subsampling_422_hbd_avx2,
                             cfl_get_luma_subsampling_444_hbd_avx2)
};
INSTANTIATE_TEST_SUITE_P(AVX2, CFLSubsampleHBDTest,
                         ::testing::ValuesIn(subsample_hbd_sizes_avx2));

const predict_param_hbd predict_sizes_hbd_avx2[] = {
  ALL_CFL_TX_SIZES(cfl_get_predict_hbd_fn_avx2)
};
INSTANTIATE_TEST_SUITE_P(AVX2, CFLPredictHBDTest,
                         ::testing::ValuesIn(predict_sizes_hbd_avx2));
#endif  // CONFIG_AV1_HIGHBITDEPTH
#endif  // HAVE_AVX2
549
#if HAVE_NEON
// Each parameter table is followed directly by the suite it feeds.
const sub_avg_param sub_avg_sizes_neon[] = {
  ALL_CFL_TX_SIZES(cfl_get_subtract_average_fn_neon)
};
INSTANTIATE_TEST_SUITE_P(NEON, CFLSubAvgTest,
                         ::testing::ValuesIn(sub_avg_sizes_neon));

const subsample_lbd_param subsample_lbd_sizes_neon[] = {
  ALL_CFL_TX_SIZES_SUBSAMPLE(cfl_get_luma_subsampling_420_lbd_neon,
                             cfl_get_luma_subsampling_422_lbd_neon,
                             cfl_get_luma_subsampling_444_lbd_neon)
};
INSTANTIATE_TEST_SUITE_P(NEON, CFLSubsampleLBDTest,
                         ::testing::ValuesIn(subsample_lbd_sizes_neon));

const predict_param predict_sizes_neon[] = {
  ALL_CFL_TX_SIZES(cfl_get_predict_lbd_fn_neon)
};
INSTANTIATE_TEST_SUITE_P(NEON, CFLPredictTest,
                         ::testing::ValuesIn(predict_sizes_neon));

#if CONFIG_AV1_HIGHBITDEPTH
const subsample_hbd_param subsample_hbd_sizes_neon[] = {
  ALL_CFL_TX_SIZES_SUBSAMPLE(cfl_get_luma_subsampling_420_hbd_neon,
                             cfl_get_luma_subsampling_422_hbd_neon,
                             cfl_get_luma_subsampling_444_hbd_neon)
};
INSTANTIATE_TEST_SUITE_P(NEON, CFLSubsampleHBDTest,
                         ::testing::ValuesIn(subsample_hbd_sizes_neon));

const predict_param_hbd predict_sizes_hbd_neon[] = {
  ALL_CFL_TX_SIZES(cfl_get_predict_hbd_fn_neon)
};
INSTANTIATE_TEST_SUITE_P(NEON, CFLPredictHBDTest,
                         ::testing::ValuesIn(predict_sizes_hbd_neon));
#endif  // CONFIG_AV1_HIGHBITDEPTH
#endif  // HAVE_NEON
589
#if HAVE_VSX
// Only the subtract-average suite is instantiated for VSX in this file.
const sub_avg_param sub_avg_sizes_vsx[] = {
  ALL_CFL_TX_SIZES(cfl_get_subtract_average_fn_vsx)
};
INSTANTIATE_TEST_SUITE_P(VSX, CFLSubAvgTest,
                         ::testing::ValuesIn(sub_avg_sizes_vsx));
#endif  // HAVE_VSX
597 } // namespace
598