1 /*
2 * Copyright 2023 The LibYuv Project Authors. All rights reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include <stdlib.h>
12 #include <time.h>
13
14 #include "../unit_test/unit_test.h"
15 #include "libyuv/cpu_id.h"
16 #include "libyuv/scale.h"
17
18 #ifdef ENABLE_ROW_TESTS
19 #include "libyuv/scale_row.h" // For ScaleRowDown2Box_Odd_C
20 #endif
21
// Turns a preprocessor token into a string literal.
#define STRINGIZE(token) #token
// Builds a "path:line" string literal. The line argument is macro-expanded
// before it is handed to STRINGIZE, so __LINE__ becomes its number.
#define FILELINESTR(path, lineno) path ":" STRINGIZE(lineno)

// RISC-V builds with a non-clang compiler run the lean configuration: slow
// tests are disabled and the full and row-level tests are switched off.
#if defined(__riscv) && !defined(__clang__)
#define DISABLE_SLOW_TESTS
#define LEAN_TESTS
#undef ENABLE_FULL_TESTS
#undef ENABLE_ROW_TESTS
#endif

// SLOW TESTS exercise unoptimized C code.
// FULL TESTS exercise optimized code but cover many variations of the same
// code. They are enabled on x86 targets, and elsewhere unless slow tests are
// disabled.
#if defined(__x86_64__) || defined(__i386__) || !defined(DISABLE_SLOW_TESTS)
#define ENABLE_FULL_TESTS
#endif
37
38 namespace libyuv {
39
40 #ifdef ENABLE_ROW_TESTS
41 #ifdef HAS_SCALEROWDOWN2_SSSE3
// Exercises the 2x box down-scaling row functions on two 128 pixel rows.
// The C functions are checked against hand-computed values first, then the
// SSSE3 functions are checked against the same values and against C.
TEST_F(LibYUVScaleTest, TestScaleRowDown2Box_Odd_SSSE3) {
  SIMD_ALIGNED(uint8_t orig_pixels[128 * 2]);
  SIMD_ALIGNED(uint8_t dst_pixels_opt[64]);
  SIMD_ALIGNED(uint8_t dst_pixels_c[64]);
  memset(orig_pixels, 0, sizeof(orig_pixels));
  memset(dst_pixels_opt, 0, sizeof(dst_pixels_opt));
  memset(dst_pixels_c, 0, sizeof(dst_pixels_c));

  if (!TestCpuFlag(kCpuHasSSSE3)) {
    printf("Warning SSSE3 not detected; Skipping test.\n");
    return;
  }

  // The two source rows are stored back to back with a stride of 128.
  uint8_t* const top_row = orig_pixels;
  uint8_t* const bottom_row = orig_pixels + 128;

  // Output pixel 0: only the top-left sample of the 2x2 cell is set.
  top_row[0] = 255u;
  top_row[1] = 0u;
  bottom_row[0] = 0u;
  bottom_row[1] = 0u;
  // Output pixel 1: only the top-right sample is set.
  top_row[2] = 0u;
  top_row[3] = 100u;
  bottom_row[2] = 0u;
  bottom_row[3] = 0u;
  // Output pixel 2: only the bottom-left sample is set.
  top_row[4] = 0u;
  top_row[5] = 0u;
  bottom_row[4] = 50u;
  bottom_row[5] = 0u;
  // Output pixel 3: only the bottom-right sample is set.
  top_row[6] = 0u;
  top_row[7] = 0u;
  bottom_row[6] = 0u;
  bottom_row[7] = 20u;
  // Last cell, used to tell the regular and the odd-width variants apart.
  top_row[126] = 4u;
  top_row[127] = 255u;
  bottom_row[126] = 16u;
  bottom_row[127] = 255u;

  // Regular half size in C: the last output averages all four samples.
  ScaleRowDown2Box_C(orig_pixels, 128, dst_pixels_c, 64);

  EXPECT_EQ(64u, dst_pixels_c[0]);
  EXPECT_EQ(25u, dst_pixels_c[1]);
  EXPECT_EQ(13u, dst_pixels_c[2]);
  EXPECT_EQ(5u, dst_pixels_c[3]);
  EXPECT_EQ(0u, dst_pixels_c[4]);
  EXPECT_EQ(133u, dst_pixels_c[63]);

  // Odd width version in C: the last output pixel is built from a single
  // horizontal pixel, so only the 4 and 16 samples contribute.
  ScaleRowDown2Box_Odd_C(orig_pixels, 128, dst_pixels_c, 64);

  EXPECT_EQ(64u, dst_pixels_c[0]);
  EXPECT_EQ(25u, dst_pixels_c[1]);
  EXPECT_EQ(13u, dst_pixels_c[2]);
  EXPECT_EQ(5u, dst_pixels_c[3]);
  EXPECT_EQ(0u, dst_pixels_c[4]);
  EXPECT_EQ(10u, dst_pixels_c[63]);

  // One output pixel less: after clearing the destination, element 63 must
  // still be zero, i.e. the last pixel is skipped.
  memset(dst_pixels_c, 0, sizeof(dst_pixels_c));
  ScaleRowDown2Box_Odd_C(orig_pixels, 128, dst_pixels_c, 63);

  EXPECT_EQ(64u, dst_pixels_c[0]);
  EXPECT_EQ(25u, dst_pixels_c[1]);
  EXPECT_EQ(13u, dst_pixels_c[2]);
  EXPECT_EQ(5u, dst_pixels_c[3]);
  EXPECT_EQ(0u, dst_pixels_c[4]);
  EXPECT_EQ(0u, dst_pixels_c[63]);

  // Regular half size with SSSE3 must give the same values as the C version.
  ScaleRowDown2Box_SSSE3(orig_pixels, 128, dst_pixels_opt, 64);

  EXPECT_EQ(64u, dst_pixels_opt[0]);
  EXPECT_EQ(25u, dst_pixels_opt[1]);
  EXPECT_EQ(13u, dst_pixels_opt[2]);
  EXPECT_EQ(5u, dst_pixels_opt[3]);
  EXPECT_EQ(0u, dst_pixels_opt[4]);
  EXPECT_EQ(133u, dst_pixels_opt[63]);

  // The odd width C and SSSE3 versions must agree on every output pixel.
  ScaleRowDown2Box_Odd_C(orig_pixels, 128, dst_pixels_c, 64);
  ScaleRowDown2Box_Odd_SSSE3(orig_pixels, 128, dst_pixels_opt, 64);
  for (int x = 0; x < 64; ++x) {
    EXPECT_EQ(dst_pixels_c[x], dst_pixels_opt[x]);
  }
}
129 #endif // HAS_SCALEROWDOWN2_SSSE3
130
131 extern "C" void ScaleRowDown2Box_16_NEON(const uint16_t* src_ptr,
132 ptrdiff_t src_stride,
133 uint16_t* dst,
134 int dst_width);
135
TEST_F(LibYUVScaleTest,TestScaleRowDown2Box_16)136 TEST_F(LibYUVScaleTest, TestScaleRowDown2Box_16) {
137 SIMD_ALIGNED(uint16_t orig_pixels[2560 * 2]);
138 SIMD_ALIGNED(uint16_t dst_pixels_c[1280]);
139 SIMD_ALIGNED(uint16_t dst_pixels_opt[1280]);
140
141 memset(orig_pixels, 0, sizeof(orig_pixels));
142 memset(dst_pixels_c, 1, sizeof(dst_pixels_c));
143 memset(dst_pixels_opt, 2, sizeof(dst_pixels_opt));
144
145 for (int i = 0; i < 2560 * 2; ++i) {
146 orig_pixels[i] = i;
147 }
148 ScaleRowDown2Box_16_C(&orig_pixels[0], 2560, &dst_pixels_c[0], 1280);
149 for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
150 #if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
151 int has_neon = TestCpuFlag(kCpuHasNEON);
152 if (has_neon) {
153 ScaleRowDown2Box_16_NEON(&orig_pixels[0], 2560, &dst_pixels_opt[0], 1280);
154 } else {
155 ScaleRowDown2Box_16_C(&orig_pixels[0], 2560, &dst_pixels_opt[0], 1280);
156 }
157 #else
158 ScaleRowDown2Box_16_C(&orig_pixels[0], 2560, &dst_pixels_opt[0], 1280);
159 #endif
160 }
161
162 for (int i = 0; i < 1280; ++i) {
163 EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
164 }
165
166 EXPECT_EQ(dst_pixels_c[0], (0 + 1 + 2560 + 2561 + 2) / 4);
167 EXPECT_EQ(dst_pixels_c[1279], 3839);
168 }
169 #endif // ENABLE_ROW_TESTS
170
171 // Test scaling plane with 8 bit C vs 12 bit C and return maximum pixel
172 // difference.
173 // 0 = exact.
TestPlaneFilter_16(int src_width,int src_height,int dst_width,int dst_height,FilterMode f,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info)174 static int TestPlaneFilter_16(int src_width,
175 int src_height,
176 int dst_width,
177 int dst_height,
178 FilterMode f,
179 int benchmark_iterations,
180 int disable_cpu_flags,
181 int benchmark_cpu_info) {
182 if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
183 return 0;
184 }
185
186 int i;
187 int64_t src_y_plane_size = (Abs(src_width)) * (Abs(src_height));
188 int src_stride_y = Abs(src_width);
189 int dst_y_plane_size = dst_width * dst_height;
190 int dst_stride_y = dst_width;
191
192 align_buffer_page_end(src_y, src_y_plane_size);
193 align_buffer_page_end(src_y_16, src_y_plane_size * 2);
194 align_buffer_page_end(dst_y_8, dst_y_plane_size);
195 align_buffer_page_end(dst_y_16, dst_y_plane_size * 2);
196 uint16_t* p_src_y_16 = reinterpret_cast<uint16_t*>(src_y_16);
197 uint16_t* p_dst_y_16 = reinterpret_cast<uint16_t*>(dst_y_16);
198
199 MemRandomize(src_y, src_y_plane_size);
200 memset(dst_y_8, 0, dst_y_plane_size);
201 memset(dst_y_16, 1, dst_y_plane_size * 2);
202
203 for (i = 0; i < src_y_plane_size; ++i) {
204 p_src_y_16[i] = src_y[i] & 255;
205 }
206
207 MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
208 ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y_8, dst_stride_y,
209 dst_width, dst_height, f);
210 MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
211
212 for (i = 0; i < benchmark_iterations; ++i) {
213 ScalePlane_16(p_src_y_16, src_stride_y, src_width, src_height, p_dst_y_16,
214 dst_stride_y, dst_width, dst_height, f);
215 }
216
217 // Expect an exact match.
218 int max_diff = 0;
219 for (i = 0; i < dst_y_plane_size; ++i) {
220 int abs_diff = Abs(dst_y_8[i] - p_dst_y_16[i]);
221 if (abs_diff > max_diff) {
222 max_diff = abs_diff;
223 }
224 }
225
226 free_aligned_buffer_page_end(dst_y_8);
227 free_aligned_buffer_page_end(dst_y_16);
228 free_aligned_buffer_page_end(src_y);
229 free_aligned_buffer_page_end(src_y_16);
230
231 return max_diff;
232 }
233
// The following adjustments in dimensions ensure the scale factor will be
// exactly achieved: both macros derive the same count k = (x / nom + 1) / 2,
// DX yields the destination size k * nom * 2 and SX yields the source size
// k * denom * 2, so destination / source is exactly nom / denom.
// 2 is chroma subsample.
// DX applies Abs() to x; SX does not. DX ignores denom; it is kept so both
// macros take the same arguments.
// Every parameter is parenthesized so that expression arguments expand with
// the intended precedence.
#define DX(x, nom, denom) \
  static_cast<int>(((Abs(x) / (nom) + 1) / 2) * (nom) * 2)
#define SX(x, nom, denom) \
  static_cast<int>((((x) / (nom) + 1) / 2) * (denom) * 2)
239
// Defines one test, registered with a DISABLED_ prefix, that scales a plane
// by num/den with filter kFilter<mode> through TestPlaneFilter_16 and
// requires the reported 8 bit vs 16 bit difference to be at most tolerance.
// Source and destination sizes come from SX and DX applied to the benchmark
// dimensions.
#define TEST_FACTOR1(label, mode, num, den, tolerance)                        \
  TEST_F(LibYUVScaleTest, DISABLED_##ScalePlaneDownBy##label##_##mode##_16) { \
    const int from_width = SX(benchmark_width_, num, den);                    \
    const int from_height = SX(benchmark_height_, num, den);                  \
    const int to_width = DX(benchmark_width_, num, den);                      \
    const int to_height = DX(benchmark_height_, num, den);                    \
    const int worst = TestPlaneFilter_16(                                     \
        from_width, from_height, to_width, to_height, kFilter##mode,          \
        benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_);      \
    EXPECT_LE(worst, tolerance);                                              \
  }
249
250 // Test a scale factor with all 4 filters. Expect unfiltered to be exact, but
251 // filtering is different fixed point implementations for SSSE3, Neon and C.
252 #define TEST_FACTOR(name, nom, denom, boxdiff) \
253 TEST_FACTOR1(name, None, nom, denom, 0) \
254 TEST_FACTOR1(name, Linear, nom, denom, boxdiff) \
255 TEST_FACTOR1(name, Bilinear, nom, denom, boxdiff) \
256 TEST_FACTOR1(name, Box, nom, denom, boxdiff)
257
258 TEST_FACTOR(2, 1, 2, 0)
259 TEST_FACTOR(4, 1, 4, 0)
260 // TEST_FACTOR(8, 1, 8, 0) Disable for benchmark performance. Takes 90 seconds.
261 TEST_FACTOR(3by4, 3, 4, 1)
262 TEST_FACTOR(3by8, 3, 8, 1)
263 TEST_FACTOR(3, 1, 3, 0)
264 #undef TEST_FACTOR1
265 #undef TEST_FACTOR
266 #undef SX
267 #undef DX
268
// Scales a 480x3 plane down to 160x1 and checks the first output pixel for
// the bilinear and the unfiltered path.
TEST_F(LibYUVScaleTest, PlaneTest3x) {
  const int kSrcWidth = 480;
  const int kSrcHeight = 3;
  const int kDstWidth = 160;
  const int kDstHeight = 1;
  const int kSrcStride = kSrcWidth;
  const int kDstStride = kDstWidth;
  const int kSize = kSrcStride * kSrcHeight;

  align_buffer_page_end(orig_pixels, kSize);
  align_buffer_page_end(dest_pixels, kDstStride);

  // Each source pixel holds its own index.
  for (int n = 0; n < kSize; ++n) {
    orig_pixels[n] = n;
  }

  // Repeat count scaled to the benchmark size in units of 160 pixel rows.
  const int iterations160 =
      (benchmark_width_ * benchmark_height_ + (160 - 1)) / 160 *
      benchmark_iterations_;
  for (int pass = 0; pass < iterations160; ++pass) {
    ScalePlane(orig_pixels, kSrcStride, kSrcWidth, kSrcHeight, dest_pixels,
               kDstStride, kDstWidth, kDstHeight, kFilterBilinear);
  }

  // 225 is 480 + 1 truncated to 8 bits, the value written at row 1, column 1.
  // NOTE(review): presumably that is the source pixel picked for output 0.
  EXPECT_EQ(225, dest_pixels[0]);

  ScalePlane(orig_pixels, kSrcStride, kSrcWidth, kSrcHeight, dest_pixels,
             kDstStride, kDstWidth, kDstHeight, kFilterNone);

  EXPECT_EQ(225, dest_pixels[0]);

  free_aligned_buffer_page_end(dest_pixels);
  free_aligned_buffer_page_end(orig_pixels);
}
296
// Scales a 640x4 plane down to 160x1 and checks the first output pixel for
// the bilinear and the unfiltered path.
TEST_F(LibYUVScaleTest, PlaneTest4x) {
  const int kSrcWidth = 640;
  const int kSrcHeight = 4;
  const int kDstWidth = 160;
  const int kDstHeight = 1;
  const int kSrcStride = kSrcWidth;
  const int kDstStride = kDstWidth;
  const int kSize = kSrcStride * kSrcHeight;

  align_buffer_page_end(orig_pixels, kSize);
  align_buffer_page_end(dest_pixels, kDstStride);

  // Each source pixel holds its own index.
  for (int n = 0; n < kSize; ++n) {
    orig_pixels[n] = n;
  }

  // Repeat count scaled to the benchmark size in units of 160 pixel rows.
  const int iterations160 =
      (benchmark_width_ * benchmark_height_ + (160 - 1)) / 160 *
      benchmark_iterations_;
  for (int pass = 0; pass < iterations160; ++pass) {
    ScalePlane(orig_pixels, kSrcStride, kSrcWidth, kSrcHeight, dest_pixels,
               kDstStride, kDstWidth, kDstHeight, kFilterBilinear);
  }

  EXPECT_EQ(66, dest_pixels[0]);

  ScalePlane(orig_pixels, kSrcStride, kSrcWidth, kSrcHeight, dest_pixels,
             kDstStride, kDstWidth, kDstHeight, kFilterNone);

  // Expect the 3rd pixel of the 3rd row: 2 * 640 + 2 truncated to 8 bits is 2.
  EXPECT_EQ(2, dest_pixels[0]);

  free_aligned_buffer_page_end(dest_pixels);
  free_aligned_buffer_page_end(orig_pixels);
}
324
325 // Intent is to test 200x50 to 50x200 but width and height can be parameters.
TEST_F(LibYUVScaleTest,PlaneTestRotate_None)326 TEST_F(LibYUVScaleTest, PlaneTestRotate_None) {
327 const int kSize = benchmark_width_ * benchmark_height_;
328 align_buffer_page_end(orig_pixels, kSize);
329 for (int i = 0; i < kSize; ++i) {
330 orig_pixels[i] = i;
331 }
332 align_buffer_page_end(dest_opt_pixels, kSize);
333 align_buffer_page_end(dest_c_pixels, kSize);
334
335 MaskCpuFlags(disable_cpu_flags_); // Disable all CPU optimization.
336 ScalePlane(orig_pixels, benchmark_width_, benchmark_width_, benchmark_height_,
337 dest_c_pixels, benchmark_height_, benchmark_height_,
338 benchmark_width_, kFilterNone);
339 MaskCpuFlags(benchmark_cpu_info_); // Enable all CPU optimization.
340
341 for (int i = 0; i < benchmark_iterations_; ++i) {
342 ScalePlane(orig_pixels, benchmark_width_, benchmark_width_,
343 benchmark_height_, dest_opt_pixels, benchmark_height_,
344 benchmark_height_, benchmark_width_, kFilterNone);
345 }
346
347 for (int i = 0; i < kSize; ++i) {
348 EXPECT_EQ(dest_c_pixels[i], dest_opt_pixels[i]);
349 }
350
351 free_aligned_buffer_page_end(dest_c_pixels);
352 free_aligned_buffer_page_end(dest_opt_pixels);
353 free_aligned_buffer_page_end(orig_pixels);
354 }
355
// Scales a plane to a destination whose width and height are swapped, with
// bilinear filtering, and checks that the result with CPU flags masked by
// disable_cpu_flags_ equals the result with benchmark_cpu_info_.
TEST_F(LibYUVScaleTest, PlaneTestRotate_Bilinear) {
  const int src_w = benchmark_width_;
  const int src_h = benchmark_height_;
  const int dst_w = src_h;  // Destination swaps the two dimensions.
  const int dst_h = src_w;
  const int kSize = src_w * src_h;

  align_buffer_page_end(orig_pixels, kSize);
  align_buffer_page_end(dest_opt_pixels, kSize);
  align_buffer_page_end(dest_c_pixels, kSize);

  // Each source pixel holds its own index.
  for (int n = 0; n < kSize; ++n) {
    orig_pixels[n] = n;
  }

  MaskCpuFlags(disable_cpu_flags_);  // Disable all CPU optimization.
  ScalePlane(orig_pixels, src_w, src_w, src_h, dest_c_pixels, dst_w, dst_w,
             dst_h, kFilterBilinear);
  MaskCpuFlags(benchmark_cpu_info_);  // Enable all CPU optimization.

  for (int pass = 0; pass < benchmark_iterations_; ++pass) {
    ScalePlane(orig_pixels, src_w, src_w, src_h, dest_opt_pixels, dst_w, dst_w,
               dst_h, kFilterBilinear);
  }

  for (int n = 0; n < kSize; ++n) {
    EXPECT_EQ(dest_c_pixels[n], dest_opt_pixels[n]);
  }

  free_aligned_buffer_page_end(dest_c_pixels);
  free_aligned_buffer_page_end(dest_opt_pixels);
  free_aligned_buffer_page_end(orig_pixels);
}
385
386 // Intent is to test 200x50 to 50x200 but width and height can be parameters.
TEST_F(LibYUVScaleTest,PlaneTestRotate_Box)387 TEST_F(LibYUVScaleTest, PlaneTestRotate_Box) {
388 const int kSize = benchmark_width_ * benchmark_height_;
389 align_buffer_page_end(orig_pixels, kSize);
390 for (int i = 0; i < kSize; ++i) {
391 orig_pixels[i] = i;
392 }
393 align_buffer_page_end(dest_opt_pixels, kSize);
394 align_buffer_page_end(dest_c_pixels, kSize);
395
396 MaskCpuFlags(disable_cpu_flags_); // Disable all CPU optimization.
397 ScalePlane(orig_pixels, benchmark_width_, benchmark_width_, benchmark_height_,
398 dest_c_pixels, benchmark_height_, benchmark_height_,
399 benchmark_width_, kFilterBox);
400 MaskCpuFlags(benchmark_cpu_info_); // Enable all CPU optimization.
401
402 for (int i = 0; i < benchmark_iterations_; ++i) {
403 ScalePlane(orig_pixels, benchmark_width_, benchmark_width_,
404 benchmark_height_, dest_opt_pixels, benchmark_height_,
405 benchmark_height_, benchmark_width_, kFilterBox);
406 }
407
408 for (int i = 0; i < kSize; ++i) {
409 EXPECT_EQ(dest_c_pixels[i], dest_opt_pixels[i]);
410 }
411
412 free_aligned_buffer_page_end(dest_c_pixels);
413 free_aligned_buffer_page_end(dest_opt_pixels);
414 free_aligned_buffer_page_end(orig_pixels);
415 }
416
// Scales a 1x1 plane to 1x2 with the box filter and checks that both output
// pixels take the source value while the byte after them is left untouched.
TEST_F(LibYUVScaleTest, PlaneTest1_Box) {
  const int kPaddedCount = 3;
  align_buffer_page_end(orig_pixels, kPaddedCount);
  align_buffer_page_end(dst_pixels, kPaddedCount);

  // The 1x1 byte image sits at orig_pixels[1]; the bytes before and after it
  // hold different values in case libyuv reads outside the memory
  // boundaries. The destination is filled with 3 to detect stray writes.
  for (int n = 0; n < kPaddedCount; ++n) {
    orig_pixels[n] = n;  // orig_pixels[1] == 1 is the pixel being scaled.
    dst_pixels[n] = 3;
  }

  const int kSrcStride = 1;
  const int kSrcWidth = 1;
  const int kSrcHeight = 1;
  const int kDstStride = 1;
  const int kDstWidth = 1;
  const int kDstHeight = 2;
  libyuv::ScalePlane(orig_pixels + 1, kSrcStride, kSrcWidth, kSrcHeight,
                     dst_pixels, kDstStride, kDstWidth, kDstHeight,
                     libyuv::kFilterBox);

  EXPECT_EQ(dst_pixels[0], 1);
  EXPECT_EQ(dst_pixels[1], 1);
  EXPECT_EQ(dst_pixels[2], 3);

  free_aligned_buffer_page_end(dst_pixels);
  free_aligned_buffer_page_end(orig_pixels);
}
442
// Scales a 1x1 plane of 16 bit samples to 1x2 with ScalePlane_16 and checks
// that both output samples take the source value while the sample after them
// is left untouched.
// NOTE(review): the test name says Box but the call passes kFilterNone;
// confirm which filter is intended.
TEST_F(LibYUVScaleTest, PlaneTest1_16_Box) {
  const int kPaddedCount = 3;
  // Two bytes per 16 bit sample.
  align_buffer_page_end(orig_pixels_alloc, 3 * 2);
  align_buffer_page_end(dst_pixels_alloc, 3 * 2);
  uint16_t* orig_pixels = reinterpret_cast<uint16_t*>(orig_pixels_alloc);
  uint16_t* dst_pixels = reinterpret_cast<uint16_t*>(dst_pixels_alloc);

  // The 1x1 image sits at orig_pixels[1]; the samples before and after it
  // hold different values in case libyuv reads outside the memory
  // boundaries. The destination is filled with 3 to detect stray writes.
  for (int n = 0; n < kPaddedCount; ++n) {
    orig_pixels[n] = n;  // orig_pixels[1] == 1 is the sample being scaled.
    dst_pixels[n] = 3;
  }

  const int kSrcStride = 1;
  const int kSrcWidth = 1;
  const int kSrcHeight = 1;
  const int kDstStride = 1;
  const int kDstWidth = 1;
  const int kDstHeight = 2;
  libyuv::ScalePlane_16(orig_pixels + 1, kSrcStride, kSrcWidth, kSrcHeight,
                        dst_pixels, kDstStride, kDstWidth, kDstHeight,
                        libyuv::kFilterNone);

  EXPECT_EQ(dst_pixels[0], 1);
  EXPECT_EQ(dst_pixels[1], 1);
  EXPECT_EQ(dst_pixels[2], 3);

  free_aligned_buffer_page_end(dst_pixels_alloc);
  free_aligned_buffer_page_end(orig_pixels_alloc);
}
470 } // namespace libyuv
471