xref: /aosp_15_r20/external/libyuv/unit_test/scale_plane_test.cc (revision 4e366538070a3a6c5c163c31b791eab742e1657a)
1 /*
2  *  Copyright 2023 The LibYuv Project Authors. All rights reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS. All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include <stdlib.h>
12 #include <time.h>
13 
14 #include "../unit_test/unit_test.h"
15 #include "libyuv/cpu_id.h"
16 #include "libyuv/scale.h"
17 
18 #ifdef ENABLE_ROW_TESTS
19 #include "libyuv/scale_row.h"  // For ScaleRowDown2Box_Odd_C
20 #endif
21 
22 #define STRINGIZE(line) #line
23 #define FILELINESTR(file, line) file ":" STRINGIZE(line)
24 
25 #if defined(__riscv) && !defined(__clang__)
26 #define DISABLE_SLOW_TESTS
27 #undef ENABLE_FULL_TESTS
28 #undef ENABLE_ROW_TESTS
29 #define LEAN_TESTS
30 #endif
31 
32 #if !defined(DISABLE_SLOW_TESTS) || defined(__x86_64__) || defined(__i386__)
33 // SLOW TESTS are those that are unoptimized C code.
34 // FULL TESTS are optimized but test many variations of the same code.
35 #define ENABLE_FULL_TESTS
36 #endif
37 
38 namespace libyuv {
39 
40 #ifdef ENABLE_ROW_TESTS
41 #ifdef HAS_SCALEROWDOWN2_SSSE3
TEST_F(LibYUVScaleTest,TestScaleRowDown2Box_Odd_SSSE3)42 TEST_F(LibYUVScaleTest, TestScaleRowDown2Box_Odd_SSSE3) {
43   SIMD_ALIGNED(uint8_t orig_pixels[128 * 2]);
44   SIMD_ALIGNED(uint8_t dst_pixels_opt[64]);
45   SIMD_ALIGNED(uint8_t dst_pixels_c[64]);
46   memset(orig_pixels, 0, sizeof(orig_pixels));
47   memset(dst_pixels_opt, 0, sizeof(dst_pixels_opt));
48   memset(dst_pixels_c, 0, sizeof(dst_pixels_c));
49 
50   int has_ssse3 = TestCpuFlag(kCpuHasSSSE3);
51   if (!has_ssse3) {
52     printf("Warning SSSE3 not detected; Skipping test.\n");
53   } else {
54     // TL.
55     orig_pixels[0] = 255u;
56     orig_pixels[1] = 0u;
57     orig_pixels[128 + 0] = 0u;
58     orig_pixels[128 + 1] = 0u;
59     // TR.
60     orig_pixels[2] = 0u;
61     orig_pixels[3] = 100u;
62     orig_pixels[128 + 2] = 0u;
63     orig_pixels[128 + 3] = 0u;
64     // BL.
65     orig_pixels[4] = 0u;
66     orig_pixels[5] = 0u;
67     orig_pixels[128 + 4] = 50u;
68     orig_pixels[128 + 5] = 0u;
69     // BR.
70     orig_pixels[6] = 0u;
71     orig_pixels[7] = 0u;
72     orig_pixels[128 + 6] = 0u;
73     orig_pixels[128 + 7] = 20u;
74     // Odd.
75     orig_pixels[126] = 4u;
76     orig_pixels[127] = 255u;
77     orig_pixels[128 + 126] = 16u;
78     orig_pixels[128 + 127] = 255u;
79 
80     // Test regular half size.
81     ScaleRowDown2Box_C(orig_pixels, 128, dst_pixels_c, 64);
82 
83     EXPECT_EQ(64u, dst_pixels_c[0]);
84     EXPECT_EQ(25u, dst_pixels_c[1]);
85     EXPECT_EQ(13u, dst_pixels_c[2]);
86     EXPECT_EQ(5u, dst_pixels_c[3]);
87     EXPECT_EQ(0u, dst_pixels_c[4]);
88     EXPECT_EQ(133u, dst_pixels_c[63]);
89 
90     // Test Odd width version - Last pixel is just 1 horizontal pixel.
91     ScaleRowDown2Box_Odd_C(orig_pixels, 128, dst_pixels_c, 64);
92 
93     EXPECT_EQ(64u, dst_pixels_c[0]);
94     EXPECT_EQ(25u, dst_pixels_c[1]);
95     EXPECT_EQ(13u, dst_pixels_c[2]);
96     EXPECT_EQ(5u, dst_pixels_c[3]);
97     EXPECT_EQ(0u, dst_pixels_c[4]);
98     EXPECT_EQ(10u, dst_pixels_c[63]);
99 
100     // Test one pixel less, should skip the last pixel.
101     memset(dst_pixels_c, 0, sizeof(dst_pixels_c));
102     ScaleRowDown2Box_Odd_C(orig_pixels, 128, dst_pixels_c, 63);
103 
104     EXPECT_EQ(64u, dst_pixels_c[0]);
105     EXPECT_EQ(25u, dst_pixels_c[1]);
106     EXPECT_EQ(13u, dst_pixels_c[2]);
107     EXPECT_EQ(5u, dst_pixels_c[3]);
108     EXPECT_EQ(0u, dst_pixels_c[4]);
109     EXPECT_EQ(0u, dst_pixels_c[63]);
110 
111     // Test regular half size SSSE3.
112     ScaleRowDown2Box_SSSE3(orig_pixels, 128, dst_pixels_opt, 64);
113 
114     EXPECT_EQ(64u, dst_pixels_opt[0]);
115     EXPECT_EQ(25u, dst_pixels_opt[1]);
116     EXPECT_EQ(13u, dst_pixels_opt[2]);
117     EXPECT_EQ(5u, dst_pixels_opt[3]);
118     EXPECT_EQ(0u, dst_pixels_opt[4]);
119     EXPECT_EQ(133u, dst_pixels_opt[63]);
120 
121     // Compare C and SSSE3 match.
122     ScaleRowDown2Box_Odd_C(orig_pixels, 128, dst_pixels_c, 64);
123     ScaleRowDown2Box_Odd_SSSE3(orig_pixels, 128, dst_pixels_opt, 64);
124     for (int i = 0; i < 64; ++i) {
125       EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
126     }
127   }
128 }
129 #endif  // HAS_SCALEROWDOWN2_SSSE3
130 
131 extern "C" void ScaleRowDown2Box_16_NEON(const uint16_t* src_ptr,
132                                          ptrdiff_t src_stride,
133                                          uint16_t* dst,
134                                          int dst_width);
135 
TEST_F(LibYUVScaleTest,TestScaleRowDown2Box_16)136 TEST_F(LibYUVScaleTest, TestScaleRowDown2Box_16) {
137   SIMD_ALIGNED(uint16_t orig_pixels[2560 * 2]);
138   SIMD_ALIGNED(uint16_t dst_pixels_c[1280]);
139   SIMD_ALIGNED(uint16_t dst_pixels_opt[1280]);
140 
141   memset(orig_pixels, 0, sizeof(orig_pixels));
142   memset(dst_pixels_c, 1, sizeof(dst_pixels_c));
143   memset(dst_pixels_opt, 2, sizeof(dst_pixels_opt));
144 
145   for (int i = 0; i < 2560 * 2; ++i) {
146     orig_pixels[i] = i;
147   }
148   ScaleRowDown2Box_16_C(&orig_pixels[0], 2560, &dst_pixels_c[0], 1280);
149   for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
150 #if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
151     int has_neon = TestCpuFlag(kCpuHasNEON);
152     if (has_neon) {
153       ScaleRowDown2Box_16_NEON(&orig_pixels[0], 2560, &dst_pixels_opt[0], 1280);
154     } else {
155       ScaleRowDown2Box_16_C(&orig_pixels[0], 2560, &dst_pixels_opt[0], 1280);
156     }
157 #else
158     ScaleRowDown2Box_16_C(&orig_pixels[0], 2560, &dst_pixels_opt[0], 1280);
159 #endif
160   }
161 
162   for (int i = 0; i < 1280; ++i) {
163     EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
164   }
165 
166   EXPECT_EQ(dst_pixels_c[0], (0 + 1 + 2560 + 2561 + 2) / 4);
167   EXPECT_EQ(dst_pixels_c[1279], 3839);
168 }
169 #endif  // ENABLE_ROW_TESTS
170 
171 // Test scaling plane with 8 bit C vs 12 bit C and return maximum pixel
172 // difference.
173 // 0 = exact.
TestPlaneFilter_16(int src_width,int src_height,int dst_width,int dst_height,FilterMode f,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info)174 static int TestPlaneFilter_16(int src_width,
175                               int src_height,
176                               int dst_width,
177                               int dst_height,
178                               FilterMode f,
179                               int benchmark_iterations,
180                               int disable_cpu_flags,
181                               int benchmark_cpu_info) {
182   if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
183     return 0;
184   }
185 
186   int i;
187   int64_t src_y_plane_size = (Abs(src_width)) * (Abs(src_height));
188   int src_stride_y = Abs(src_width);
189   int dst_y_plane_size = dst_width * dst_height;
190   int dst_stride_y = dst_width;
191 
192   align_buffer_page_end(src_y, src_y_plane_size);
193   align_buffer_page_end(src_y_16, src_y_plane_size * 2);
194   align_buffer_page_end(dst_y_8, dst_y_plane_size);
195   align_buffer_page_end(dst_y_16, dst_y_plane_size * 2);
196   uint16_t* p_src_y_16 = reinterpret_cast<uint16_t*>(src_y_16);
197   uint16_t* p_dst_y_16 = reinterpret_cast<uint16_t*>(dst_y_16);
198 
199   MemRandomize(src_y, src_y_plane_size);
200   memset(dst_y_8, 0, dst_y_plane_size);
201   memset(dst_y_16, 1, dst_y_plane_size * 2);
202 
203   for (i = 0; i < src_y_plane_size; ++i) {
204     p_src_y_16[i] = src_y[i] & 255;
205   }
206 
207   MaskCpuFlags(disable_cpu_flags);  // Disable all CPU optimization.
208   ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y_8, dst_stride_y,
209              dst_width, dst_height, f);
210   MaskCpuFlags(benchmark_cpu_info);  // Enable all CPU optimization.
211 
212   for (i = 0; i < benchmark_iterations; ++i) {
213     ScalePlane_16(p_src_y_16, src_stride_y, src_width, src_height, p_dst_y_16,
214                   dst_stride_y, dst_width, dst_height, f);
215   }
216 
217   // Expect an exact match.
218   int max_diff = 0;
219   for (i = 0; i < dst_y_plane_size; ++i) {
220     int abs_diff = Abs(dst_y_8[i] - p_dst_y_16[i]);
221     if (abs_diff > max_diff) {
222       max_diff = abs_diff;
223     }
224   }
225 
226   free_aligned_buffer_page_end(dst_y_8);
227   free_aligned_buffer_page_end(dst_y_16);
228   free_aligned_buffer_page_end(src_y);
229   free_aligned_buffer_page_end(src_y_16);
230 
231   return max_diff;
232 }
233 
234 // The following adjustments in dimensions ensure the scale factor will be
235 // exactly achieved.
236 // 2 is chroma subsample.
237 #define DX(x, nom, denom) static_cast<int>(((Abs(x) / nom + 1) / 2) * nom * 2)
238 #define SX(x, nom, denom) static_cast<int>(((x / nom + 1) / 2) * denom * 2)
239 
240 #define TEST_FACTOR1(name, filter, nom, denom, max_diff)                       \
241   TEST_F(LibYUVScaleTest, DISABLED_##ScalePlaneDownBy##name##_##filter##_16) { \
242     int diff = TestPlaneFilter_16(                                             \
243         SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom),   \
244         DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom),   \
245         kFilter##filter, benchmark_iterations_, disable_cpu_flags_,            \
246         benchmark_cpu_info_);                                                  \
247     EXPECT_LE(diff, max_diff);                                                 \
248   }
249 
250 // Test a scale factor with all 4 filters.  Expect unfiltered to be exact, but
251 // filtering is different fixed point implementations for SSSE3, Neon and C.
252 #define TEST_FACTOR(name, nom, denom, boxdiff)      \
253   TEST_FACTOR1(name, None, nom, denom, 0)           \
254   TEST_FACTOR1(name, Linear, nom, denom, boxdiff)   \
255   TEST_FACTOR1(name, Bilinear, nom, denom, boxdiff) \
256   TEST_FACTOR1(name, Box, nom, denom, boxdiff)
257 
258 TEST_FACTOR(2, 1, 2, 0)
259 TEST_FACTOR(4, 1, 4, 0)
260 // TEST_FACTOR(8, 1, 8, 0) Disable for benchmark performance.  Takes 90 seconds.
261 TEST_FACTOR(3by4, 3, 4, 1)
262 TEST_FACTOR(3by8, 3, 8, 1)
263 TEST_FACTOR(3, 1, 3, 0)
264 #undef TEST_FACTOR1
265 #undef TEST_FACTOR
266 #undef SX
267 #undef DX
268 
// Scales a 480x3 plane holding a byte ramp down to 160x1 and checks the first
// output pixel, first with bilinear filtering (repeated as a benchmark) and
// then with no filtering.
TEST_F(LibYUVScaleTest, PlaneTest3x) {
  const int kSrcWidth = 480;
  const int kSrcHeight = 3;
  const int kDstWidth = 160;
  const int kDstHeight = 1;
  const int kSrcStride = kSrcWidth;
  const int kDstStride = kDstWidth;
  const int kSize = kSrcStride * kSrcHeight;

  align_buffer_page_end(orig_pixels, kSize);
  for (int idx = 0; idx < kSize; ++idx) {
    orig_pixels[idx] = static_cast<uint8_t>(idx);  // Ramp wraps every 256.
  }
  align_buffer_page_end(dest_pixels, kDstStride);

  // One call produces a single 160-pixel row, so scale the iteration count by
  // the number of 160-pixel rows in the benchmark frame (rounded up).
  const int rows_per_frame =
      (benchmark_width_ * benchmark_height_ + (kDstWidth - 1)) / kDstWidth;
  const int iterations160 = rows_per_frame * benchmark_iterations_;
  for (int remaining = iterations160; remaining > 0; --remaining) {
    ScalePlane(orig_pixels, kSrcStride, kSrcWidth, kSrcHeight, dest_pixels,
               kDstStride, kDstWidth, kDstHeight, kFilterBilinear);
  }

  EXPECT_EQ(225, dest_pixels[0]);

  ScalePlane(orig_pixels, kSrcStride, kSrcWidth, kSrcHeight, dest_pixels,
             kDstStride, kDstWidth, kDstHeight, kFilterNone);

  EXPECT_EQ(225, dest_pixels[0]);

  free_aligned_buffer_page_end(dest_pixels);
  free_aligned_buffer_page_end(orig_pixels);
}
296 
// Scales a 640x4 plane holding a byte ramp down to 160x1 and checks the first
// output pixel, first with bilinear filtering (repeated as a benchmark) and
// then with no filtering.
TEST_F(LibYUVScaleTest, PlaneTest4x) {
  const int kSrcWidth = 640;
  const int kSrcHeight = 4;
  const int kDstWidth = 160;
  const int kDstHeight = 1;
  const int kSrcStride = kSrcWidth;
  const int kDstStride = kDstWidth;
  const int kSize = kSrcStride * kSrcHeight;

  align_buffer_page_end(orig_pixels, kSize);
  for (int idx = 0; idx < kSize; ++idx) {
    orig_pixels[idx] = static_cast<uint8_t>(idx);  // Ramp wraps every 256.
  }
  align_buffer_page_end(dest_pixels, kDstStride);

  // One call produces a single 160-pixel row, so scale the iteration count by
  // the number of 160-pixel rows in the benchmark frame (rounded up).
  const int rows_per_frame =
      (benchmark_width_ * benchmark_height_ + (kDstWidth - 1)) / kDstWidth;
  const int iterations160 = rows_per_frame * benchmark_iterations_;
  for (int remaining = iterations160; remaining > 0; --remaining) {
    ScalePlane(orig_pixels, kSrcStride, kSrcWidth, kSrcHeight, dest_pixels,
               kDstStride, kDstWidth, kDstHeight, kFilterBilinear);
  }

  EXPECT_EQ(66, dest_pixels[0]);

  ScalePlane(orig_pixels, kSrcStride, kSrcWidth, kSrcHeight, dest_pixels,
             kDstStride, kDstWidth, kDstHeight, kFilterNone);

  EXPECT_EQ(2, dest_pixels[0]);  // expect the 3rd pixel of the 3rd row

  free_aligned_buffer_page_end(dest_pixels);
  free_aligned_buffer_page_end(orig_pixels);
}
324 
325 // Intent is to test 200x50 to 50x200 but width and height can be parameters.
TEST_F(LibYUVScaleTest,PlaneTestRotate_None)326 TEST_F(LibYUVScaleTest, PlaneTestRotate_None) {
327   const int kSize = benchmark_width_ * benchmark_height_;
328   align_buffer_page_end(orig_pixels, kSize);
329   for (int i = 0; i < kSize; ++i) {
330     orig_pixels[i] = i;
331   }
332   align_buffer_page_end(dest_opt_pixels, kSize);
333   align_buffer_page_end(dest_c_pixels, kSize);
334 
335   MaskCpuFlags(disable_cpu_flags_);  // Disable all CPU optimization.
336   ScalePlane(orig_pixels, benchmark_width_, benchmark_width_, benchmark_height_,
337              dest_c_pixels, benchmark_height_, benchmark_height_,
338              benchmark_width_, kFilterNone);
339   MaskCpuFlags(benchmark_cpu_info_);  // Enable all CPU optimization.
340 
341   for (int i = 0; i < benchmark_iterations_; ++i) {
342     ScalePlane(orig_pixels, benchmark_width_, benchmark_width_,
343                benchmark_height_, dest_opt_pixels, benchmark_height_,
344                benchmark_height_, benchmark_width_, kFilterNone);
345   }
346 
347   for (int i = 0; i < kSize; ++i) {
348     EXPECT_EQ(dest_c_pixels[i], dest_opt_pixels[i]);
349   }
350 
351   free_aligned_buffer_page_end(dest_c_pixels);
352   free_aligned_buffer_page_end(dest_opt_pixels);
353   free_aligned_buffer_page_end(orig_pixels);
354 }
355 
// Scales a width x height plane to height x width with kFilterBilinear and
// expects the result with CPU optimizations enabled to equal the result with
// them masked off.
TEST_F(LibYUVScaleTest, PlaneTestRotate_Bilinear) {
  const int src_width = benchmark_width_;
  const int src_height = benchmark_height_;
  // The destination swaps the two dimensions, so both planes hold kSize bytes.
  const int dst_width = src_height;
  const int dst_height = src_width;
  const int kSize = src_width * src_height;

  align_buffer_page_end(orig_pixels, kSize);
  for (int idx = 0; idx < kSize; ++idx) {
    orig_pixels[idx] = static_cast<uint8_t>(idx);
  }
  align_buffer_page_end(dest_opt_pixels, kSize);
  align_buffer_page_end(dest_c_pixels, kSize);

  MaskCpuFlags(disable_cpu_flags_);  // Disable all CPU optimization.
  ScalePlane(orig_pixels, src_width, src_width, src_height, dest_c_pixels,
             dst_width, dst_width, dst_height, kFilterBilinear);
  MaskCpuFlags(benchmark_cpu_info_);  // Enable all CPU optimization.

  for (int iter = 0; iter < benchmark_iterations_; ++iter) {
    ScalePlane(orig_pixels, src_width, src_width, src_height, dest_opt_pixels,
               dst_width, dst_width, dst_height, kFilterBilinear);
  }

  for (int idx = 0; idx < kSize; ++idx) {
    EXPECT_EQ(dest_c_pixels[idx], dest_opt_pixels[idx]);
  }

  free_aligned_buffer_page_end(dest_c_pixels);
  free_aligned_buffer_page_end(dest_opt_pixels);
  free_aligned_buffer_page_end(orig_pixels);
}
385 
386 // Intent is to test 200x50 to 50x200 but width and height can be parameters.
TEST_F(LibYUVScaleTest,PlaneTestRotate_Box)387 TEST_F(LibYUVScaleTest, PlaneTestRotate_Box) {
388   const int kSize = benchmark_width_ * benchmark_height_;
389   align_buffer_page_end(orig_pixels, kSize);
390   for (int i = 0; i < kSize; ++i) {
391     orig_pixels[i] = i;
392   }
393   align_buffer_page_end(dest_opt_pixels, kSize);
394   align_buffer_page_end(dest_c_pixels, kSize);
395 
396   MaskCpuFlags(disable_cpu_flags_);  // Disable all CPU optimization.
397   ScalePlane(orig_pixels, benchmark_width_, benchmark_width_, benchmark_height_,
398              dest_c_pixels, benchmark_height_, benchmark_height_,
399              benchmark_width_, kFilterBox);
400   MaskCpuFlags(benchmark_cpu_info_);  // Enable all CPU optimization.
401 
402   for (int i = 0; i < benchmark_iterations_; ++i) {
403     ScalePlane(orig_pixels, benchmark_width_, benchmark_width_,
404                benchmark_height_, dest_opt_pixels, benchmark_height_,
405                benchmark_height_, benchmark_width_, kFilterBox);
406   }
407 
408   for (int i = 0; i < kSize; ++i) {
409     EXPECT_EQ(dest_c_pixels[i], dest_opt_pixels[i]);
410   }
411 
412   free_aligned_buffer_page_end(dest_c_pixels);
413   free_aligned_buffer_page_end(dest_opt_pixels);
414   free_aligned_buffer_page_end(orig_pixels);
415 }
416 
// Scales a 1x1 plane to 1x2 with kFilterBox.  Both output rows must equal the
// single source pixel, and the byte after the destination must be untouched.
TEST_F(LibYUVScaleTest, PlaneTest1_Box) {
  const int kPaddedSize = 3;
  const uint8_t kUntouched = 3;  // Sentinel pre-filled into the destination.

  align_buffer_page_end(orig_pixels, kPaddedSize);
  align_buffer_page_end(dst_pixels, kPaddedSize);

  // Pad the 1x1 byte image with invalid values before and after in case libyuv
  // reads outside the memory boundaries.  The source becomes {0, 1, 2}; the
  // middle value is the pixel that gets scaled.
  for (int idx = 0; idx < kPaddedSize; ++idx) {
    orig_pixels[idx] = static_cast<uint8_t>(idx);
    dst_pixels[idx] = kUntouched;
  }

  const uint8_t* const src_pixel = orig_pixels + 1;
  libyuv::ScalePlane(src_pixel, /* src_stride= */ 1, /* src_width= */ 1,
                     /* src_height= */ 1, dst_pixels, /* dst_stride= */ 1,
                     /* dst_width= */ 1, /* dst_height= */ 2,
                     libyuv::kFilterBox);

  EXPECT_EQ(dst_pixels[0], 1);
  EXPECT_EQ(dst_pixels[1], 1);
  EXPECT_EQ(dst_pixels[2], 3);

  free_aligned_buffer_page_end(dst_pixels);
  free_aligned_buffer_page_end(orig_pixels);
}
442 
// 16-bit variant: scales a 1x1 plane of uint16_t to 1x2 with ScalePlane_16.
// Both output rows must equal the single source pixel, and the element after
// the destination must be untouched.
// NOTE(review): the test name says Box but the call passes kFilterNone, while
// the 8-bit PlaneTest1_Box passes kFilterBox - confirm which is intended.
TEST_F(LibYUVScaleTest, PlaneTest1_16_Box) {
  const int kPaddedSize = 3;
  const uint16_t kUntouched = 3;  // Sentinel pre-filled into the destination.

  // Two bytes per 16-bit pixel.
  align_buffer_page_end(orig_pixels_alloc, kPaddedSize * 2);
  align_buffer_page_end(dst_pixels_alloc, kPaddedSize * 2);
  uint16_t* orig_pixels = reinterpret_cast<uint16_t*>(orig_pixels_alloc);
  uint16_t* dst_pixels = reinterpret_cast<uint16_t*>(dst_pixels_alloc);

  // Pad the 1x1 image with invalid values before and after in case libyuv
  // reads outside the memory boundaries.  The source becomes {0, 1, 2}; the
  // middle value is the pixel that gets scaled.
  for (int idx = 0; idx < kPaddedSize; ++idx) {
    orig_pixels[idx] = static_cast<uint16_t>(idx);
    dst_pixels[idx] = kUntouched;
  }

  const uint16_t* const src_pixel = orig_pixels + 1;
  libyuv::ScalePlane_16(
      src_pixel, /* src_stride= */ 1, /* src_width= */ 1,
      /* src_height= */ 1, dst_pixels, /* dst_stride= */ 1,
      /* dst_width= */ 1, /* dst_height= */ 2, libyuv::kFilterNone);

  EXPECT_EQ(dst_pixels[0], 1);
  EXPECT_EQ(dst_pixels[1], 1);
  EXPECT_EQ(dst_pixels[2], 3);

  free_aligned_buffer_page_end(dst_pixels_alloc);
  free_aligned_buffer_page_end(orig_pixels_alloc);
}
470 }  // namespace libyuv
471