1*4e366538SXin Li /*
2*4e366538SXin Li * Copyright 2012 The LibYuv Project Authors. All rights reserved.
3*4e366538SXin Li *
4*4e366538SXin Li * Use of this source code is governed by a BSD-style license
5*4e366538SXin Li * that can be found in the LICENSE file in the root of the source
6*4e366538SXin Li * tree. An additional intellectual property rights grant can be found
7*4e366538SXin Li * in the file PATENTS. All contributing project authors may
8*4e366538SXin Li * be found in the AUTHORS file in the root of the source tree.
9*4e366538SXin Li */
10*4e366538SXin Li
11*4e366538SXin Li #include <stdlib.h>
12*4e366538SXin Li
13*4e366538SXin Li #include "../unit_test/unit_test.h"
14*4e366538SXin Li #include "libyuv/cpu_id.h"
15*4e366538SXin Li #include "libyuv/rotate.h"
16*4e366538SXin Li
17*4e366538SXin Li #ifdef ENABLE_ROW_TESTS
18*4e366538SXin Li #include "libyuv/rotate_row.h"
19*4e366538SXin Li #endif
20*4e366538SXin Li
21*4e366538SXin Li namespace libyuv {
22*4e366538SXin Li
// Divides v by a, rounding up (for non-negative v and positive a).
#define SUBSAMPLE(v, a) (((v) + (a) - 1) / (a))
24*4e366538SXin Li
I420TestRotate(int src_width,int src_height,int dst_width,int dst_height,libyuv::RotationMode mode,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info)25*4e366538SXin Li static void I420TestRotate(int src_width,
26*4e366538SXin Li int src_height,
27*4e366538SXin Li int dst_width,
28*4e366538SXin Li int dst_height,
29*4e366538SXin Li libyuv::RotationMode mode,
30*4e366538SXin Li int benchmark_iterations,
31*4e366538SXin Li int disable_cpu_flags,
32*4e366538SXin Li int benchmark_cpu_info) {
33*4e366538SXin Li if (src_width < 1) {
34*4e366538SXin Li src_width = 1;
35*4e366538SXin Li }
36*4e366538SXin Li if (src_height == 0) {
37*4e366538SXin Li src_height = 1;
38*4e366538SXin Li }
39*4e366538SXin Li if (dst_width < 1) {
40*4e366538SXin Li dst_width = 1;
41*4e366538SXin Li }
42*4e366538SXin Li if (dst_height < 1) {
43*4e366538SXin Li dst_height = 1;
44*4e366538SXin Li }
45*4e366538SXin Li int src_i420_y_size = src_width * Abs(src_height);
46*4e366538SXin Li int src_i420_uv_size = ((src_width + 1) / 2) * ((Abs(src_height) + 1) / 2);
47*4e366538SXin Li int src_i420_size = src_i420_y_size + src_i420_uv_size * 2;
48*4e366538SXin Li align_buffer_page_end(src_i420, src_i420_size);
49*4e366538SXin Li for (int i = 0; i < src_i420_size; ++i) {
50*4e366538SXin Li src_i420[i] = fastrand() & 0xff;
51*4e366538SXin Li }
52*4e366538SXin Li
53*4e366538SXin Li int dst_i420_y_size = dst_width * dst_height;
54*4e366538SXin Li int dst_i420_uv_size = ((dst_width + 1) / 2) * ((dst_height + 1) / 2);
55*4e366538SXin Li int dst_i420_size = dst_i420_y_size + dst_i420_uv_size * 2;
56*4e366538SXin Li align_buffer_page_end(dst_i420_c, dst_i420_size);
57*4e366538SXin Li align_buffer_page_end(dst_i420_opt, dst_i420_size);
58*4e366538SXin Li memset(dst_i420_c, 2, dst_i420_size);
59*4e366538SXin Li memset(dst_i420_opt, 3, dst_i420_size);
60*4e366538SXin Li
61*4e366538SXin Li MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
62*4e366538SXin Li I420Rotate(src_i420, src_width, src_i420 + src_i420_y_size,
63*4e366538SXin Li (src_width + 1) / 2, src_i420 + src_i420_y_size + src_i420_uv_size,
64*4e366538SXin Li (src_width + 1) / 2, dst_i420_c, dst_width,
65*4e366538SXin Li dst_i420_c + dst_i420_y_size, (dst_width + 1) / 2,
66*4e366538SXin Li dst_i420_c + dst_i420_y_size + dst_i420_uv_size,
67*4e366538SXin Li (dst_width + 1) / 2, src_width, src_height, mode);
68*4e366538SXin Li
69*4e366538SXin Li MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
70*4e366538SXin Li for (int i = 0; i < benchmark_iterations; ++i) {
71*4e366538SXin Li I420Rotate(
72*4e366538SXin Li src_i420, src_width, src_i420 + src_i420_y_size, (src_width + 1) / 2,
73*4e366538SXin Li src_i420 + src_i420_y_size + src_i420_uv_size, (src_width + 1) / 2,
74*4e366538SXin Li dst_i420_opt, dst_width, dst_i420_opt + dst_i420_y_size,
75*4e366538SXin Li (dst_width + 1) / 2, dst_i420_opt + dst_i420_y_size + dst_i420_uv_size,
76*4e366538SXin Li (dst_width + 1) / 2, src_width, src_height, mode);
77*4e366538SXin Li }
78*4e366538SXin Li
79*4e366538SXin Li // Rotation should be exact.
80*4e366538SXin Li for (int i = 0; i < dst_i420_size; ++i) {
81*4e366538SXin Li EXPECT_EQ(dst_i420_c[i], dst_i420_opt[i]);
82*4e366538SXin Li }
83*4e366538SXin Li
84*4e366538SXin Li free_aligned_buffer_page_end(dst_i420_c);
85*4e366538SXin Li free_aligned_buffer_page_end(dst_i420_opt);
86*4e366538SXin Li free_aligned_buffer_page_end(src_i420);
87*4e366538SXin Li }
88*4e366538SXin Li
// I420 with kRotate0: the destination keeps the source dimensions.
TEST_F(LibYUVRotateTest, I420Rotate0_Opt) {
  const int kWidth = benchmark_width_;
  const int kHeight = benchmark_height_;
  I420TestRotate(kWidth, kHeight, kWidth, kHeight, kRotate0,
                 benchmark_iterations_, disable_cpu_flags_,
                 benchmark_cpu_info_);
}
94*4e366538SXin Li
// I420 with kRotate90: the destination swaps width and height.
TEST_F(LibYUVRotateTest, I420Rotate90_Opt) {
  const int kWidth = benchmark_width_;
  const int kHeight = benchmark_height_;
  I420TestRotate(kWidth, kHeight, kHeight, kWidth, kRotate90,
                 benchmark_iterations_, disable_cpu_flags_,
                 benchmark_cpu_info_);
}
100*4e366538SXin Li
// I420 with kRotate180: the destination keeps the source dimensions.
TEST_F(LibYUVRotateTest, I420Rotate180_Opt) {
  const int kWidth = benchmark_width_;
  const int kHeight = benchmark_height_;
  I420TestRotate(kWidth, kHeight, kWidth, kHeight, kRotate180,
                 benchmark_iterations_, disable_cpu_flags_,
                 benchmark_cpu_info_);
}
106*4e366538SXin Li
// I420 with kRotate270: the destination swaps width and height.
TEST_F(LibYUVRotateTest, I420Rotate270_Opt) {
  const int kWidth = benchmark_width_;
  const int kHeight = benchmark_height_;
  I420TestRotate(kWidth, kHeight, kHeight, kWidth, kRotate270,
                 benchmark_iterations_, disable_cpu_flags_,
                 benchmark_cpu_info_);
}
112*4e366538SXin Li
// TODO(fbarchard): Remove odd width tests.
// Odd width tests work but are disabled because they use C code and can be
// tested by passing an odd width via the command line or an environment
// variable.
// I420 with kRotate0 on an image one pixel wider and taller than the
// benchmark size.
TEST_F(LibYUVRotateTest, DISABLED_I420Rotate0_Odd) {
  const int kWidth = benchmark_width_ + 1;
  const int kHeight = benchmark_height_ + 1;
  I420TestRotate(kWidth, kHeight, kWidth, kHeight, kRotate0,
                 benchmark_iterations_, disable_cpu_flags_,
                 benchmark_cpu_info_);
}
122*4e366538SXin Li
// I420 with kRotate90 on an image one pixel wider and taller than the
// benchmark size; the destination swaps width and height.
TEST_F(LibYUVRotateTest, DISABLED_I420Rotate90_Odd) {
  const int kWidth = benchmark_width_ + 1;
  const int kHeight = benchmark_height_ + 1;
  I420TestRotate(kWidth, kHeight, kHeight, kWidth, kRotate90,
                 benchmark_iterations_, disable_cpu_flags_,
                 benchmark_cpu_info_);
}
129*4e366538SXin Li
// I420 with kRotate180 on an image one pixel wider and taller than the
// benchmark size.
TEST_F(LibYUVRotateTest, DISABLED_I420Rotate180_Odd) {
  const int kWidth = benchmark_width_ + 1;
  const int kHeight = benchmark_height_ + 1;
  I420TestRotate(kWidth, kHeight, kWidth, kHeight, kRotate180,
                 benchmark_iterations_, disable_cpu_flags_,
                 benchmark_cpu_info_);
}
136*4e366538SXin Li
// I420 with kRotate270 on an image one pixel wider and taller than the
// benchmark size; the destination swaps width and height.
TEST_F(LibYUVRotateTest, DISABLED_I420Rotate270_Odd) {
  const int kWidth = benchmark_width_ + 1;
  const int kHeight = benchmark_height_ + 1;
  I420TestRotate(kWidth, kHeight, kHeight, kWidth, kRotate270,
                 benchmark_iterations_, disable_cpu_flags_,
                 benchmark_cpu_info_);
}
143*4e366538SXin Li
I422TestRotate(int src_width,int src_height,int dst_width,int dst_height,libyuv::RotationMode mode,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info)144*4e366538SXin Li static void I422TestRotate(int src_width,
145*4e366538SXin Li int src_height,
146*4e366538SXin Li int dst_width,
147*4e366538SXin Li int dst_height,
148*4e366538SXin Li libyuv::RotationMode mode,
149*4e366538SXin Li int benchmark_iterations,
150*4e366538SXin Li int disable_cpu_flags,
151*4e366538SXin Li int benchmark_cpu_info) {
152*4e366538SXin Li if (src_width < 1) {
153*4e366538SXin Li src_width = 1;
154*4e366538SXin Li }
155*4e366538SXin Li if (src_height == 0) {
156*4e366538SXin Li src_height = 1;
157*4e366538SXin Li }
158*4e366538SXin Li if (dst_width < 1) {
159*4e366538SXin Li dst_width = 1;
160*4e366538SXin Li }
161*4e366538SXin Li if (dst_height < 1) {
162*4e366538SXin Li dst_height = 1;
163*4e366538SXin Li }
164*4e366538SXin Li int src_i422_y_size = src_width * Abs(src_height);
165*4e366538SXin Li int src_i422_uv_size = ((src_width + 1) / 2) * Abs(src_height);
166*4e366538SXin Li int src_i422_size = src_i422_y_size + src_i422_uv_size * 2;
167*4e366538SXin Li align_buffer_page_end(src_i422, src_i422_size);
168*4e366538SXin Li for (int i = 0; i < src_i422_size; ++i) {
169*4e366538SXin Li src_i422[i] = fastrand() & 0xff;
170*4e366538SXin Li }
171*4e366538SXin Li
172*4e366538SXin Li int dst_i422_y_size = dst_width * dst_height;
173*4e366538SXin Li int dst_i422_uv_size = ((dst_width + 1) / 2) * dst_height;
174*4e366538SXin Li int dst_i422_size = dst_i422_y_size + dst_i422_uv_size * 2;
175*4e366538SXin Li align_buffer_page_end(dst_i422_c, dst_i422_size);
176*4e366538SXin Li align_buffer_page_end(dst_i422_opt, dst_i422_size);
177*4e366538SXin Li memset(dst_i422_c, 2, dst_i422_size);
178*4e366538SXin Li memset(dst_i422_opt, 3, dst_i422_size);
179*4e366538SXin Li
180*4e366538SXin Li MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
181*4e366538SXin Li I422Rotate(src_i422, src_width, src_i422 + src_i422_y_size,
182*4e366538SXin Li (src_width + 1) / 2, src_i422 + src_i422_y_size + src_i422_uv_size,
183*4e366538SXin Li (src_width + 1) / 2, dst_i422_c, dst_width,
184*4e366538SXin Li dst_i422_c + dst_i422_y_size, (dst_width + 1) / 2,
185*4e366538SXin Li dst_i422_c + dst_i422_y_size + dst_i422_uv_size,
186*4e366538SXin Li (dst_width + 1) / 2, src_width, src_height, mode);
187*4e366538SXin Li
188*4e366538SXin Li MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
189*4e366538SXin Li for (int i = 0; i < benchmark_iterations; ++i) {
190*4e366538SXin Li I422Rotate(
191*4e366538SXin Li src_i422, src_width, src_i422 + src_i422_y_size, (src_width + 1) / 2,
192*4e366538SXin Li src_i422 + src_i422_y_size + src_i422_uv_size, (src_width + 1) / 2,
193*4e366538SXin Li dst_i422_opt, dst_width, dst_i422_opt + dst_i422_y_size,
194*4e366538SXin Li (dst_width + 1) / 2, dst_i422_opt + dst_i422_y_size + dst_i422_uv_size,
195*4e366538SXin Li (dst_width + 1) / 2, src_width, src_height, mode);
196*4e366538SXin Li }
197*4e366538SXin Li
198*4e366538SXin Li // Rotation should be exact.
199*4e366538SXin Li for (int i = 0; i < dst_i422_size; ++i) {
200*4e366538SXin Li EXPECT_EQ(dst_i422_c[i], dst_i422_opt[i]);
201*4e366538SXin Li }
202*4e366538SXin Li
203*4e366538SXin Li free_aligned_buffer_page_end(dst_i422_c);
204*4e366538SXin Li free_aligned_buffer_page_end(dst_i422_opt);
205*4e366538SXin Li free_aligned_buffer_page_end(src_i422);
206*4e366538SXin Li }
207*4e366538SXin Li
// I422 with kRotate0: the destination keeps the source dimensions.
TEST_F(LibYUVRotateTest, I422Rotate0_Opt) {
  const int kWidth = benchmark_width_;
  const int kHeight = benchmark_height_;
  I422TestRotate(kWidth, kHeight, kWidth, kHeight, kRotate0,
                 benchmark_iterations_, disable_cpu_flags_,
                 benchmark_cpu_info_);
}
213*4e366538SXin Li
// I422 with kRotate90: the destination swaps width and height.
TEST_F(LibYUVRotateTest, I422Rotate90_Opt) {
  const int kWidth = benchmark_width_;
  const int kHeight = benchmark_height_;
  I422TestRotate(kWidth, kHeight, kHeight, kWidth, kRotate90,
                 benchmark_iterations_, disable_cpu_flags_,
                 benchmark_cpu_info_);
}
219*4e366538SXin Li
// I422 with kRotate180: the destination keeps the source dimensions.
TEST_F(LibYUVRotateTest, I422Rotate180_Opt) {
  const int kWidth = benchmark_width_;
  const int kHeight = benchmark_height_;
  I422TestRotate(kWidth, kHeight, kWidth, kHeight, kRotate180,
                 benchmark_iterations_, disable_cpu_flags_,
                 benchmark_cpu_info_);
}
225*4e366538SXin Li
// I422 with kRotate270: the destination swaps width and height.
TEST_F(LibYUVRotateTest, I422Rotate270_Opt) {
  const int kWidth = benchmark_width_;
  const int kHeight = benchmark_height_;
  I422TestRotate(kWidth, kHeight, kHeight, kWidth, kRotate270,
                 benchmark_iterations_, disable_cpu_flags_,
                 benchmark_cpu_info_);
}
231*4e366538SXin Li
I444TestRotate(int src_width,int src_height,int dst_width,int dst_height,libyuv::RotationMode mode,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info)232*4e366538SXin Li static void I444TestRotate(int src_width,
233*4e366538SXin Li int src_height,
234*4e366538SXin Li int dst_width,
235*4e366538SXin Li int dst_height,
236*4e366538SXin Li libyuv::RotationMode mode,
237*4e366538SXin Li int benchmark_iterations,
238*4e366538SXin Li int disable_cpu_flags,
239*4e366538SXin Li int benchmark_cpu_info) {
240*4e366538SXin Li if (src_width < 1) {
241*4e366538SXin Li src_width = 1;
242*4e366538SXin Li }
243*4e366538SXin Li if (src_height == 0) {
244*4e366538SXin Li src_height = 1;
245*4e366538SXin Li }
246*4e366538SXin Li if (dst_width < 1) {
247*4e366538SXin Li dst_width = 1;
248*4e366538SXin Li }
249*4e366538SXin Li if (dst_height < 1) {
250*4e366538SXin Li dst_height = 1;
251*4e366538SXin Li }
252*4e366538SXin Li int src_i444_y_size = src_width * Abs(src_height);
253*4e366538SXin Li int src_i444_uv_size = src_width * Abs(src_height);
254*4e366538SXin Li int src_i444_size = src_i444_y_size + src_i444_uv_size * 2;
255*4e366538SXin Li align_buffer_page_end(src_i444, src_i444_size);
256*4e366538SXin Li for (int i = 0; i < src_i444_size; ++i) {
257*4e366538SXin Li src_i444[i] = fastrand() & 0xff;
258*4e366538SXin Li }
259*4e366538SXin Li
260*4e366538SXin Li int dst_i444_y_size = dst_width * dst_height;
261*4e366538SXin Li int dst_i444_uv_size = dst_width * dst_height;
262*4e366538SXin Li int dst_i444_size = dst_i444_y_size + dst_i444_uv_size * 2;
263*4e366538SXin Li align_buffer_page_end(dst_i444_c, dst_i444_size);
264*4e366538SXin Li align_buffer_page_end(dst_i444_opt, dst_i444_size);
265*4e366538SXin Li memset(dst_i444_c, 2, dst_i444_size);
266*4e366538SXin Li memset(dst_i444_opt, 3, dst_i444_size);
267*4e366538SXin Li
268*4e366538SXin Li MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
269*4e366538SXin Li I444Rotate(src_i444, src_width, src_i444 + src_i444_y_size, src_width,
270*4e366538SXin Li src_i444 + src_i444_y_size + src_i444_uv_size, src_width,
271*4e366538SXin Li dst_i444_c, dst_width, dst_i444_c + dst_i444_y_size, dst_width,
272*4e366538SXin Li dst_i444_c + dst_i444_y_size + dst_i444_uv_size, dst_width,
273*4e366538SXin Li src_width, src_height, mode);
274*4e366538SXin Li
275*4e366538SXin Li MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
276*4e366538SXin Li for (int i = 0; i < benchmark_iterations; ++i) {
277*4e366538SXin Li I444Rotate(src_i444, src_width, src_i444 + src_i444_y_size, src_width,
278*4e366538SXin Li src_i444 + src_i444_y_size + src_i444_uv_size, src_width,
279*4e366538SXin Li dst_i444_opt, dst_width, dst_i444_opt + dst_i444_y_size,
280*4e366538SXin Li dst_width, dst_i444_opt + dst_i444_y_size + dst_i444_uv_size,
281*4e366538SXin Li dst_width, src_width, src_height, mode);
282*4e366538SXin Li }
283*4e366538SXin Li
284*4e366538SXin Li // Rotation should be exact.
285*4e366538SXin Li for (int i = 0; i < dst_i444_size; ++i) {
286*4e366538SXin Li EXPECT_EQ(dst_i444_c[i], dst_i444_opt[i]);
287*4e366538SXin Li }
288*4e366538SXin Li
289*4e366538SXin Li free_aligned_buffer_page_end(dst_i444_c);
290*4e366538SXin Li free_aligned_buffer_page_end(dst_i444_opt);
291*4e366538SXin Li free_aligned_buffer_page_end(src_i444);
292*4e366538SXin Li }
293*4e366538SXin Li
// I444 with kRotate0: the destination keeps the source dimensions.
TEST_F(LibYUVRotateTest, I444Rotate0_Opt) {
  const int kWidth = benchmark_width_;
  const int kHeight = benchmark_height_;
  I444TestRotate(kWidth, kHeight, kWidth, kHeight, kRotate0,
                 benchmark_iterations_, disable_cpu_flags_,
                 benchmark_cpu_info_);
}
299*4e366538SXin Li
// I444 with kRotate90: the destination swaps width and height.
TEST_F(LibYUVRotateTest, I444Rotate90_Opt) {
  const int kWidth = benchmark_width_;
  const int kHeight = benchmark_height_;
  I444TestRotate(kWidth, kHeight, kHeight, kWidth, kRotate90,
                 benchmark_iterations_, disable_cpu_flags_,
                 benchmark_cpu_info_);
}
305*4e366538SXin Li
// I444 with kRotate180: the destination keeps the source dimensions.
TEST_F(LibYUVRotateTest, I444Rotate180_Opt) {
  const int kWidth = benchmark_width_;
  const int kHeight = benchmark_height_;
  I444TestRotate(kWidth, kHeight, kWidth, kHeight, kRotate180,
                 benchmark_iterations_, disable_cpu_flags_,
                 benchmark_cpu_info_);
}
311*4e366538SXin Li
// I444 with kRotate270: the destination swaps width and height.
TEST_F(LibYUVRotateTest, I444Rotate270_Opt) {
  const int kWidth = benchmark_width_;
  const int kHeight = benchmark_height_;
  I444TestRotate(kWidth, kHeight, kHeight, kWidth, kRotate270,
                 benchmark_iterations_, disable_cpu_flags_,
                 benchmark_cpu_info_);
}
317*4e366538SXin Li
// TODO(fbarchard): Remove odd width tests.
// Odd width tests work but are disabled because they use C code and can be
// tested by passing an odd width via the command line or an environment
// variable.
// I444 with kRotate0 on an image one pixel wider and taller than the
// benchmark size.
TEST_F(LibYUVRotateTest, DISABLED_I444Rotate0_Odd) {
  const int kWidth = benchmark_width_ + 1;
  const int kHeight = benchmark_height_ + 1;
  I444TestRotate(kWidth, kHeight, kWidth, kHeight, kRotate0,
                 benchmark_iterations_, disable_cpu_flags_,
                 benchmark_cpu_info_);
}
327*4e366538SXin Li
// I444 with kRotate90 on an image one pixel wider and taller than the
// benchmark size; the destination swaps width and height.
TEST_F(LibYUVRotateTest, DISABLED_I444Rotate90_Odd) {
  const int kWidth = benchmark_width_ + 1;
  const int kHeight = benchmark_height_ + 1;
  I444TestRotate(kWidth, kHeight, kHeight, kWidth, kRotate90,
                 benchmark_iterations_, disable_cpu_flags_,
                 benchmark_cpu_info_);
}
334*4e366538SXin Li
// I444 with kRotate180 on an image one pixel wider and taller than the
// benchmark size.
TEST_F(LibYUVRotateTest, DISABLED_I444Rotate180_Odd) {
  const int kWidth = benchmark_width_ + 1;
  const int kHeight = benchmark_height_ + 1;
  I444TestRotate(kWidth, kHeight, kWidth, kHeight, kRotate180,
                 benchmark_iterations_, disable_cpu_flags_,
                 benchmark_cpu_info_);
}
341*4e366538SXin Li
// I444 with kRotate270 on an image one pixel wider and taller than the
// benchmark size; the destination swaps width and height.
TEST_F(LibYUVRotateTest, DISABLED_I444Rotate270_Odd) {
  const int kWidth = benchmark_width_ + 1;
  const int kHeight = benchmark_height_ + 1;
  I444TestRotate(kWidth, kHeight, kHeight, kWidth, kRotate270,
                 benchmark_iterations_, disable_cpu_flags_,
                 benchmark_cpu_info_);
}
348*4e366538SXin Li
NV12TestRotate(int src_width,int src_height,int dst_width,int dst_height,libyuv::RotationMode mode,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info)349*4e366538SXin Li static void NV12TestRotate(int src_width,
350*4e366538SXin Li int src_height,
351*4e366538SXin Li int dst_width,
352*4e366538SXin Li int dst_height,
353*4e366538SXin Li libyuv::RotationMode mode,
354*4e366538SXin Li int benchmark_iterations,
355*4e366538SXin Li int disable_cpu_flags,
356*4e366538SXin Li int benchmark_cpu_info) {
357*4e366538SXin Li if (src_width < 1) {
358*4e366538SXin Li src_width = 1;
359*4e366538SXin Li }
360*4e366538SXin Li if (src_height == 0) { // allow negative for inversion test.
361*4e366538SXin Li src_height = 1;
362*4e366538SXin Li }
363*4e366538SXin Li if (dst_width < 1) {
364*4e366538SXin Li dst_width = 1;
365*4e366538SXin Li }
366*4e366538SXin Li if (dst_height < 1) {
367*4e366538SXin Li dst_height = 1;
368*4e366538SXin Li }
369*4e366538SXin Li int src_nv12_y_size = src_width * Abs(src_height);
370*4e366538SXin Li int src_nv12_uv_size =
371*4e366538SXin Li ((src_width + 1) / 2) * ((Abs(src_height) + 1) / 2) * 2;
372*4e366538SXin Li int src_nv12_size = src_nv12_y_size + src_nv12_uv_size;
373*4e366538SXin Li align_buffer_page_end(src_nv12, src_nv12_size);
374*4e366538SXin Li for (int i = 0; i < src_nv12_size; ++i) {
375*4e366538SXin Li src_nv12[i] = fastrand() & 0xff;
376*4e366538SXin Li }
377*4e366538SXin Li
378*4e366538SXin Li int dst_i420_y_size = dst_width * dst_height;
379*4e366538SXin Li int dst_i420_uv_size = ((dst_width + 1) / 2) * ((dst_height + 1) / 2);
380*4e366538SXin Li int dst_i420_size = dst_i420_y_size + dst_i420_uv_size * 2;
381*4e366538SXin Li align_buffer_page_end(dst_i420_c, dst_i420_size);
382*4e366538SXin Li align_buffer_page_end(dst_i420_opt, dst_i420_size);
383*4e366538SXin Li memset(dst_i420_c, 2, dst_i420_size);
384*4e366538SXin Li memset(dst_i420_opt, 3, dst_i420_size);
385*4e366538SXin Li
386*4e366538SXin Li MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
387*4e366538SXin Li NV12ToI420Rotate(src_nv12, src_width, src_nv12 + src_nv12_y_size,
388*4e366538SXin Li (src_width + 1) & ~1, dst_i420_c, dst_width,
389*4e366538SXin Li dst_i420_c + dst_i420_y_size, (dst_width + 1) / 2,
390*4e366538SXin Li dst_i420_c + dst_i420_y_size + dst_i420_uv_size,
391*4e366538SXin Li (dst_width + 1) / 2, src_width, src_height, mode);
392*4e366538SXin Li
393*4e366538SXin Li MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
394*4e366538SXin Li for (int i = 0; i < benchmark_iterations; ++i) {
395*4e366538SXin Li NV12ToI420Rotate(src_nv12, src_width, src_nv12 + src_nv12_y_size,
396*4e366538SXin Li (src_width + 1) & ~1, dst_i420_opt, dst_width,
397*4e366538SXin Li dst_i420_opt + dst_i420_y_size, (dst_width + 1) / 2,
398*4e366538SXin Li dst_i420_opt + dst_i420_y_size + dst_i420_uv_size,
399*4e366538SXin Li (dst_width + 1) / 2, src_width, src_height, mode);
400*4e366538SXin Li }
401*4e366538SXin Li
402*4e366538SXin Li // Rotation should be exact.
403*4e366538SXin Li for (int i = 0; i < dst_i420_size; ++i) {
404*4e366538SXin Li EXPECT_EQ(dst_i420_c[i], dst_i420_opt[i]);
405*4e366538SXin Li }
406*4e366538SXin Li
407*4e366538SXin Li free_aligned_buffer_page_end(dst_i420_c);
408*4e366538SXin Li free_aligned_buffer_page_end(dst_i420_opt);
409*4e366538SXin Li free_aligned_buffer_page_end(src_nv12);
410*4e366538SXin Li }
411*4e366538SXin Li
// NV12 to I420 with kRotate0: the destination keeps the source dimensions.
TEST_F(LibYUVRotateTest, NV12Rotate0_Opt) {
  const int kWidth = benchmark_width_;
  const int kHeight = benchmark_height_;
  NV12TestRotate(kWidth, kHeight, kWidth, kHeight, kRotate0,
                 benchmark_iterations_, disable_cpu_flags_,
                 benchmark_cpu_info_);
}
417*4e366538SXin Li
// NV12 to I420 with kRotate90: the destination swaps width and height.
TEST_F(LibYUVRotateTest, NV12Rotate90_Opt) {
  const int kWidth = benchmark_width_;
  const int kHeight = benchmark_height_;
  NV12TestRotate(kWidth, kHeight, kHeight, kWidth, kRotate90,
                 benchmark_iterations_, disable_cpu_flags_,
                 benchmark_cpu_info_);
}
423*4e366538SXin Li
// NV12 to I420 with kRotate180: the destination keeps the source dimensions.
TEST_F(LibYUVRotateTest, NV12Rotate180_Opt) {
  const int kWidth = benchmark_width_;
  const int kHeight = benchmark_height_;
  NV12TestRotate(kWidth, kHeight, kWidth, kHeight, kRotate180,
                 benchmark_iterations_, disable_cpu_flags_,
                 benchmark_cpu_info_);
}
429*4e366538SXin Li
// NV12 to I420 with kRotate270: the destination swaps width and height.
TEST_F(LibYUVRotateTest, NV12Rotate270_Opt) {
  const int kWidth = benchmark_width_;
  const int kHeight = benchmark_height_;
  NV12TestRotate(kWidth, kHeight, kHeight, kWidth, kRotate270,
                 benchmark_iterations_, disable_cpu_flags_,
                 benchmark_cpu_info_);
}
435*4e366538SXin Li
// NV12 to I420 with kRotate0 on an image one pixel wider and taller than the
// benchmark size.
TEST_F(LibYUVRotateTest, DISABLED_NV12Rotate0_Odd) {
  const int kWidth = benchmark_width_ + 1;
  const int kHeight = benchmark_height_ + 1;
  NV12TestRotate(kWidth, kHeight, kWidth, kHeight, kRotate0,
                 benchmark_iterations_, disable_cpu_flags_,
                 benchmark_cpu_info_);
}
442*4e366538SXin Li
// NV12 to I420 with kRotate90 on an image one pixel wider and taller than the
// benchmark size; the destination swaps width and height.
TEST_F(LibYUVRotateTest, DISABLED_NV12Rotate90_Odd) {
  const int kWidth = benchmark_width_ + 1;
  const int kHeight = benchmark_height_ + 1;
  NV12TestRotate(kWidth, kHeight, kHeight, kWidth, kRotate90,
                 benchmark_iterations_, disable_cpu_flags_,
                 benchmark_cpu_info_);
}
449*4e366538SXin Li
// NV12 to I420 with kRotate180 on an image one pixel wider and taller than
// the benchmark size.
TEST_F(LibYUVRotateTest, DISABLED_NV12Rotate180_Odd) {
  const int kWidth = benchmark_width_ + 1;
  const int kHeight = benchmark_height_ + 1;
  NV12TestRotate(kWidth, kHeight, kWidth, kHeight, kRotate180,
                 benchmark_iterations_, disable_cpu_flags_,
                 benchmark_cpu_info_);
}
456*4e366538SXin Li
// NV12 to I420 with kRotate270 on an image one pixel wider and taller than
// the benchmark size; the destination swaps width and height.
TEST_F(LibYUVRotateTest, DISABLED_NV12Rotate270_Odd) {
  const int kWidth = benchmark_width_ + 1;
  const int kHeight = benchmark_height_ + 1;
  NV12TestRotate(kWidth, kHeight, kHeight, kWidth, kRotate270,
                 benchmark_iterations_, disable_cpu_flags_,
                 benchmark_cpu_info_);
}
463*4e366538SXin Li
// NV12 to I420 with kRotate0 and a negated source height.
TEST_F(LibYUVRotateTest, NV12Rotate0_Invert) {
  const int kWidth = benchmark_width_;
  const int kHeight = benchmark_height_;
  NV12TestRotate(kWidth, -kHeight, kWidth, kHeight, kRotate0,
                 benchmark_iterations_, disable_cpu_flags_,
                 benchmark_cpu_info_);
}
469*4e366538SXin Li
// NV12 to I420 with kRotate90 and a negated source height; the destination
// swaps width and height.
TEST_F(LibYUVRotateTest, NV12Rotate90_Invert) {
  const int kWidth = benchmark_width_;
  const int kHeight = benchmark_height_;
  NV12TestRotate(kWidth, -kHeight, kHeight, kWidth, kRotate90,
                 benchmark_iterations_, disable_cpu_flags_,
                 benchmark_cpu_info_);
}
475*4e366538SXin Li
// NV12 to I420 with kRotate180 and a negated source height.
TEST_F(LibYUVRotateTest, NV12Rotate180_Invert) {
  const int kWidth = benchmark_width_;
  const int kHeight = benchmark_height_;
  NV12TestRotate(kWidth, -kHeight, kWidth, kHeight, kRotate180,
                 benchmark_iterations_, disable_cpu_flags_,
                 benchmark_cpu_info_);
}
481*4e366538SXin Li
// NV12 to I420 with kRotate270 and a negated source height; the destination
// swaps width and height.
TEST_F(LibYUVRotateTest, NV12Rotate270_Invert) {
  const int kWidth = benchmark_width_;
  const int kHeight = benchmark_height_;
  NV12TestRotate(kWidth, -kHeight, kHeight, kWidth, kRotate270,
                 benchmark_iterations_, disable_cpu_flags_,
                 benchmark_cpu_info_);
}
487*4e366538SXin Li
488*4e366538SXin Li // Test Android 420 to I420 Rotate
// Defines one LibYUVRotateTest case that calls
// SRC_FMT_PLANAR##To##FMT_PLANAR##Rotate once after
// MaskCpuFlags(disable_cpu_flags_) and benchmark_iterations_ times after
// MaskCpuFlags(benchmark_cpu_info_), then expects the Y, U and V outputs of
// the two runs to be equal.
//   PIXEL_STRIDE    step between consecutive source chroma samples; when it
//                   is 1 the V samples start kSizeUV bytes into the chroma
//                   buffer, otherwise at OFF_V.
//   SRC_SUBSAMP_X/Y source chroma subsampling factors.
//   SUBSAMP_X/Y     destination chroma subsampling factors.
//   W1280           expression used as the image width.
//   N, PN           tokens pasted into the generated test name.
//   NEG             token(s) placed in front of kHeight in the rotate call.
//   OFF             offset added to the source pointers.
//   OFF_U, OFF_V    offsets of U and V inside the source chroma buffer.
//   ROT             rotation value, cast to libyuv::RotationMode.
#define TESTAPLANARTOPI(SRC_FMT_PLANAR, PIXEL_STRIDE, SRC_SUBSAMP_X,          \
                        SRC_SUBSAMP_Y, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y,      \
                        W1280, N, NEG, OFF, PN, OFF_U, OFF_V, ROT)            \
  TEST_F(LibYUVRotateTest,                                                    \
         SRC_FMT_PLANAR##To##FMT_PLANAR##Rotate##ROT##To##PN##N) {            \
    const int kWidth = W1280;                                                 \
    const int kHeight = benchmark_height_;                                    \
    const int kSrcWidthUV = SUBSAMPLE(kWidth, SRC_SUBSAMP_X);                 \
    const int kSrcHeightUV = SUBSAMPLE(kHeight, SRC_SUBSAMP_Y);               \
    const int kSizeUV = kSrcWidthUV * kSrcHeightUV;                           \
    const int kDstWidthUV = SUBSAMPLE(kWidth, SUBSAMP_X);                     \
    const int kDstHeightUV = SUBSAMPLE(kHeight, SUBSAMP_Y);                   \
    const int kDstSizeY = kWidth * kHeight;                                   \
    const int kDstSizeUV = kDstWidthUV * kDstHeightUV;                        \
    const int kSrcSizeY = kWidth * kHeight + OFF;                             \
    const int kSrcSizeUV = kSizeUV * ((PIXEL_STRIDE == 3) ? 3 : 2) + OFF;     \
    align_buffer_page_end(src_y, kSrcSizeY);                                  \
    align_buffer_page_end(src_uv, kSrcSizeUV);                                \
    align_buffer_page_end(dst_y_c, kDstSizeY);                                \
    align_buffer_page_end(dst_u_c, kDstSizeUV);                               \
    align_buffer_page_end(dst_v_c, kDstSizeUV);                               \
    align_buffer_page_end(dst_y_opt, kDstSizeY);                              \
    align_buffer_page_end(dst_u_opt, kDstSizeUV);                             \
    align_buffer_page_end(dst_v_opt, kDstSizeUV);                             \
    uint8_t* src_u = src_uv + OFF_U;                                          \
    uint8_t* src_v = src_uv + (PIXEL_STRIDE == 1 ? kSizeUV : OFF_V);          \
    int src_stride_uv = kDstWidthUV * PIXEL_STRIDE;                           \
    for (int row = 0; row < kHeight; ++row) {                                 \
      for (int col = 0; col < kWidth; ++col) {                                \
        src_y[row * kWidth + col + OFF] = (fastrand() & 0xff);                \
      }                                                                       \
    }                                                                         \
    for (int row = 0; row < kSrcHeightUV; ++row) {                            \
      for (int col = 0; col < kSrcWidthUV; ++col) {                           \
        const int kChromaIndex =                                              \
            (row * src_stride_uv) + col * PIXEL_STRIDE + OFF;                 \
        src_u[kChromaIndex] = (fastrand() & 0xff);                            \
        src_v[kChromaIndex] = (fastrand() & 0xff);                            \
      }                                                                       \
    }                                                                         \
    memset(dst_y_c, 1, kDstSizeY);                                            \
    memset(dst_u_c, 2, kDstSizeUV);                                           \
    memset(dst_v_c, 3, kDstSizeUV);                                           \
    memset(dst_y_opt, 101, kDstSizeY);                                        \
    memset(dst_u_opt, 102, kDstSizeUV);                                       \
    memset(dst_v_opt, 103, kDstSizeUV);                                       \
    MaskCpuFlags(disable_cpu_flags_);                                         \
    SRC_FMT_PLANAR##To##FMT_PLANAR##Rotate(                                   \
        src_y + OFF, kWidth, src_u + OFF, kSrcWidthUV, src_v + OFF,           \
        kSrcWidthUV, PIXEL_STRIDE, dst_y_c, kWidth, dst_u_c, kDstWidthUV,     \
        dst_v_c, kDstWidthUV, kWidth, NEG kHeight,                            \
        (libyuv::RotationMode)ROT);                                           \
    MaskCpuFlags(benchmark_cpu_info_);                                        \
    for (int pass = 0; pass < benchmark_iterations_; ++pass) {                \
      SRC_FMT_PLANAR##To##FMT_PLANAR##Rotate(                                 \
          src_y + OFF, kWidth, src_u + OFF, kSrcWidthUV, src_v + OFF,         \
          kSrcWidthUV, PIXEL_STRIDE, dst_y_opt, kWidth, dst_u_opt,            \
          kDstWidthUV, dst_v_opt, kDstWidthUV, kWidth, NEG kHeight,           \
          (libyuv::RotationMode)ROT);                                         \
    }                                                                         \
    for (int i = 0; i < kHeight; ++i) {                                       \
      for (int j = 0; j < kWidth; ++j) {                                      \
        EXPECT_EQ(dst_y_c[i * kWidth + j], dst_y_opt[i * kWidth + j]);        \
      }                                                                       \
    }                                                                         \
    for (int i = 0; i < kDstHeightUV; ++i) {                                  \
      for (int j = 0; j < kDstWidthUV; ++j) {                                 \
        EXPECT_EQ(dst_u_c[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j],              \
                  dst_u_opt[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j]);           \
      }                                                                       \
    }                                                                         \
    for (int i = 0; i < kDstHeightUV; ++i) {                                  \
      for (int j = 0; j < kDstWidthUV; ++j) {                                 \
        EXPECT_EQ(dst_v_c[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j],              \
                  dst_v_opt[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j]);           \
      }                                                                       \
    }                                                                         \
    free_aligned_buffer_page_end(dst_y_c);                                    \
    free_aligned_buffer_page_end(dst_u_c);                                    \
    free_aligned_buffer_page_end(dst_v_c);                                    \
    free_aligned_buffer_page_end(dst_y_opt);                                  \
    free_aligned_buffer_page_end(dst_u_opt);                                  \
    free_aligned_buffer_page_end(dst_v_opt);                                  \
    free_aligned_buffer_page_end(src_y);                                      \
    free_aligned_buffer_page_end(src_uv);                                     \
  }
577*4e366538SXin Li
578*4e366538SXin Li #define TESTAPLANARTOP(SRC_FMT_PLANAR, PN, PIXEL_STRIDE, OFF_U, OFF_V, \
579*4e366538SXin Li SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, SUBSAMP_X, \
580*4e366538SXin Li SUBSAMP_Y) \
581*4e366538SXin Li TESTAPLANARTOPI(SRC_FMT_PLANAR, PIXEL_STRIDE, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
582*4e366538SXin Li FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, benchmark_width_ + 1, \
583*4e366538SXin Li _Any, +, 0, PN, OFF_U, OFF_V, 0) \
584*4e366538SXin Li TESTAPLANARTOPI(SRC_FMT_PLANAR, PIXEL_STRIDE, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
585*4e366538SXin Li FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, benchmark_width_, \
586*4e366538SXin Li _Unaligned, +, 2, PN, OFF_U, OFF_V, 0) \
587*4e366538SXin Li TESTAPLANARTOPI(SRC_FMT_PLANAR, PIXEL_STRIDE, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
588*4e366538SXin Li FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, benchmark_width_, _Invert, \
589*4e366538SXin Li -, 0, PN, OFF_U, OFF_V, 0) \
590*4e366538SXin Li TESTAPLANARTOPI(SRC_FMT_PLANAR, PIXEL_STRIDE, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
591*4e366538SXin Li FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, benchmark_width_, _Opt, +, \
592*4e366538SXin Li 0, PN, OFF_U, OFF_V, 0) \
593*4e366538SXin Li TESTAPLANARTOPI(SRC_FMT_PLANAR, PIXEL_STRIDE, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
594*4e366538SXin Li FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, benchmark_width_, _Opt, +, \
595*4e366538SXin Li 0, PN, OFF_U, OFF_V, 180)
596*4e366538SXin Li
597*4e366538SXin Li TESTAPLANARTOP(Android420, I420, 1, 0, 0, 2, 2, I420, 2, 2)
598*4e366538SXin Li TESTAPLANARTOP(Android420, NV12, 2, 0, 1, 2, 2, I420, 2, 2)
599*4e366538SXin Li TESTAPLANARTOP(Android420, NV21, 2, 1, 0, 2, 2, I420, 2, 2)
600*4e366538SXin Li #undef TESTAPLANARTOP
601*4e366538SXin Li #undef TESTAPLANARTOPI
602*4e366538SXin Li
I010TestRotate(int src_width,int src_height,int dst_width,int dst_height,libyuv::RotationMode mode,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info)603*4e366538SXin Li static void I010TestRotate(int src_width,
604*4e366538SXin Li int src_height,
605*4e366538SXin Li int dst_width,
606*4e366538SXin Li int dst_height,
607*4e366538SXin Li libyuv::RotationMode mode,
608*4e366538SXin Li int benchmark_iterations,
609*4e366538SXin Li int disable_cpu_flags,
610*4e366538SXin Li int benchmark_cpu_info) {
611*4e366538SXin Li if (src_width < 1) {
612*4e366538SXin Li src_width = 1;
613*4e366538SXin Li }
614*4e366538SXin Li if (src_height == 0) {
615*4e366538SXin Li src_height = 1;
616*4e366538SXin Li }
617*4e366538SXin Li if (dst_width < 1) {
618*4e366538SXin Li dst_width = 1;
619*4e366538SXin Li }
620*4e366538SXin Li if (dst_height < 1) {
621*4e366538SXin Li dst_height = 1;
622*4e366538SXin Li }
623*4e366538SXin Li int src_i010_y_size = src_width * Abs(src_height);
624*4e366538SXin Li int src_i010_uv_size = ((src_width + 1) / 2) * ((Abs(src_height) + 1) / 2);
625*4e366538SXin Li int src_i010_size = src_i010_y_size + src_i010_uv_size * 2;
626*4e366538SXin Li align_buffer_page_end_16(src_i010, src_i010_size);
627*4e366538SXin Li for (int i = 0; i < src_i010_size; ++i) {
628*4e366538SXin Li src_i010[i] = fastrand() & 0x3ff;
629*4e366538SXin Li }
630*4e366538SXin Li
631*4e366538SXin Li int dst_i010_y_size = dst_width * dst_height;
632*4e366538SXin Li int dst_i010_uv_size = ((dst_width + 1) / 2) * ((dst_height + 1) / 2);
633*4e366538SXin Li int dst_i010_size = dst_i010_y_size + dst_i010_uv_size * 2;
634*4e366538SXin Li align_buffer_page_end_16(dst_i010_c, dst_i010_size);
635*4e366538SXin Li align_buffer_page_end_16(dst_i010_opt, dst_i010_size);
636*4e366538SXin Li memset(dst_i010_c, 2, dst_i010_size * 2);
637*4e366538SXin Li memset(dst_i010_opt, 3, dst_i010_size * 2);
638*4e366538SXin Li
639*4e366538SXin Li MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
640*4e366538SXin Li I010Rotate(src_i010, src_width, src_i010 + src_i010_y_size,
641*4e366538SXin Li (src_width + 1) / 2, src_i010 + src_i010_y_size + src_i010_uv_size,
642*4e366538SXin Li (src_width + 1) / 2, dst_i010_c, dst_width,
643*4e366538SXin Li dst_i010_c + dst_i010_y_size, (dst_width + 1) / 2,
644*4e366538SXin Li dst_i010_c + dst_i010_y_size + dst_i010_uv_size,
645*4e366538SXin Li (dst_width + 1) / 2, src_width, src_height, mode);
646*4e366538SXin Li
647*4e366538SXin Li MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
648*4e366538SXin Li for (int i = 0; i < benchmark_iterations; ++i) {
649*4e366538SXin Li I010Rotate(
650*4e366538SXin Li src_i010, src_width, src_i010 + src_i010_y_size, (src_width + 1) / 2,
651*4e366538SXin Li src_i010 + src_i010_y_size + src_i010_uv_size, (src_width + 1) / 2,
652*4e366538SXin Li dst_i010_opt, dst_width, dst_i010_opt + dst_i010_y_size,
653*4e366538SXin Li (dst_width + 1) / 2, dst_i010_opt + dst_i010_y_size + dst_i010_uv_size,
654*4e366538SXin Li (dst_width + 1) / 2, src_width, src_height, mode);
655*4e366538SXin Li }
656*4e366538SXin Li
657*4e366538SXin Li // Rotation should be exact.
658*4e366538SXin Li for (int i = 0; i < dst_i010_size; ++i) {
659*4e366538SXin Li EXPECT_EQ(dst_i010_c[i], dst_i010_opt[i]);
660*4e366538SXin Li }
661*4e366538SXin Li
662*4e366538SXin Li free_aligned_buffer_page_end_16(dst_i010_c);
663*4e366538SXin Li free_aligned_buffer_page_end_16(dst_i010_opt);
664*4e366538SXin Li free_aligned_buffer_page_end_16(src_i010);
665*4e366538SXin Li }
666*4e366538SXin Li
// I010 with no rotation: the destination has the same size as the source.
TEST_F(LibYUVRotateTest, I010Rotate0_Opt) {
  const int src_w = benchmark_width_;
  const int src_h = benchmark_height_;
  I010TestRotate(src_w, src_h, /*dst_width=*/src_w, /*dst_height=*/src_h,
                 kRotate0, benchmark_iterations_, disable_cpu_flags_,
                 benchmark_cpu_info_);
}
672*4e366538SXin Li
// I010 rotated by 90 degrees: destination width and height are swapped.
TEST_F(LibYUVRotateTest, I010Rotate90_Opt) {
  const int src_w = benchmark_width_;
  const int src_h = benchmark_height_;
  I010TestRotate(src_w, src_h, /*dst_width=*/src_h, /*dst_height=*/src_w,
                 kRotate90, benchmark_iterations_, disable_cpu_flags_,
                 benchmark_cpu_info_);
}
678*4e366538SXin Li
// I010 rotated by 180 degrees: the destination keeps the source dimensions.
TEST_F(LibYUVRotateTest, I010Rotate180_Opt) {
  const int src_w = benchmark_width_;
  const int src_h = benchmark_height_;
  I010TestRotate(src_w, src_h, /*dst_width=*/src_w, /*dst_height=*/src_h,
                 kRotate180, benchmark_iterations_, disable_cpu_flags_,
                 benchmark_cpu_info_);
}
684*4e366538SXin Li
// I010 rotated by 270 degrees: destination width and height are swapped.
TEST_F(LibYUVRotateTest, I010Rotate270_Opt) {
  const int src_w = benchmark_width_;
  const int src_h = benchmark_height_;
  I010TestRotate(src_w, src_h, /*dst_width=*/src_h, /*dst_height=*/src_w,
                 kRotate270, benchmark_iterations_, disable_cpu_flags_,
                 benchmark_cpu_info_);
}
690*4e366538SXin Li
I210TestRotate(int src_width,int src_height,int dst_width,int dst_height,libyuv::RotationMode mode,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info)691*4e366538SXin Li static void I210TestRotate(int src_width,
692*4e366538SXin Li int src_height,
693*4e366538SXin Li int dst_width,
694*4e366538SXin Li int dst_height,
695*4e366538SXin Li libyuv::RotationMode mode,
696*4e366538SXin Li int benchmark_iterations,
697*4e366538SXin Li int disable_cpu_flags,
698*4e366538SXin Li int benchmark_cpu_info) {
699*4e366538SXin Li if (src_width < 1) {
700*4e366538SXin Li src_width = 1;
701*4e366538SXin Li }
702*4e366538SXin Li if (src_height == 0) {
703*4e366538SXin Li src_height = 1;
704*4e366538SXin Li }
705*4e366538SXin Li if (dst_width < 1) {
706*4e366538SXin Li dst_width = 1;
707*4e366538SXin Li }
708*4e366538SXin Li if (dst_height < 1) {
709*4e366538SXin Li dst_height = 1;
710*4e366538SXin Li }
711*4e366538SXin Li int src_i210_y_size = src_width * Abs(src_height);
712*4e366538SXin Li int src_i210_uv_size = ((src_width + 1) / 2) * Abs(src_height);
713*4e366538SXin Li int src_i210_size = src_i210_y_size + src_i210_uv_size * 2;
714*4e366538SXin Li align_buffer_page_end_16(src_i210, src_i210_size);
715*4e366538SXin Li for (int i = 0; i < src_i210_size; ++i) {
716*4e366538SXin Li src_i210[i] = fastrand() & 0x3ff;
717*4e366538SXin Li }
718*4e366538SXin Li
719*4e366538SXin Li int dst_i210_y_size = dst_width * dst_height;
720*4e366538SXin Li int dst_i210_uv_size = ((dst_width + 1) / 2) * dst_height;
721*4e366538SXin Li int dst_i210_size = dst_i210_y_size + dst_i210_uv_size * 2;
722*4e366538SXin Li align_buffer_page_end_16(dst_i210_c, dst_i210_size);
723*4e366538SXin Li align_buffer_page_end_16(dst_i210_opt, dst_i210_size);
724*4e366538SXin Li memset(dst_i210_c, 2, dst_i210_size * 2);
725*4e366538SXin Li memset(dst_i210_opt, 3, dst_i210_size * 2);
726*4e366538SXin Li
727*4e366538SXin Li MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
728*4e366538SXin Li I210Rotate(src_i210, src_width, src_i210 + src_i210_y_size,
729*4e366538SXin Li (src_width + 1) / 2, src_i210 + src_i210_y_size + src_i210_uv_size,
730*4e366538SXin Li (src_width + 1) / 2, dst_i210_c, dst_width,
731*4e366538SXin Li dst_i210_c + dst_i210_y_size, (dst_width + 1) / 2,
732*4e366538SXin Li dst_i210_c + dst_i210_y_size + dst_i210_uv_size,
733*4e366538SXin Li (dst_width + 1) / 2, src_width, src_height, mode);
734*4e366538SXin Li
735*4e366538SXin Li MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
736*4e366538SXin Li for (int i = 0; i < benchmark_iterations; ++i) {
737*4e366538SXin Li I210Rotate(
738*4e366538SXin Li src_i210, src_width, src_i210 + src_i210_y_size, (src_width + 1) / 2,
739*4e366538SXin Li src_i210 + src_i210_y_size + src_i210_uv_size, (src_width + 1) / 2,
740*4e366538SXin Li dst_i210_opt, dst_width, dst_i210_opt + dst_i210_y_size,
741*4e366538SXin Li (dst_width + 1) / 2, dst_i210_opt + dst_i210_y_size + dst_i210_uv_size,
742*4e366538SXin Li (dst_width + 1) / 2, src_width, src_height, mode);
743*4e366538SXin Li }
744*4e366538SXin Li
745*4e366538SXin Li // Rotation should be exact.
746*4e366538SXin Li for (int i = 0; i < dst_i210_size; ++i) {
747*4e366538SXin Li EXPECT_EQ(dst_i210_c[i], dst_i210_opt[i]);
748*4e366538SXin Li }
749*4e366538SXin Li
750*4e366538SXin Li free_aligned_buffer_page_end_16(dst_i210_c);
751*4e366538SXin Li free_aligned_buffer_page_end_16(dst_i210_opt);
752*4e366538SXin Li free_aligned_buffer_page_end_16(src_i210);
753*4e366538SXin Li }
754*4e366538SXin Li
// I210 with no rotation: the destination has the same size as the source.
TEST_F(LibYUVRotateTest, I210Rotate0_Opt) {
  const int src_w = benchmark_width_;
  const int src_h = benchmark_height_;
  I210TestRotate(src_w, src_h, /*dst_width=*/src_w, /*dst_height=*/src_h,
                 kRotate0, benchmark_iterations_, disable_cpu_flags_,
                 benchmark_cpu_info_);
}
760*4e366538SXin Li
// I210 rotated by 90 degrees: destination width and height are swapped.
TEST_F(LibYUVRotateTest, I210Rotate90_Opt) {
  const int src_w = benchmark_width_;
  const int src_h = benchmark_height_;
  I210TestRotate(src_w, src_h, /*dst_width=*/src_h, /*dst_height=*/src_w,
                 kRotate90, benchmark_iterations_, disable_cpu_flags_,
                 benchmark_cpu_info_);
}
766*4e366538SXin Li
// I210 rotated by 180 degrees: the destination keeps the source dimensions.
TEST_F(LibYUVRotateTest, I210Rotate180_Opt) {
  const int src_w = benchmark_width_;
  const int src_h = benchmark_height_;
  I210TestRotate(src_w, src_h, /*dst_width=*/src_w, /*dst_height=*/src_h,
                 kRotate180, benchmark_iterations_, disable_cpu_flags_,
                 benchmark_cpu_info_);
}
772*4e366538SXin Li
// I210 rotated by 270 degrees: destination width and height are swapped.
TEST_F(LibYUVRotateTest, I210Rotate270_Opt) {
  const int src_w = benchmark_width_;
  const int src_h = benchmark_height_;
  I210TestRotate(src_w, src_h, /*dst_width=*/src_h, /*dst_height=*/src_w,
                 kRotate270, benchmark_iterations_, disable_cpu_flags_,
                 benchmark_cpu_info_);
}
778*4e366538SXin Li
I410TestRotate(int src_width,int src_height,int dst_width,int dst_height,libyuv::RotationMode mode,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info)779*4e366538SXin Li static void I410TestRotate(int src_width,
780*4e366538SXin Li int src_height,
781*4e366538SXin Li int dst_width,
782*4e366538SXin Li int dst_height,
783*4e366538SXin Li libyuv::RotationMode mode,
784*4e366538SXin Li int benchmark_iterations,
785*4e366538SXin Li int disable_cpu_flags,
786*4e366538SXin Li int benchmark_cpu_info) {
787*4e366538SXin Li if (src_width < 1) {
788*4e366538SXin Li src_width = 1;
789*4e366538SXin Li }
790*4e366538SXin Li if (src_height == 0) {
791*4e366538SXin Li src_height = 1;
792*4e366538SXin Li }
793*4e366538SXin Li if (dst_width < 1) {
794*4e366538SXin Li dst_width = 1;
795*4e366538SXin Li }
796*4e366538SXin Li if (dst_height < 1) {
797*4e366538SXin Li dst_height = 1;
798*4e366538SXin Li }
799*4e366538SXin Li int src_i410_y_size = src_width * Abs(src_height);
800*4e366538SXin Li int src_i410_uv_size = src_width * Abs(src_height);
801*4e366538SXin Li int src_i410_size = src_i410_y_size + src_i410_uv_size * 2;
802*4e366538SXin Li align_buffer_page_end_16(src_i410, src_i410_size);
803*4e366538SXin Li for (int i = 0; i < src_i410_size; ++i) {
804*4e366538SXin Li src_i410[i] = fastrand() & 0x3ff;
805*4e366538SXin Li }
806*4e366538SXin Li
807*4e366538SXin Li int dst_i410_y_size = dst_width * dst_height;
808*4e366538SXin Li int dst_i410_uv_size = dst_width * dst_height;
809*4e366538SXin Li int dst_i410_size = dst_i410_y_size + dst_i410_uv_size * 2;
810*4e366538SXin Li align_buffer_page_end_16(dst_i410_c, dst_i410_size);
811*4e366538SXin Li align_buffer_page_end_16(dst_i410_opt, dst_i410_size);
812*4e366538SXin Li memset(dst_i410_c, 2, dst_i410_size * 2);
813*4e366538SXin Li memset(dst_i410_opt, 3, dst_i410_size * 2);
814*4e366538SXin Li
815*4e366538SXin Li MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
816*4e366538SXin Li I410Rotate(src_i410, src_width, src_i410 + src_i410_y_size, src_width,
817*4e366538SXin Li src_i410 + src_i410_y_size + src_i410_uv_size, src_width,
818*4e366538SXin Li dst_i410_c, dst_width, dst_i410_c + dst_i410_y_size, dst_width,
819*4e366538SXin Li dst_i410_c + dst_i410_y_size + dst_i410_uv_size, dst_width,
820*4e366538SXin Li src_width, src_height, mode);
821*4e366538SXin Li
822*4e366538SXin Li MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
823*4e366538SXin Li for (int i = 0; i < benchmark_iterations; ++i) {
824*4e366538SXin Li I410Rotate(src_i410, src_width, src_i410 + src_i410_y_size, src_width,
825*4e366538SXin Li src_i410 + src_i410_y_size + src_i410_uv_size, src_width,
826*4e366538SXin Li dst_i410_opt, dst_width, dst_i410_opt + dst_i410_y_size,
827*4e366538SXin Li dst_width, dst_i410_opt + dst_i410_y_size + dst_i410_uv_size,
828*4e366538SXin Li dst_width, src_width, src_height, mode);
829*4e366538SXin Li }
830*4e366538SXin Li
831*4e366538SXin Li // Rotation should be exact.
832*4e366538SXin Li for (int i = 0; i < dst_i410_size; ++i) {
833*4e366538SXin Li EXPECT_EQ(dst_i410_c[i], dst_i410_opt[i]);
834*4e366538SXin Li }
835*4e366538SXin Li
836*4e366538SXin Li free_aligned_buffer_page_end_16(dst_i410_c);
837*4e366538SXin Li free_aligned_buffer_page_end_16(dst_i410_opt);
838*4e366538SXin Li free_aligned_buffer_page_end_16(src_i410);
839*4e366538SXin Li }
840*4e366538SXin Li
// I410 with no rotation: the destination has the same size as the source.
TEST_F(LibYUVRotateTest, I410Rotate0_Opt) {
  const int src_w = benchmark_width_;
  const int src_h = benchmark_height_;
  I410TestRotate(src_w, src_h, /*dst_width=*/src_w, /*dst_height=*/src_h,
                 kRotate0, benchmark_iterations_, disable_cpu_flags_,
                 benchmark_cpu_info_);
}
846*4e366538SXin Li
// I410 rotated by 90 degrees: destination width and height are swapped.
TEST_F(LibYUVRotateTest, I410Rotate90_Opt) {
  const int src_w = benchmark_width_;
  const int src_h = benchmark_height_;
  I410TestRotate(src_w, src_h, /*dst_width=*/src_h, /*dst_height=*/src_w,
                 kRotate90, benchmark_iterations_, disable_cpu_flags_,
                 benchmark_cpu_info_);
}
852*4e366538SXin Li
// I410 rotated by 180 degrees: the destination keeps the source dimensions.
TEST_F(LibYUVRotateTest, I410Rotate180_Opt) {
  const int src_w = benchmark_width_;
  const int src_h = benchmark_height_;
  I410TestRotate(src_w, src_h, /*dst_width=*/src_w, /*dst_height=*/src_h,
                 kRotate180, benchmark_iterations_, disable_cpu_flags_,
                 benchmark_cpu_info_);
}
858*4e366538SXin Li
// I410 rotated by 270 degrees: destination width and height are swapped.
TEST_F(LibYUVRotateTest, I410Rotate270_Opt) {
  const int src_w = benchmark_width_;
  const int src_h = benchmark_height_;
  I410TestRotate(src_w, src_h, /*dst_width=*/src_h, /*dst_height=*/src_w,
                 kRotate270, benchmark_iterations_, disable_cpu_flags_,
                 benchmark_cpu_info_);
}
864*4e366538SXin Li
865*4e366538SXin Li #if defined(ENABLE_ROW_TESTS)
866*4e366538SXin Li
// Transposes one 4x4 block of 32-bit pixels with Transpose4x4_32_C and with
// whichever variant the build and CPU flags select. Checks that the C result
// is the transpose of the source and that both results agree.
TEST_F(LibYUVRotateTest, Transpose4x4_Test) {
  // dst width and height
  const int width = 4;
  const int height = 4;
  const int kBlockBytes = width * height * 4;
  const int kSrcStride = height * 4;
  const int kDstStride = width * 4;
  int src_pixels[4][4];
  int dst_pixels_c[4][4];
  int dst_pixels_opt[4][4];

  // Each source value encodes its own (row, column) as row * 10 + column.
  for (int row = 0; row < 4; ++row) {
    for (int col = 0; col < 4; ++col) {
      src_pixels[row][col] = row * 10 + col;
    }
  }
  // Different fill bytes so an untouched output cannot compare equal.
  memset(dst_pixels_c, 1, kBlockBytes);
  memset(dst_pixels_opt, 2, kBlockBytes);

  // Byte views of the pixel arrays, as expected by the row functions.
  const uint8_t* const src_bytes =
      reinterpret_cast<const uint8_t*>(src_pixels);
  uint8_t* const dst_bytes_c = reinterpret_cast<uint8_t*>(dst_pixels_c);
  uint8_t* const dst_bytes_opt = reinterpret_cast<uint8_t*>(dst_pixels_opt);

  Transpose4x4_32_C(src_bytes, kSrcStride, dst_bytes_c, kDstStride, width);

  // Scale the repeat count so the total pixel work matches a full-size
  // benchmark frame processed in 4x4 blocks.
  const int repeat_count =
      (benchmark_iterations_ * benchmark_width_ * benchmark_height_ + 15) /
      (4 * 4);
  for (int pass = 0; pass < repeat_count; ++pass) {
#if defined(HAS_TRANSPOSE4X4_32_NEON)
    if (TestCpuFlag(kCpuHasNEON)) {
      Transpose4x4_32_NEON(src_bytes, kSrcStride, dst_bytes_opt, kDstStride,
                           width);
    } else
#elif defined(HAS_TRANSPOSE4X4_32_SSE2)
    if (TestCpuFlag(kCpuHasSSE2)) {
      Transpose4x4_32_SSE2(src_bytes, kSrcStride, dst_bytes_opt, kDstStride,
                           width);
    } else
#endif
    {
      Transpose4x4_32_C(src_bytes, kSrcStride, dst_bytes_opt, kDstStride,
                        width);
    }
  }

  for (int row = 0; row < 4; ++row) {
    for (int col = 0; col < 4; ++col) {
      EXPECT_EQ(dst_pixels_c[row][col], src_pixels[col][row]);
      EXPECT_EQ(dst_pixels_c[row][col], dst_pixels_opt[row][col]);
    }
  }
}
914*4e366538SXin Li
// Benchmarks the 32-bit 4x4 transpose on a buffer holding roughly one
// benchmark frame of pixels, and checks that the selected variant matches
// Transpose4x4_32_C byte for byte.
//
// The destination is 4 rows of |width| pixels; |width| is a quarter of the
// benchmark pixel count, rounded up to a multiple of 4.
TEST_F(LibYUVRotateTest, Transpose4x4_Opt) {
  // dst width and height
  const int width = ((benchmark_width_ * benchmark_height_ + 3) / 4 + 3) & ~3;
  const int height = 4;
  // 4 bytes per pixel. Source and both destinations have the same size.
  const int kBufferBytes = width * height * 4;
  align_buffer_page_end(src_pixels, kBufferBytes);
  align_buffer_page_end(dst_pixels_c, kBufferBytes);
  align_buffer_page_end(dst_pixels_opt, kBufferBytes);

  MemRandomize(src_pixels, kBufferBytes);
  // Different fill bytes so an untouched output cannot compare equal.
  memset(dst_pixels_c, 1, kBufferBytes);
  memset(dst_pixels_opt, 2, kBufferBytes);

  // Reference result from the C implementation.
  Transpose4x4_32_C((const uint8_t*)src_pixels, height * 4,
                    (uint8_t*)dst_pixels_c, width * 4, width);

  for (int i = 0; i < benchmark_iterations_; ++i) {
#if defined(HAS_TRANSPOSE4X4_32_NEON)
    if (TestCpuFlag(kCpuHasNEON)) {
      Transpose4x4_32_NEON((const uint8_t*)src_pixels, height * 4,
                           (uint8_t*)dst_pixels_opt, width * 4, width);
    } else
#elif defined(HAS_TRANSPOSE4X4_32_AVX2)
    if (TestCpuFlag(kCpuHasAVX2)) {
      Transpose4x4_32_AVX2((const uint8_t*)src_pixels, height * 4,
                           (uint8_t*)dst_pixels_opt, width * 4, width);
    } else if (TestCpuFlag(kCpuHasSSE2)) {
      Transpose4x4_32_SSE2((const uint8_t*)src_pixels, height * 4,
                           (uint8_t*)dst_pixels_opt, width * 4, width);
    } else
#endif
    {
      Transpose4x4_32_C((const uint8_t*)src_pixels, height * 4,
                        (uint8_t*)dst_pixels_opt, width * 4, width);
    }
  }

  // Compare every output byte. The buffers are byte-addressed, so the bound
  // must be the byte count: stopping at width * height would check only the
  // first quarter of the output and leave most of it unverified.
  for (int i = 0; i < kBufferBytes; ++i) {
    EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
  }

  free_aligned_buffer_page_end(src_pixels);
  free_aligned_buffer_page_end(dst_pixels_c);
  free_aligned_buffer_page_end(dst_pixels_opt);
}
959*4e366538SXin Li
960*4e366538SXin Li #endif // ENABLE_ROW_TESTS
961*4e366538SXin Li
962*4e366538SXin Li } // namespace libyuv
963