/*
 *  Copyright (c) 2022 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <assert.h>
#include "./vpx_dsp_rtcd.h"
#include "vpx_dsp/loongarch/vpx_convolve_lsx.h"

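/* Vertical 8-tap convolution for 4-pixel-wide blocks. Two interleaved 4-wide
 * row pairs share one vector, so each loop iteration filters and stores four
 * output rows. Pixels are XORed with 128 before the signed dot product
 * (filt_8tap_dpadd_s_h) and the bias is removed again after rounding. */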
static void common_vt_8t_4w_lsx(const uint8_t *src, int32_t src_stride,
                                uint8_t *dst, int32_t dst_stride,
                                int8_t *filter, int32_t height) {
  uint32_t loop_cnt = height >> 2;
  int32_t src_stride2 = src_stride << 1;
  int32_t src_stride3 = src_stride + src_stride2;
  int32_t src_stride4 = src_stride2 << 1;
  __m128i src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10;
  __m128i tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
  __m128i reg0, reg1, reg2, reg3, reg4;
  __m128i filter0, filter1, filter2, filter3;
  __m128i out0, out1;
  uint8_t *_src = (uint8_t *)src - src_stride3;

  DUP4_ARG2(__lsx_vldrepl_h, filter, 0, filter, 2, filter, 4, filter, 6,
            filter0, filter1, filter2, filter3);
  src0 = __lsx_vld(_src, 0);
  DUP2_ARG2(__lsx_vldx, _src, src_stride, _src, src_stride2, src1, src2);
  src3 = __lsx_vldx(_src, src_stride3);
  _src += src_stride4;
  src4 = __lsx_vld(_src, 0);
  DUP2_ARG2(__lsx_vldx, _src, src_stride, _src, src_stride2, src5, src6);
  _src += src_stride3;
  DUP4_ARG2(__lsx_vilvl_b, src1, src0, src3, src2, src5, src4, src2, src1, tmp0,
            tmp1, tmp2, tmp3);
  DUP2_ARG2(__lsx_vilvl_b, src4, src3, src6, src5, tmp4, tmp5);
  DUP2_ARG2(__lsx_vilvl_d, tmp3, tmp0, tmp4, tmp1, reg0, reg1);
  reg2 = __lsx_vilvl_d(tmp5, tmp2);
  DUP2_ARG2(__lsx_vxori_b, reg0, 128, reg1, 128, reg0, reg1);
  reg2 = __lsx_vxori_b(reg2, 128);

  for (; loop_cnt--;) {
    src7 = __lsx_vld(_src, 0);
    DUP2_ARG2(__lsx_vldx, _src, src_stride, _src, src_stride2, src8, src9);
    src10 = __lsx_vldx(_src, src_stride3);
    _src += src_stride4;
    DUP4_ARG2(__lsx_vilvl_b, src7, src6, src8, src7, src9, src8, src10, src9,
              tmp0, tmp1, tmp2, tmp3);
    DUP2_ARG2(__lsx_vilvl_d, tmp1, tmp0, tmp3, tmp2, reg3, reg4);
    DUP2_ARG2(__lsx_vxori_b, reg3, 128, reg4, 128, reg3, reg4);
    out0 = filt_8tap_dpadd_s_h(reg0, reg1, reg2, reg3, filter0, filter1,
                               filter2, filter3);
    out1 = filt_8tap_dpadd_s_h(reg1, reg2, reg3, reg4, filter0, filter1,
                               filter2, filter3);
    out0 = __lsx_vssrarni_b_h(out1, out0, 7);
    out0 = __lsx_vxori_b(out0, 128);
    __lsx_vstelm_w(out0, dst, 0, 0);
    dst += dst_stride;
    __lsx_vstelm_w(out0, dst, 0, 1);
    dst += dst_stride;
    __lsx_vstelm_w(out0, dst, 0, 2);
    dst += dst_stride;
    __lsx_vstelm_w(out0, dst, 0, 3);
    dst += dst_stride;

    reg0 = reg2;
    reg1 = reg3;
    reg2 = reg4;
    src6 = src10;
  }
}

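/* Vertical 8-tap convolution for 8-pixel-wide blocks; each loop iteration
 * produces four output rows, stored as 8-byte doubleword elements. */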
static void common_vt_8t_8w_lsx(const uint8_t *src, int32_t src_stride,
                                uint8_t *dst, int32_t dst_stride,
                                int8_t *filter, int32_t height) {
  uint32_t loop_cnt = height >> 2;
  __m128i src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10;
  __m128i tmp0, tmp1, tmp2, tmp3;
  __m128i reg0, reg1, reg2, reg3, reg4, reg5;
  __m128i filter0, filter1, filter2, filter3;
  __m128i out0, out1, out2, out3;
  int32_t src_stride2 = src_stride << 1;
  int32_t src_stride3 = src_stride + src_stride2;
  int32_t src_stride4 = src_stride2 << 1;
  src = src - src_stride3;

  DUP4_ARG2(__lsx_vldrepl_h, filter, 0, filter, 2, filter, 4, filter, 6,
            filter0, filter1, filter2, filter3);

  src0 = __lsx_vld(src, 0);
  DUP2_ARG2(__lsx_vldx, src, src_stride, src, src_stride2, src1, src2);
  src3 = __lsx_vldx(src, src_stride3);
  src += src_stride4;
  src4 = __lsx_vld(src, 0);
  DUP2_ARG2(__lsx_vldx, src, src_stride, src, src_stride2, src5, src6);
  src += src_stride3;

  DUP4_ARG2(__lsx_vxori_b, src0, 128, src1, 128, src2, 128, src3, 128, src0,
            src1, src2, src3);
  DUP2_ARG2(__lsx_vxori_b, src4, 128, src5, 128, src4, src5);
  src6 = __lsx_vxori_b(src6, 128);
  DUP4_ARG2(__lsx_vilvl_b, src1, src0, src3, src2, src5, src4, src2, src1, reg0,
            reg1, reg2, reg3);
  DUP2_ARG2(__lsx_vilvl_b, src4, src3, src6, src5, reg4, reg5);

  for (; loop_cnt--;) {
    src7 = __lsx_vld(src, 0);
    DUP2_ARG2(__lsx_vldx, src, src_stride, src, src_stride2, src8, src9);
    src10 = __lsx_vldx(src, src_stride3);
    src += src_stride4;
    DUP4_ARG2(__lsx_vxori_b, src7, 128, src8, 128, src9, 128, src10, 128, src7,
              src8, src9, src10);
    DUP4_ARG2(__lsx_vilvl_b, src7, src6, src8, src7, src9, src8, src10, src9,
              tmp0, tmp1, tmp2, tmp3);
    out0 = filt_8tap_dpadd_s_h(reg0, reg1, reg2, tmp0, filter0, filter1,
                               filter2, filter3);
    out1 = filt_8tap_dpadd_s_h(reg3, reg4, reg5, tmp1, filter0, filter1,
                               filter2, filter3);
    out2 = filt_8tap_dpadd_s_h(reg1, reg2, tmp0, tmp2, filter0, filter1,
                               filter2, filter3);
    out3 = filt_8tap_dpadd_s_h(reg4, reg5, tmp1, tmp3, filter0, filter1,
                               filter2, filter3);
    DUP2_ARG3(__lsx_vssrarni_b_h, out1, out0, 7, out3, out2, 7, out0, out1);
    DUP2_ARG2(__lsx_vxori_b, out0, 128, out1, 128, out0, out1);
    __lsx_vstelm_d(out0, dst, 0, 0);
    dst += dst_stride;
    __lsx_vstelm_d(out0, dst, 0, 1);
    dst += dst_stride;
    __lsx_vstelm_d(out1, dst, 0, 0);
    dst += dst_stride;
    __lsx_vstelm_d(out1, dst, 0, 1);
    dst += dst_stride;

    reg0 = reg2;
    reg1 = tmp0;
    reg2 = tmp2;
    reg3 = reg5;
    reg4 = tmp1;
    reg5 = tmp3;
    src6 = src10;
  }
}

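/* Vertical 8-tap convolution for 16-pixel-wide blocks. Low and high byte
 * interleaves of each row pair are filtered separately and rejoined by the
 * rounding/saturating narrowing step. */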
static void common_vt_8t_16w_lsx(const uint8_t *src, int32_t src_stride,
                                 uint8_t *dst, int32_t dst_stride,
                                 int8_t *filter, int32_t height) {
  uint32_t loop_cnt = height >> 2;
  __m128i src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10;
  __m128i filter0, filter1, filter2, filter3;
  __m128i reg0, reg1, reg2, reg3, reg4, reg5;
  __m128i reg6, reg7, reg8, reg9, reg10, reg11;
  __m128i tmp0, tmp1, tmp2, tmp3;
  int32_t src_stride2 = src_stride << 1;
  int32_t src_stride3 = src_stride + src_stride2;
  int32_t src_stride4 = src_stride2 << 1;
  src -= src_stride3;
  DUP4_ARG2(__lsx_vldrepl_h, filter, 0, filter, 2, filter, 4, filter, 6,
            filter0, filter1, filter2, filter3);

  src0 = __lsx_vld(src, 0);
  DUP4_ARG2(__lsx_vldx, src, src_stride, src, src_stride2, src, src_stride3,
            src, src_stride4, src1, src2, src3, src4);
  src += src_stride4;
  DUP2_ARG2(__lsx_vldx, src, src_stride, src, src_stride2, src5, src6);
  src += src_stride3;

  DUP4_ARG2(__lsx_vxori_b, src0, 128, src1, 128, src2, 128, src3, 128, src0,
            src1, src2, src3);
  DUP2_ARG2(__lsx_vxori_b, src4, 128, src5, 128, src4, src5);
  src6 = __lsx_vxori_b(src6, 128);
  DUP4_ARG2(__lsx_vilvl_b, src1, src0, src3, src2, src5, src4, src2, src1, reg0,
            reg1, reg2, reg3);
  DUP2_ARG2(__lsx_vilvl_b, src4, src3, src6, src5, reg4, reg5);
  DUP4_ARG2(__lsx_vilvh_b, src1, src0, src3, src2, src5, src4, src2, src1, reg6,
            reg7, reg8, reg9);
  DUP2_ARG2(__lsx_vilvh_b, src4, src3, src6, src5, reg10, reg11);

  for (; loop_cnt--;) {
    src7 = __lsx_vld(src, 0);
    DUP2_ARG2(__lsx_vldx, src, src_stride, src, src_stride2, src8, src9);
    src10 = __lsx_vldx(src, src_stride3);
    src += src_stride4;

    DUP4_ARG2(__lsx_vxori_b, src7, 128, src8, 128, src9, 128, src10, 128, src7,
              src8, src9, src10);
    DUP4_ARG2(__lsx_vilvl_b, src7, src6, src8, src7, src9, src8, src10, src9,
              src0, src1, src2, src3);
    DUP4_ARG2(__lsx_vilvh_b, src7, src6, src8, src7, src9, src8, src10, src9,
              src4, src5, src7, src8);
    tmp0 = filt_8tap_dpadd_s_h(reg0, reg1, reg2, src0, filter0, filter1,
                               filter2, filter3);
    tmp1 = filt_8tap_dpadd_s_h(reg3, reg4, reg5, src1, filter0, filter1,
                               filter2, filter3);
    tmp2 = filt_8tap_dpadd_s_h(reg6, reg7, reg8, src4, filter0, filter1,
                               filter2, filter3);
    tmp3 = filt_8tap_dpadd_s_h(reg9, reg10, reg11, src5, filter0, filter1,
                               filter2, filter3);
    DUP2_ARG3(__lsx_vssrarni_b_h, tmp2, tmp0, 7, tmp3, tmp1, 7, tmp0, tmp1);
    DUP2_ARG2(__lsx_vxori_b, tmp0, 128, tmp1, 128, tmp0, tmp1);
    __lsx_vst(tmp0, dst, 0);
    dst += dst_stride;
    __lsx_vst(tmp1, dst, 0);
    dst += dst_stride;
    tmp0 = filt_8tap_dpadd_s_h(reg1, reg2, src0, src2, filter0, filter1,
                               filter2, filter3);
    tmp1 = filt_8tap_dpadd_s_h(reg4, reg5, src1, src3, filter0, filter1,
                               filter2, filter3);
    tmp2 = filt_8tap_dpadd_s_h(reg7, reg8, src4, src7, filter0, filter1,
                               filter2, filter3);
    tmp3 = filt_8tap_dpadd_s_h(reg10, reg11, src5, src8, filter0, filter1,
                               filter2, filter3);
    DUP2_ARG3(__lsx_vssrarni_b_h, tmp2, tmp0, 7, tmp3, tmp1, 7, tmp0, tmp1);
    DUP2_ARG2(__lsx_vxori_b, tmp0, 128, tmp1, 128, tmp0, tmp1);
    __lsx_vst(tmp0, dst, 0);
    dst += dst_stride;
    __lsx_vst(tmp1, dst, 0);
    dst += dst_stride;

    reg0 = reg2;
    reg1 = src0;
    reg2 = src2;
    reg3 = reg5;
    reg4 = src1;
    reg5 = src3;
    reg6 = reg8;
    reg7 = src4;
    reg8 = src7;
    reg9 = reg11;
    reg10 = src5;
    reg11 = src8;
    src6 = src10;
  }
}

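/* Vertical 8-tap convolution for widths that are multiples of 16; the
 * 16-wide kernel is applied across the row in 16-pixel column tiles. */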
static void common_vt_8t_16w_mult_lsx(const uint8_t *src, int32_t src_stride,
                                      uint8_t *dst, int32_t dst_stride,
                                      int8_t *filter, int32_t height,
                                      int32_t width) {
  const uint8_t *src_tmp;
  uint8_t *dst_tmp;
  uint32_t cnt = width >> 4;
  __m128i src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10;
  __m128i filter0, filter1, filter2, filter3;
  __m128i reg0, reg1, reg2, reg3, reg4, reg5;
  __m128i reg6, reg7, reg8, reg9, reg10, reg11;
  __m128i tmp0, tmp1, tmp2, tmp3;
  int32_t src_stride2 = src_stride << 1;
  int32_t src_stride3 = src_stride + src_stride2;
  int32_t src_stride4 = src_stride2 << 1;
  int32_t dst_stride2 = dst_stride << 1;
  int32_t dst_stride3 = dst_stride2 + dst_stride;
  int32_t dst_stride4 = dst_stride2 << 1;
  src -= src_stride3;
  DUP4_ARG2(__lsx_vldrepl_h, filter, 0, filter, 2, filter, 4, filter, 6,
            filter0, filter1, filter2, filter3);

  for (; cnt--;) {
    uint32_t loop_cnt = height >> 2;

    src_tmp = src;
    dst_tmp = dst;

    src0 = __lsx_vld(src_tmp, 0);
    DUP2_ARG2(__lsx_vldx, src_tmp, src_stride, src_tmp, src_stride2, src1,
              src2);
    src3 = __lsx_vldx(src_tmp, src_stride3);
    src_tmp += src_stride4;
    src4 = __lsx_vld(src_tmp, 0);
    DUP2_ARG2(__lsx_vldx, src_tmp, src_stride, src_tmp, src_stride2, src5,
              src6);
    src_tmp += src_stride3;

    DUP4_ARG2(__lsx_vxori_b, src0, 128, src1, 128, src2, 128, src3, 128, src0,
              src1, src2, src3);
    DUP2_ARG2(__lsx_vxori_b, src4, 128, src5, 128, src4, src5);
    src6 = __lsx_vxori_b(src6, 128);
    DUP4_ARG2(__lsx_vilvl_b, src1, src0, src3, src2, src5, src4, src2, src1,
              reg0, reg1, reg2, reg3);
    DUP2_ARG2(__lsx_vilvl_b, src4, src3, src6, src5, reg4, reg5);
    DUP4_ARG2(__lsx_vilvh_b, src1, src0, src3, src2, src5, src4, src2, src1,
              reg6, reg7, reg8, reg9);
    DUP2_ARG2(__lsx_vilvh_b, src4, src3, src6, src5, reg10, reg11);

    for (; loop_cnt--;) {
      src7 = __lsx_vld(src_tmp, 0);
      DUP2_ARG2(__lsx_vldx, src_tmp, src_stride, src_tmp, src_stride2, src8,
                src9);
      src10 = __lsx_vldx(src_tmp, src_stride3);
      src_tmp += src_stride4;
      DUP4_ARG2(__lsx_vxori_b, src7, 128, src8, 128, src9, 128, src10, 128,
                src7, src8, src9, src10);
      DUP4_ARG2(__lsx_vilvl_b, src7, src6, src8, src7, src9, src8, src10, src9,
                src0, src1, src2, src3);
      DUP4_ARG2(__lsx_vilvh_b, src7, src6, src8, src7, src9, src8, src10, src9,
                src4, src5, src7, src8);
      tmp0 = filt_8tap_dpadd_s_h(reg0, reg1, reg2, src0, filter0, filter1,
                                 filter2, filter3);
      tmp1 = filt_8tap_dpadd_s_h(reg3, reg4, reg5, src1, filter0, filter1,
                                 filter2, filter3);
      tmp2 = filt_8tap_dpadd_s_h(reg6, reg7, reg8, src4, filter0, filter1,
                                 filter2, filter3);
      tmp3 = filt_8tap_dpadd_s_h(reg9, reg10, reg11, src5, filter0, filter1,
                                 filter2, filter3);
      DUP2_ARG3(__lsx_vssrarni_b_h, tmp2, tmp0, 7, tmp3, tmp1, 7, tmp0, tmp1);
      DUP2_ARG2(__lsx_vxori_b, tmp0, 128, tmp1, 128, tmp0, tmp1);
      __lsx_vst(tmp0, dst_tmp, 0);
      __lsx_vstx(tmp1, dst_tmp, dst_stride);
      tmp0 = filt_8tap_dpadd_s_h(reg1, reg2, src0, src2, filter0, filter1,
                                 filter2, filter3);
      tmp1 = filt_8tap_dpadd_s_h(reg4, reg5, src1, src3, filter0, filter1,
                                 filter2, filter3);
      tmp2 = filt_8tap_dpadd_s_h(reg7, reg8, src4, src7, filter0, filter1,
                                 filter2, filter3);
      tmp3 = filt_8tap_dpadd_s_h(reg10, reg11, src5, src8, filter0, filter1,
                                 filter2, filter3);
      DUP2_ARG3(__lsx_vssrarni_b_h, tmp2, tmp0, 7, tmp3, tmp1, 7, tmp0, tmp1);
      DUP2_ARG2(__lsx_vxori_b, tmp0, 128, tmp1, 128, tmp0, tmp1);
      __lsx_vstx(tmp0, dst_tmp, dst_stride2);
      __lsx_vstx(tmp1, dst_tmp, dst_stride3);
      dst_tmp += dst_stride4;

      reg0 = reg2;
      reg1 = src0;
      reg2 = src2;
      reg3 = reg5;
      reg4 = src1;
      reg5 = src3;
      reg6 = reg8;
      reg7 = src4;
      reg8 = src7;
      reg9 = reg11;
      reg10 = src5;
      reg11 = src8;
      src6 = src10;
    }
    src += 16;
    dst += 16;
  }
}

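/* 32-pixel-wide 8-tap convolution: two 16-wide column tiles. */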
static void common_vt_8t_32w_lsx(const uint8_t *src, int32_t src_stride,
                                 uint8_t *dst, int32_t dst_stride,
                                 int8_t *filter, int32_t height) {
  common_vt_8t_16w_mult_lsx(src, src_stride, dst, dst_stride, filter, height,
                            32);
}

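/* 64-pixel-wide 8-tap convolution: four 16-wide column tiles. */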
static void common_vt_8t_64w_lsx(const uint8_t *src, int32_t src_stride,
                                 uint8_t *dst, int32_t dst_stride,
                                 int8_t *filter, int32_t height) {
  common_vt_8t_16w_mult_lsx(src, src_stride, dst, dst_stride, filter, height,
                            64);
}

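/* Vertical 2-tap (bilinear) convolution for a 4x4 block. The two taps are
 * applied with the unsigned dot product __lsx_vdp2_h_bu, so no sign bias is
 * needed; results are rounded by FILTER_BITS and saturated to uint8. */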
static void common_vt_2t_4x4_lsx(const uint8_t *src, int32_t src_stride,
                                 uint8_t *dst, int32_t dst_stride,
                                 int8_t *filter) {
  __m128i src0, src1, src2, src3, src4;
  __m128i vec0, vec1, vec2, vec3, vec4, vec5;
  __m128i filt0, tmp0, tmp1;

  int32_t src_stride2 = src_stride << 1;
  int32_t src_stride3 = src_stride2 + src_stride;
  int32_t src_stride4 = src_stride2 << 1;
  int32_t dst_stride2 = dst_stride << 1;
  int32_t dst_stride3 = dst_stride2 + dst_stride;

  filt0 = __lsx_vldrepl_h(filter, 0);

  src0 = __lsx_vld(src, 0);
  DUP4_ARG2(__lsx_vldx, src, src_stride, src, src_stride2, src, src_stride3,
            src, src_stride4, src1, src2, src3, src4);
  src += (src_stride4 + src_stride);

  DUP4_ARG2(__lsx_vilvl_b, src1, src0, src2, src1, src3, src2, src4, src3, vec0,
            vec1, vec2, vec3);
  DUP2_ARG2(__lsx_vilvl_d, vec1, vec0, vec3, vec2, vec4, vec5);
  DUP2_ARG2(__lsx_vdp2_h_bu, vec4, filt0, vec5, filt0, tmp0, tmp1);
  tmp0 = __lsx_vssrarni_bu_h(tmp1, tmp0, FILTER_BITS);

  __lsx_vstelm_w(tmp0, dst, 0, 0);
  __lsx_vstelm_w(tmp0, dst + dst_stride, 0, 1);
  __lsx_vstelm_w(tmp0, dst + dst_stride2, 0, 2);
  __lsx_vstelm_w(tmp0, dst + dst_stride3, 0, 3);
}

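/* Vertical 2-tap convolution for a 4x8 block; all eight output rows are
 * computed in a single pass. */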
static void common_vt_2t_4x8_lsx(const uint8_t *src, int32_t src_stride,
                                 uint8_t *dst, int32_t dst_stride,
                                 int8_t *filter) {
  __m128i src0, src1, src2, src3, src4, src5, src6, src7, src8;
  __m128i vec0, vec1, vec2, vec3, vec4, vec5;
  __m128i vec6, vec7, vec8, vec9, vec10, vec11;
  __m128i tmp0, tmp1, tmp2, tmp3;
  __m128i filt0;

  int32_t src_stride2 = src_stride << 1;
  int32_t src_stride3 = src_stride2 + src_stride;
  int32_t src_stride4 = src_stride2 << 1;
  int32_t dst_stride2 = dst_stride << 1;
  int32_t dst_stride3 = dst_stride2 + dst_stride;
  int32_t dst_stride4 = dst_stride2 << 1;
  uint8_t *dst_tmp1 = dst + dst_stride4;

  filt0 = __lsx_vldrepl_h(filter, 0);
  src0 = __lsx_vld(src, 0);
  DUP4_ARG2(__lsx_vldx, src, src_stride, src, src_stride2, src, src_stride3,
            src, src_stride4, src1, src2, src3, src4);
  src += src_stride4;
  DUP4_ARG2(__lsx_vldx, src, src_stride, src, src_stride2, src, src_stride3,
            src, src_stride4, src5, src6, src7, src8);
  src += (src_stride4 + src_stride);

  DUP4_ARG2(__lsx_vilvl_b, src1, src0, src2, src1, src3, src2, src4, src3, vec0,
            vec1, vec2, vec3);
  DUP4_ARG2(__lsx_vilvl_b, src5, src4, src6, src5, src7, src6, src8, src7, vec4,
            vec5, vec6, vec7);
  DUP4_ARG2(__lsx_vilvl_d, vec1, vec0, vec3, vec2, vec5, vec4, vec7, vec6, vec8,
            vec9, vec10, vec11);

  DUP4_ARG2(__lsx_vdp2_h_bu, vec8, filt0, vec9, filt0, vec10, filt0, vec11,
            filt0, tmp0, tmp1, tmp2, tmp3);
  DUP2_ARG3(__lsx_vssrarni_bu_h, tmp1, tmp0, FILTER_BITS, tmp3, tmp2,
            FILTER_BITS, tmp0, tmp1);

  __lsx_vstelm_w(tmp0, dst, 0, 0);
  __lsx_vstelm_w(tmp0, dst + dst_stride, 0, 1);
  __lsx_vstelm_w(tmp0, dst + dst_stride2, 0, 2);
  __lsx_vstelm_w(tmp0, dst + dst_stride3, 0, 3);

  __lsx_vstelm_w(tmp1, dst_tmp1, 0, 0);
  __lsx_vstelm_w(tmp1, dst_tmp1 + dst_stride, 0, 1);
  __lsx_vstelm_w(tmp1, dst_tmp1 + dst_stride2, 0, 2);
  __lsx_vstelm_w(tmp1, dst_tmp1 + dst_stride3, 0, 3);
}

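/* 4-pixel-wide 2-tap dispatcher; only the block heights used by the entry
 * point (4 and 8) are handled. */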
static void common_vt_2t_4w_lsx(const uint8_t *src, int32_t src_stride,
                                uint8_t *dst, int32_t dst_stride,
                                int8_t *filter, int32_t height) {
  if (height == 4) {
    common_vt_2t_4x4_lsx(src, src_stride, dst, dst_stride, filter);
  } else if (height == 8) {
    common_vt_2t_4x8_lsx(src, src_stride, dst, dst_stride, filter);
  }
}

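/* Vertical 2-tap convolution for an 8x4 block. */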
static void common_vt_2t_8x4_lsx(const uint8_t *src, int32_t src_stride,
                                 uint8_t *dst, int32_t dst_stride,
                                 int8_t *filter) {
  __m128i src0, src1, src2, src3, src4, vec0, vec1, vec2, vec3, filt0;
  __m128i out0, out1, tmp0, tmp1, tmp2, tmp3;

  int32_t src_stride2 = src_stride << 1;
  int32_t src_stride3 = src_stride2 + src_stride;
  int32_t src_stride4 = src_stride2 << 1;
  int32_t dst_stride2 = dst_stride << 1;
  int32_t dst_stride3 = dst_stride2 + dst_stride;

  filt0 = __lsx_vldrepl_h(filter, 0);

  src0 = __lsx_vld(src, 0);
  DUP4_ARG2(__lsx_vldx, src, src_stride, src, src_stride2, src, src_stride3,
            src, src_stride4, src1, src2, src3, src4);

  DUP4_ARG2(__lsx_vilvl_b, src1, src0, src2, src1, src3, src2, src4, src3, vec0,
            vec1, vec2, vec3);
  DUP4_ARG2(__lsx_vdp2_h_bu, vec0, filt0, vec1, filt0, vec2, filt0, vec3, filt0,
            tmp0, tmp1, tmp2, tmp3);
  DUP2_ARG3(__lsx_vssrarni_bu_h, tmp1, tmp0, FILTER_BITS, tmp3, tmp2,
            FILTER_BITS, out0, out1);

  __lsx_vstelm_d(out0, dst, 0, 0);
  __lsx_vstelm_d(out0, dst + dst_stride, 0, 1);
  __lsx_vstelm_d(out1, dst + dst_stride2, 0, 0);
  __lsx_vstelm_d(out1, dst + dst_stride3, 0, 1);
}

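/* Vertical 2-tap convolution for 8-pixel-wide blocks with heights that are
 * multiples of 8; eight output rows per loop iteration. */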
static void common_vt_2t_8x8mult_lsx(const uint8_t *src, int32_t src_stride,
                                     uint8_t *dst, int32_t dst_stride,
                                     int8_t *filter, int32_t height) {
  uint32_t loop_cnt = (height >> 3);
  __m128i src0, src1, src2, src3, src4, src5, src6, src7, src8;
  __m128i vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, filt0;
  __m128i out0, out1, tmp0, tmp1, tmp2, tmp3;

  int32_t src_stride2 = src_stride << 1;
  int32_t src_stride3 = src_stride2 + src_stride;
  int32_t src_stride4 = src_stride2 << 1;
  int32_t dst_stride2 = dst_stride << 1;
  int32_t dst_stride3 = dst_stride2 + dst_stride;
  int32_t dst_stride4 = dst_stride2 << 1;

  filt0 = __lsx_vldrepl_h(filter, 0);
  src0 = __lsx_vld(src, 0);
  src += src_stride;

  for (; loop_cnt--;) {
    src1 = __lsx_vld(src, 0);
    DUP2_ARG2(__lsx_vldx, src, src_stride, src, src_stride2, src2, src3);
    src4 = __lsx_vldx(src, src_stride3);
    src += src_stride4;
    src5 = __lsx_vld(src, 0);
    DUP2_ARG2(__lsx_vldx, src, src_stride, src, src_stride2, src6, src7);
    src8 = __lsx_vldx(src, src_stride3);
    src += src_stride4;

    DUP4_ARG2(__lsx_vilvl_b, src1, src0, src2, src1, src3, src2, src4, src3,
              vec0, vec1, vec2, vec3);
    DUP4_ARG2(__lsx_vilvl_b, src5, src4, src6, src5, src7, src6, src8, src7,
              vec4, vec5, vec6, vec7);
    DUP4_ARG2(__lsx_vdp2_h_bu, vec0, filt0, vec1, filt0, vec2, filt0, vec3,
              filt0, tmp0, tmp1, tmp2, tmp3);
    DUP2_ARG3(__lsx_vssrarni_bu_h, tmp1, tmp0, FILTER_BITS, tmp3, tmp2,
              FILTER_BITS, out0, out1);

    __lsx_vstelm_d(out0, dst, 0, 0);
    __lsx_vstelm_d(out0, dst + dst_stride, 0, 1);
    __lsx_vstelm_d(out1, dst + dst_stride2, 0, 0);
    __lsx_vstelm_d(out1, dst + dst_stride3, 0, 1);
    dst += dst_stride4;

    DUP4_ARG2(__lsx_vdp2_h_bu, vec4, filt0, vec5, filt0, vec6, filt0, vec7,
              filt0, tmp0, tmp1, tmp2, tmp3);
    DUP2_ARG3(__lsx_vssrarni_bu_h, tmp1, tmp0, FILTER_BITS, tmp3, tmp2,
              FILTER_BITS, out0, out1);

    __lsx_vstelm_d(out0, dst, 0, 0);
    __lsx_vstelm_d(out0, dst + dst_stride, 0, 1);
    __lsx_vstelm_d(out1, dst + dst_stride2, 0, 0);
    __lsx_vstelm_d(out1, dst + dst_stride3, 0, 1);
    dst += dst_stride4;

    src0 = src8;
  }
}

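/* 8-pixel-wide 2-tap dispatcher: the 8x4 kernel for height 4, otherwise the
 * multiple-of-8 loop. */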
static void common_vt_2t_8w_lsx(const uint8_t *src, int32_t src_stride,
                                uint8_t *dst, int32_t dst_stride,
                                int8_t *filter, int32_t height) {
  if (height == 4) {
    common_vt_2t_8x4_lsx(src, src_stride, dst, dst_stride, filter);
  } else {
    common_vt_2t_8x8mult_lsx(src, src_stride, dst, dst_stride, filter, height);
  }
}

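/* Vertical 2-tap convolution for 16-pixel-wide blocks, four output rows per
 * iteration; low/high byte interleaves cover the full 16 pixels. */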
static void common_vt_2t_16w_lsx(const uint8_t *src, int32_t src_stride,
                                 uint8_t *dst, int32_t dst_stride,
                                 int8_t *filter, int32_t height) {
  uint32_t loop_cnt = (height >> 2);
  __m128i src0, src1, src2, src3, src4, tmp, tmp0, tmp1;
  __m128i vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, filt0;
  int32_t src_stride2 = src_stride << 1;
  int32_t src_stride3 = src_stride2 + src_stride;
  int32_t src_stride4 = src_stride2 << 1;

  filt0 = __lsx_vldrepl_h(filter, 0);

  src0 = __lsx_vld(src, 0);
  src += src_stride;

  for (; loop_cnt--;) {
    src1 = __lsx_vld(src, 0);
    DUP2_ARG2(__lsx_vldx, src, src_stride, src, src_stride2, src2, src3);
    src4 = __lsx_vldx(src, src_stride3);
    src += src_stride4;

    DUP2_ARG2(__lsx_vilvl_b, src1, src0, src2, src1, vec0, vec2);
    DUP2_ARG2(__lsx_vilvh_b, src1, src0, src2, src1, vec1, vec3);
    DUP2_ARG2(__lsx_vdp2_h_bu, vec0, filt0, vec1, filt0, tmp0, tmp1);
    tmp = __lsx_vssrarni_bu_h(tmp1, tmp0, FILTER_BITS);
    __lsx_vst(tmp, dst, 0);
    dst += dst_stride;

    DUP2_ARG2(__lsx_vilvl_b, src3, src2, src4, src3, vec4, vec6);
    DUP2_ARG2(__lsx_vilvh_b, src3, src2, src4, src3, vec5, vec7);
    DUP2_ARG2(__lsx_vdp2_h_bu, vec2, filt0, vec3, filt0, tmp0, tmp1);
    tmp = __lsx_vssrarni_bu_h(tmp1, tmp0, FILTER_BITS);
    __lsx_vst(tmp, dst, 0);
    dst += dst_stride;

    DUP2_ARG2(__lsx_vdp2_h_bu, vec4, filt0, vec5, filt0, tmp0, tmp1);
    tmp = __lsx_vssrarni_bu_h(tmp1, tmp0, FILTER_BITS);
    __lsx_vst(tmp, dst, 0);
    dst += dst_stride;

    DUP2_ARG2(__lsx_vdp2_h_bu, vec6, filt0, vec7, filt0, tmp0, tmp1);
    tmp = __lsx_vssrarni_bu_h(tmp1, tmp0, FILTER_BITS);
    __lsx_vst(tmp, dst, 0);
    dst += dst_stride;

    src0 = src4;
  }
}

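/* Vertical 2-tap convolution for 32-pixel-wide blocks; the left 16 pixels go
 * through src/dst directly and the right 16 through pointers offset by 16
 * bytes. */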
static void common_vt_2t_32w_lsx(const uint8_t *src, int32_t src_stride,
                                 uint8_t *dst, int32_t dst_stride,
                                 int8_t *filter, int32_t height) {
  uint32_t loop_cnt = (height >> 2);
  __m128i src0, src1, src2, src3, src4, src5, src6, src7, src8, src9;
  __m128i vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, filt0;
  __m128i tmp, tmp0, tmp1;

  int32_t src_stride2 = src_stride << 1;
  int32_t src_stride3 = src_stride2 + src_stride;
  int32_t src_stride4 = src_stride2 << 1;
  int32_t dst_stride2 = dst_stride << 1;
  int32_t dst_stride3 = dst_stride2 + dst_stride;
  const uint8_t *src_tmp;

  filt0 = __lsx_vldrepl_h(filter, 0);

  DUP2_ARG2(__lsx_vld, src, 0, src, 16, src0, src5);
  src += src_stride;
  src_tmp = src + 16;

  for (; loop_cnt--;) {
    DUP2_ARG2(__lsx_vld, src, 0, src_tmp, 0, src1, src6);
    DUP4_ARG2(__lsx_vldx, src, src_stride, src_tmp, src_stride, src,
              src_stride2, src_tmp, src_stride2, src2, src7, src3, src8);
    DUP2_ARG2(__lsx_vldx, src, src_stride3, src_tmp, src_stride3, src4, src9);
    DUP2_ARG2(__lsx_vilvl_b, src1, src0, src2, src1, vec0, vec2);
    DUP2_ARG2(__lsx_vilvh_b, src1, src0, src2, src1, vec1, vec3);
    src += src_stride4;
    src_tmp += src_stride4;

    DUP2_ARG2(__lsx_vdp2_h_bu, vec0, filt0, vec1, filt0, tmp0, tmp1);
    tmp = __lsx_vssrarni_bu_h(tmp1, tmp0, FILTER_BITS);
    __lsx_vst(tmp, dst, 0);

    DUP2_ARG2(__lsx_vdp2_h_bu, vec2, filt0, vec3, filt0, tmp0, tmp1);
    tmp = __lsx_vssrarni_bu_h(tmp1, tmp0, FILTER_BITS);
    __lsx_vstx(tmp, dst, dst_stride);

    DUP2_ARG2(__lsx_vilvl_b, src3, src2, src4, src3, vec4, vec6);
    DUP2_ARG2(__lsx_vilvh_b, src3, src2, src4, src3, vec5, vec7);
    DUP2_ARG2(__lsx_vdp2_h_bu, vec4, filt0, vec5, filt0, tmp0, tmp1);
    tmp = __lsx_vssrarni_bu_h(tmp1, tmp0, FILTER_BITS);
    __lsx_vstx(tmp, dst, dst_stride2);

    DUP2_ARG2(__lsx_vdp2_h_bu, vec6, filt0, vec7, filt0, tmp0, tmp1);
    tmp = __lsx_vssrarni_bu_h(tmp1, tmp0, FILTER_BITS);
    __lsx_vstx(tmp, dst, dst_stride3);

    DUP2_ARG2(__lsx_vilvl_b, src6, src5, src7, src6, vec0, vec2);
    DUP2_ARG2(__lsx_vilvh_b, src6, src5, src7, src6, vec1, vec3);
    DUP2_ARG2(__lsx_vdp2_h_bu, vec0, filt0, vec1, filt0, tmp0, tmp1);
    tmp = __lsx_vssrarni_bu_h(tmp1, tmp0, FILTER_BITS);
    __lsx_vst(tmp, dst, 16);

    DUP2_ARG2(__lsx_vdp2_h_bu, vec2, filt0, vec3, filt0, tmp0, tmp1);
    tmp = __lsx_vssrarni_bu_h(tmp1, tmp0, FILTER_BITS);
    dst += dst_stride;
    __lsx_vst(tmp, dst, 16);

    DUP2_ARG2(__lsx_vilvl_b, src8, src7, src9, src8, vec4, vec6);
    DUP2_ARG2(__lsx_vilvh_b, src8, src7, src9, src8, vec5, vec7);
    DUP2_ARG2(__lsx_vdp2_h_bu, vec4, filt0, vec5, filt0, tmp0, tmp1);
    tmp = __lsx_vssrarni_bu_h(tmp1, tmp0, FILTER_BITS);
    dst += dst_stride;
    __lsx_vst(tmp, dst, 16);

    DUP2_ARG2(__lsx_vdp2_h_bu, vec6, filt0, vec7, filt0, tmp0, tmp1);
    tmp = __lsx_vssrarni_bu_h(tmp1, tmp0, FILTER_BITS);
    dst += dst_stride;
    __lsx_vst(tmp, dst, 16);

    dst += dst_stride;

    src0 = src4;
    src5 = src9;
  }
}

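/* Vertical 2-tap convolution for 64-pixel-wide blocks, two output rows per
 * iteration, processed as four 16-byte segments per row. */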
static void common_vt_2t_64w_lsx(const uint8_t *src, int32_t src_stride,
                                 uint8_t *dst, int32_t dst_stride,
                                 int8_t *filter, int32_t height) {
  uint32_t loop_cnt = (height >> 1);
  __m128i src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10;
  __m128i src11, vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, filt0;
  __m128i tmp, tmp0, tmp1;

  int32_t src_stride2 = src_stride << 1;
  int32_t dst_stride2 = dst_stride << 1;
  uint8_t *dst_tmp1 = dst + dst_stride;

  filt0 = __lsx_vldrepl_h(filter, 0);

  DUP4_ARG2(__lsx_vld, src, 0, src, 16, src, 32, src, 48, src0, src3, src6,
            src9);
  src += src_stride;

  for (; loop_cnt--;) {
    const uint8_t *src_tmp0 = src + src_stride;

    DUP4_ARG2(__lsx_vld, src, 0, src, 16, src, 32, src, 48, src1, src4, src7,
              src10);
    DUP4_ARG2(__lsx_vld, src_tmp0, 0, src_tmp0, 16, src_tmp0, 32, src_tmp0, 48,
              src2, src5, src8, src11);
    src += src_stride2;

    DUP2_ARG2(__lsx_vilvl_b, src1, src0, src2, src1, vec0, vec2);
    DUP2_ARG2(__lsx_vilvh_b, src1, src0, src2, src1, vec1, vec3);
    DUP2_ARG2(__lsx_vdp2_h_bu, vec0, filt0, vec1, filt0, tmp0, tmp1);
    tmp = __lsx_vssrarni_bu_h(tmp1, tmp0, FILTER_BITS);
    __lsx_vst(tmp, dst, 0);

    DUP2_ARG2(__lsx_vdp2_h_bu, vec2, filt0, vec3, filt0, tmp0, tmp1);
    tmp = __lsx_vssrarni_bu_h(tmp1, tmp0, FILTER_BITS);
    __lsx_vst(tmp, dst_tmp1, 0);

    DUP2_ARG2(__lsx_vilvl_b, src4, src3, src5, src4, vec4, vec6);
    DUP2_ARG2(__lsx_vilvh_b, src4, src3, src5, src4, vec5, vec7);
    DUP2_ARG2(__lsx_vdp2_h_bu, vec4, filt0, vec5, filt0, tmp0, tmp1);
    tmp = __lsx_vssrarni_bu_h(tmp1, tmp0, FILTER_BITS);
    __lsx_vst(tmp, dst, 16);

    DUP2_ARG2(__lsx_vdp2_h_bu, vec6, filt0, vec7, filt0, tmp0, tmp1);
    tmp = __lsx_vssrarni_bu_h(tmp1, tmp0, FILTER_BITS);
    __lsx_vst(tmp, dst_tmp1, 16);

    DUP2_ARG2(__lsx_vilvl_b, src7, src6, src8, src7, vec0, vec2);
    DUP2_ARG2(__lsx_vilvh_b, src7, src6, src8, src7, vec1, vec3);
    DUP2_ARG2(__lsx_vdp2_h_bu, vec0, filt0, vec1, filt0, tmp0, tmp1);
    tmp = __lsx_vssrarni_bu_h(tmp1, tmp0, FILTER_BITS);
    __lsx_vst(tmp, dst, 32);

    DUP2_ARG2(__lsx_vdp2_h_bu, vec2, filt0, vec3, filt0, tmp0, tmp1);
    tmp = __lsx_vssrarni_bu_h(tmp1, tmp0, FILTER_BITS);
    __lsx_vst(tmp, dst_tmp1, 32);

    DUP2_ARG2(__lsx_vilvl_b, src10, src9, src11, src10, vec4, vec6);
    DUP2_ARG2(__lsx_vilvh_b, src10, src9, src11, src10, vec5, vec7);
    DUP2_ARG2(__lsx_vdp2_h_bu, vec4, filt0, vec5, filt0, tmp0, tmp1);
    tmp = __lsx_vssrarni_bu_h(tmp1, tmp0, FILTER_BITS);
    __lsx_vst(tmp, dst, 48);

    DUP2_ARG2(__lsx_vdp2_h_bu, vec6, filt0, vec7, filt0, tmp0, tmp1);
    tmp = __lsx_vssrarni_bu_h(tmp1, tmp0, FILTER_BITS);
    __lsx_vst(tmp, dst_tmp1, 48);
    dst += dst_stride2;
    dst_tmp1 += dst_stride2;

    src0 = src2;
    src3 = src5;
    src6 = src8;
    src9 = src11;
  }
}

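/* LSX entry point for vertical convolution. The 16-bit taps are narrowed to
 * int8; the assert below rejects a middle tap of 128 (the copy-only filter),
 * the one tap value that would not fit. Two-tap filters take the bilinear
 * path, receiving the middle taps via &filt_ver[3]; all other filters take
 * the 8-tap path. Unsupported widths fall back to vpx_convolve8_vert_c. */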
void vpx_convolve8_vert_lsx(const uint8_t *src, ptrdiff_t src_stride,
                            uint8_t *dst, ptrdiff_t dst_stride,
                            const InterpKernel *filter, int x0_q4,
                            int32_t x_step_q4, int y0_q4, int y_step_q4, int w,
                            int h) {
  const int16_t *const filter_y = filter[y0_q4];
  int8_t cnt, filt_ver[8];

  assert(y_step_q4 == 16);
  assert(((const int32_t *)filter_y)[1] != 0x800000);

  for (cnt = 8; cnt--;) {
    filt_ver[cnt] = filter_y[cnt];
  }

  if (vpx_get_filter_taps(filter_y) == 2) {
    switch (w) {
      case 4:
        common_vt_2t_4w_lsx(src, (int32_t)src_stride, dst, (int32_t)dst_stride,
                            &filt_ver[3], h);
        break;
      case 8:
        common_vt_2t_8w_lsx(src, (int32_t)src_stride, dst, (int32_t)dst_stride,
                            &filt_ver[3], h);
        break;
      case 16:
        common_vt_2t_16w_lsx(src, (int32_t)src_stride, dst,
                             (int32_t)dst_stride, &filt_ver[3], h);
        break;
      case 32:
        common_vt_2t_32w_lsx(src, (int32_t)src_stride, dst,
                             (int32_t)dst_stride, &filt_ver[3], h);
        break;
      case 64:
        common_vt_2t_64w_lsx(src, (int32_t)src_stride, dst,
                             (int32_t)dst_stride, &filt_ver[3], h);
        break;
      default:
        vpx_convolve8_vert_c(src, src_stride, dst, dst_stride, filter, x0_q4,
                             x_step_q4, y0_q4, y_step_q4, w, h);
        break;
    }
  } else {
    switch (w) {
      case 4:
        common_vt_8t_4w_lsx(src, (int32_t)src_stride, dst, (int32_t)dst_stride,
                            filt_ver, h);
        break;
      case 8:
        common_vt_8t_8w_lsx(src, (int32_t)src_stride, dst, (int32_t)dst_stride,
                            filt_ver, h);
        break;
      case 16:
        common_vt_8t_16w_lsx(src, (int32_t)src_stride, dst,
                             (int32_t)dst_stride, filt_ver, h);
        break;
      case 32:
        common_vt_8t_32w_lsx(src, (int32_t)src_stride, dst,
                             (int32_t)dst_stride, filt_ver, h);
        break;
      case 64:
        common_vt_8t_64w_lsx(src, (int32_t)src_stride, dst,
                             (int32_t)dst_stride, filt_ver, h);
        break;
      default:
        vpx_convolve8_vert_c(src, src_stride, dst, dst_stride, filter, x0_q4,
                             x_step_q4, y0_q4, y_step_q4, w, h);
        break;
    }
  }
}