/*
 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include "./vp8_rtcd.h"
#include "./vpx_dsp_rtcd.h"
#include "onyx_int.h"
#include "mcomp.h"
#include "vpx_mem/vpx_mem.h"
#include "vpx_config.h"
#include <stdio.h>
#include <limits.h>
#include <math.h>
#include "vp8/common/findnearmv.h"
#include "vp8/common/common.h"
#include "vpx_dsp/vpx_dsp_common.h"

int vp8_mv_bit_cost(int_mv *mv, int_mv *ref, int *mvcost[2], int Weight) {
  /* MV costing is based on the distribution of vectors in the previous
   * frame and as such will tend to overstate the cost of vectors. In
   * addition, coding a new vector can have a knock-on effect on the cost
   * of subsequent vectors and the quality of prediction from NEAR and
   * NEAREST for subsequent blocks. The "Weight" parameter allows, to a
   * limited extent, for some account to be taken of these factors.
   */
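  /* Weight acts as a Q7 scale factor: the summed table costs are
   * multiplied by Weight and shifted down by 7, so Weight == 128 applies
   * the full rate estimate while smaller values discount it.
   */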
  const int mv_idx_row =
      clamp((mv->as_mv.row - ref->as_mv.row) >> 1, 0, MVvals);
  const int mv_idx_col =
      clamp((mv->as_mv.col - ref->as_mv.col) >> 1, 0, MVvals);
  return ((mvcost[0][mv_idx_row] + mvcost[1][mv_idx_col]) * Weight) >> 7;
}

static int mv_err_cost(int_mv *mv, int_mv *ref, int *mvcost[2],
                       int error_per_bit) {
  /* Ignore mv costing if mvcost is NULL */
  if (mvcost) {
    const int mv_idx_row =
        clamp((mv->as_mv.row - ref->as_mv.row) >> 1, 0, MVvals);
    const int mv_idx_col =
        clamp((mv->as_mv.col - ref->as_mv.col) >> 1, 0, MVvals);
    return ((mvcost[0][mv_idx_row] + mvcost[1][mv_idx_col]) * error_per_bit +
            128) >>
           8;
  }
  return 0;
}

static int mvsad_err_cost(int_mv *mv, int_mv *ref, int *mvsadcost[2],
                          int error_per_bit) {
  /* Calculate sad error cost on full pixel basis. */
  /* Ignore mv costing if mvsadcost is NULL */
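  /* Unlike mv_err_cost(), which indexes the cost tables with quarter-pel
   * component differences, this helper indexes with raw full-pel
   * differences, matching the integer-pel searches below that call it.
   */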
  if (mvsadcost) {
    return ((mvsadcost[0][(mv->as_mv.row - ref->as_mv.row)] +
             mvsadcost[1][(mv->as_mv.col - ref->as_mv.col)]) *
                error_per_bit +
            128) >>
           8;
  }
  return 0;
}

void vp8_init_dsmotion_compensation(MACROBLOCK *x, int stride) {
  int Len;
  int search_site_count = 0;

  /* Generate offsets for 4 search sites per step. */
  Len = MAX_FIRST_STEP;
  x->ss[search_site_count].mv.col = 0;
  x->ss[search_site_count].mv.row = 0;
  x->ss[search_site_count].offset = 0;
  search_site_count++;

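  /* Each pass of the loop below adds the four cardinal neighbours at
   * distance Len; the offset field caches mv.row * stride + mv.col so the
   * search can step through the prediction buffer with a single add.
   */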
  while (Len > 0) {
    /* Compute offsets for search sites. */
    x->ss[search_site_count].mv.col = 0;
    x->ss[search_site_count].mv.row = -Len;
    x->ss[search_site_count].offset = -Len * stride;
    search_site_count++;

    /* Compute offsets for search sites. */
    x->ss[search_site_count].mv.col = 0;
    x->ss[search_site_count].mv.row = Len;
    x->ss[search_site_count].offset = Len * stride;
    search_site_count++;

    /* Compute offsets for search sites. */
    x->ss[search_site_count].mv.col = -Len;
    x->ss[search_site_count].mv.row = 0;
    x->ss[search_site_count].offset = -Len;
    search_site_count++;

    /* Compute offsets for search sites. */
    x->ss[search_site_count].mv.col = Len;
    x->ss[search_site_count].mv.row = 0;
    x->ss[search_site_count].offset = Len;
    search_site_count++;

    /* Contract. */
    Len /= 2;
  }

  x->ss_count = search_site_count;
  x->searches_per_step = 4;
}

void vp8_init3smotion_compensation(MACROBLOCK *x, int stride) {
  int Len;
  int search_site_count = 0;

  /* Generate offsets for 8 search sites per step. */
  Len = MAX_FIRST_STEP;
  x->ss[search_site_count].mv.col = 0;
  x->ss[search_site_count].mv.row = 0;
  x->ss[search_site_count].offset = 0;
  search_site_count++;

  while (Len > 0) {
    /* Compute offsets for search sites. */
    x->ss[search_site_count].mv.col = 0;
    x->ss[search_site_count].mv.row = -Len;
    x->ss[search_site_count].offset = -Len * stride;
    search_site_count++;

    /* Compute offsets for search sites. */
    x->ss[search_site_count].mv.col = 0;
    x->ss[search_site_count].mv.row = Len;
    x->ss[search_site_count].offset = Len * stride;
    search_site_count++;

    /* Compute offsets for search sites. */
    x->ss[search_site_count].mv.col = -Len;
    x->ss[search_site_count].mv.row = 0;
    x->ss[search_site_count].offset = -Len;
    search_site_count++;

    /* Compute offsets for search sites. */
    x->ss[search_site_count].mv.col = Len;
    x->ss[search_site_count].mv.row = 0;
    x->ss[search_site_count].offset = Len;
    search_site_count++;

    /* Compute offsets for search sites. */
    x->ss[search_site_count].mv.col = -Len;
    x->ss[search_site_count].mv.row = -Len;
    x->ss[search_site_count].offset = -Len * stride - Len;
    search_site_count++;

    /* Compute offsets for search sites. */
    x->ss[search_site_count].mv.col = Len;
    x->ss[search_site_count].mv.row = -Len;
    x->ss[search_site_count].offset = -Len * stride + Len;
    search_site_count++;

    /* Compute offsets for search sites. */
    x->ss[search_site_count].mv.col = -Len;
    x->ss[search_site_count].mv.row = Len;
    x->ss[search_site_count].offset = Len * stride - Len;
    search_site_count++;

    /* Compute offsets for search sites. */
    x->ss[search_site_count].mv.col = Len;
    x->ss[search_site_count].mv.row = Len;
    x->ss[search_site_count].offset = Len * stride + Len;
    search_site_count++;

    /* Contract. */
    Len /= 2;
  }

  x->ss_count = search_site_count;
  x->searches_per_step = 8;
}

/*
 * To avoid the penalty of cache-line-crossing reads, preload the reference
 * area into a small buffer that is aligned so reads from it never cross a
 * cache line. This reduces the CPU cycles spent reading ref data in the
 * sub-pixel filter functions.
 * TODO: Currently, since the sub-pixel search range here is -3 ~ 3, copy a
 * 22 row x 32 col area, which is enough for a 16x16 macroblock. Later, for
 * SPLITMV, we could reduce the area.
 */
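/* The 22-row height covers the 16 macroblock rows plus the +/-3 row
 * sub-pixel range (3 + 16 + 3 = 22); 32 cols is, presumably, the next
 * power-of-two width beyond the 22 needed, keeping each row aligned.
 */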

/* estimated cost of a motion vector (r,c) */
#define MVC(r, c) \
  (mvcost \
       ? ((mvcost[0][(r)-rr] + mvcost[1][(c)-rc]) * error_per_bit + 128) >> 8 \
       : 0)
/* pointer to predictor base of a motion vector */
#define PRE(r, c) (y + (((r) >> 2) * y_stride + ((c) >> 2) - (offset)))
/* convert motion vector component to offset for svf calc */
#define SP(x) (((x)&3) << 1)
/* returns subpixel variance error function. */
#define DIST(r, c) \
  vfp->svf(PRE(r, c), y_stride, SP(c), SP(r), z, b->src_stride, &sse)
#define IFMVCV(r, c, s, e) \
  if (c >= minc && c <= maxc && r >= minr && r <= maxr) s else e;
/* returns distortion + motion vector cost */
#define ERR(r, c) (MVC(r, c) + DIST(r, c))
/* checks if (r,c) has better score than previous best */
#define CHECK_BETTER(v, r, c) \
  do { \
    IFMVCV( \
        r, c, \
        { \
          thismse = DIST(r, c); \
          if ((v = (MVC(r, c) + thismse)) < besterr) { \
            besterr = v; \
            br = r; \
            bc = c; \
            *distortion = thismse; \
            *sse1 = sse; \
          } \
        }, \
        v = UINT_MAX;) \
  } while (0)
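/* e.g. CHECK_BETTER(left, tr, tc - 2) scores the candidate two quarter-pel
 * units to the left of (tr, tc); out-of-range candidates are scored
 * UINT_MAX so they never become the new best but can still feed the
 * whichdir comparison below.
 */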

int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
                                             int_mv *bestmv, int_mv *ref_mv,
                                             int error_per_bit,
                                             const vp8_variance_fn_ptr_t *vfp,
                                             int *mvcost[2], int *distortion,
                                             unsigned int *sse1) {
  unsigned char *z = (*(b->base_src) + b->src);

  int rr = ref_mv->as_mv.row >> 1, rc = ref_mv->as_mv.col >> 1;
  int br = bestmv->as_mv.row * 4, bc = bestmv->as_mv.col * 4;
  int tr = br, tc = bc;
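  /* rr/rc, br/bc, tr/tc and the min/max limits below are all in
   * quarter-pel units: the 1/8-pel reference is shifted down once and the
   * full-pel best is multiplied by 4.
   */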
  unsigned int besterr;
  unsigned int left, right, up, down, diag;
  unsigned int sse;
  unsigned int whichdir;
  unsigned int halfiters = 4;
  unsigned int quarteriters = 4;
  int thismse;

  int minc = VPXMAX(x->mv_col_min * 4,
                    (ref_mv->as_mv.col >> 1) - ((1 << mvlong_width) - 1));
  int maxc = VPXMIN(x->mv_col_max * 4,
                    (ref_mv->as_mv.col >> 1) + ((1 << mvlong_width) - 1));
  int minr = VPXMAX(x->mv_row_min * 4,
                    (ref_mv->as_mv.row >> 1) - ((1 << mvlong_width) - 1));
  int maxr = VPXMIN(x->mv_row_max * 4,
                    (ref_mv->as_mv.row >> 1) + ((1 << mvlong_width) - 1));

  int y_stride;
  int offset;
  int pre_stride = x->e_mbd.pre.y_stride;
  unsigned char *base_pre = x->e_mbd.pre.y_buffer;

#if VPX_ARCH_X86 || VPX_ARCH_X86_64
  MACROBLOCKD *xd = &x->e_mbd;
  unsigned char *y_0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride +
                       bestmv->as_mv.col;
  unsigned char *y;
  int buf_r1, buf_r2, buf_c1;

  /* Clamping to avoid out-of-range data access */
  buf_r1 = ((bestmv->as_mv.row - 3) < x->mv_row_min)
               ? (bestmv->as_mv.row - x->mv_row_min)
               : 3;
  buf_r2 = ((bestmv->as_mv.row + 3) > x->mv_row_max)
               ? (x->mv_row_max - bestmv->as_mv.row)
               : 3;
  buf_c1 = ((bestmv->as_mv.col - 3) < x->mv_col_min)
               ? (bestmv->as_mv.col - x->mv_col_min)
               : 3;
  y_stride = 32;

  /* Copy to intermediate buffer before searching. */
  vfp->copymem(y_0 - buf_c1 - pre_stride * buf_r1, pre_stride, xd->y_buf,
               y_stride, 16 + buf_r1 + buf_r2);
  y = xd->y_buf + y_stride * buf_r1 + buf_c1;
#else
  unsigned char *y = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride +
                     bestmv->as_mv.col;
  y_stride = pre_stride;
#endif

  offset = (bestmv->as_mv.row) * y_stride + bestmv->as_mv.col;

  /* central mv */
  bestmv->as_mv.row = clamp(bestmv->as_mv.row * 8, SHRT_MIN, SHRT_MAX);
  bestmv->as_mv.col = clamp(bestmv->as_mv.col * 8, SHRT_MIN, SHRT_MAX);

  /* calculate central point error */
  besterr = vfp->vf(y, y_stride, z, b->src_stride, sse1);
  *distortion = besterr;
  besterr += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);

  /* TODO: Each subsequent iteration checks at least one point in common
   * with the last iteration (two if the diagonal was selected).
   */
  while (--halfiters) {
    /* 1/2 pel */
    CHECK_BETTER(left, tr, tc - 2);
    CHECK_BETTER(right, tr, tc + 2);
    CHECK_BETTER(up, tr - 2, tc);
    CHECK_BETTER(down, tr + 2, tc);

    whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
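    /* whichdir bit 0 picks the better horizontal neighbour (0 = left) and
     * bit 1 the better vertical one (0 = up); the switch then tries the
     * diagonal between the two winners.
     */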

    switch (whichdir) {
      case 0: CHECK_BETTER(diag, tr - 2, tc - 2); break;
      case 1: CHECK_BETTER(diag, tr - 2, tc + 2); break;
      case 2: CHECK_BETTER(diag, tr + 2, tc - 2); break;
      case 3: CHECK_BETTER(diag, tr + 2, tc + 2); break;
    }

    /* no reason to check the same one again. */
    if (tr == br && tc == bc) break;

    tr = br;
    tc = bc;
  }

  /* TODO: Each subsequent iteration checks at least one point in common
   * with the last iteration (two if the diagonal was selected).
   */

  /* 1/4 pel */
  while (--quarteriters) {
    CHECK_BETTER(left, tr, tc - 1);
    CHECK_BETTER(right, tr, tc + 1);
    CHECK_BETTER(up, tr - 1, tc);
    CHECK_BETTER(down, tr + 1, tc);

    whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);

    switch (whichdir) {
      case 0: CHECK_BETTER(diag, tr - 1, tc - 1); break;
      case 1: CHECK_BETTER(diag, tr - 1, tc + 1); break;
      case 2: CHECK_BETTER(diag, tr + 1, tc - 1); break;
      case 3: CHECK_BETTER(diag, tr + 1, tc + 1); break;
    }

    /* no reason to check the same one again. */
    if (tr == br && tc == bc) break;

    tr = br;
    tc = bc;
  }

  bestmv->as_mv.row = clamp(br * 2, SHRT_MIN, SHRT_MAX);
  bestmv->as_mv.col = clamp(bc * 2, SHRT_MIN, SHRT_MAX);

  if ((abs(bestmv->as_mv.col - ref_mv->as_mv.col) > (MAX_FULL_PEL_VAL << 3)) ||
      (abs(bestmv->as_mv.row - ref_mv->as_mv.row) > (MAX_FULL_PEL_VAL << 3))) {
    return INT_MAX;
  }

  return besterr;
}
#undef MVC
#undef PRE
#undef SP
#undef DIST
#undef IFMVCV
#undef ERR
#undef CHECK_BETTER

int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
                                 int_mv *bestmv, int_mv *ref_mv,
                                 int error_per_bit,
                                 const vp8_variance_fn_ptr_t *vfp,
                                 int *mvcost[2], int *distortion,
                                 unsigned int *sse1) {
  int bestmse = INT_MAX;
  int_mv startmv;
  int_mv this_mv;
  unsigned char *z = (*(b->base_src) + b->src);
  int left, right, up, down, diag;
  unsigned int sse;
  int whichdir;
  int thismse;
  int y_stride;
  int pre_stride = x->e_mbd.pre.y_stride;
  unsigned char *base_pre = x->e_mbd.pre.y_buffer;

#if VPX_ARCH_X86 || VPX_ARCH_X86_64
  MACROBLOCKD *xd = &x->e_mbd;
  unsigned char *y_0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride +
                       bestmv->as_mv.col;
  unsigned char *y;

  y_stride = 32;
  /* Copy 18 rows x 32 cols area to intermediate buffer before searching. */
  vfp->copymem(y_0 - 1 - pre_stride, pre_stride, xd->y_buf, y_stride, 18);
  y = xd->y_buf + y_stride + 1;
#else
  unsigned char *y = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride +
                     bestmv->as_mv.col;
  y_stride = pre_stride;
#endif

  /* central mv */
  bestmv->as_mv.row = clamp(bestmv->as_mv.row * 8, SHRT_MIN, SHRT_MAX);
  bestmv->as_mv.col = clamp(bestmv->as_mv.col * 8, SHRT_MIN, SHRT_MAX);
  startmv = *bestmv;

  /* calculate central point error */
  bestmse = vfp->vf(y, y_stride, z, b->src_stride, sse1);
  *distortion = bestmse;
  bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);

  /* go left then right and check error */
  this_mv.as_mv.row = startmv.as_mv.row;
  this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4);
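  /* (col - 8) | 4 equals col - 4 when col is a multiple of 8, i.e. one
   * half-pel step left expressed in 1/8-pel units. */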
415 /* "halfpix" horizontal variance */
416 thismse = vfp->svf(y - 1, y_stride, 4, 0, z, b->src_stride, &sse);
417 left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
418
419 if (left < bestmse) {
420 *bestmv = this_mv;
421 bestmse = left;
422 *distortion = thismse;
423 *sse1 = sse;
424 }
425
426 this_mv.as_mv.col += 8;
427 /* "halfpix" horizontal variance */
428 thismse = vfp->svf(y, y_stride, 4, 0, z, b->src_stride, &sse);
429 right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
430
431 if (right < bestmse) {
432 *bestmv = this_mv;
433 bestmse = right;
434 *distortion = thismse;
435 *sse1 = sse;
436 }
437
438 /* go up then down and check error */
439 this_mv.as_mv.col = startmv.as_mv.col;
440 this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4);
441 /* "halfpix" vertical variance */
442 thismse = vfp->svf(y - y_stride, y_stride, 0, 4, z, b->src_stride, &sse);
443 up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
444
445 if (up < bestmse) {
446 *bestmv = this_mv;
447 bestmse = up;
448 *distortion = thismse;
449 *sse1 = sse;
450 }
451
452 this_mv.as_mv.row += 8;
453 /* "halfpix" vertical variance */
454 thismse = vfp->svf(y, y_stride, 0, 4, z, b->src_stride, &sse);
455 down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
456
457 if (down < bestmse) {
458 *bestmv = this_mv;
459 bestmse = down;
460 *distortion = thismse;
461 *sse1 = sse;
462 }
463
464 /* now check 1 more diagonal */
465 whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
466 this_mv = startmv;
467
468 switch (whichdir) {
469 case 0:
470 this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
471 this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
472 /* "halfpix" horizontal/vertical variance */
473 thismse =
474 vfp->svf(y - 1 - y_stride, y_stride, 4, 4, z, b->src_stride, &sse);
475 break;
476 case 1:
477 this_mv.as_mv.col += 4;
478 this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
479 /* "halfpix" horizontal/vertical variance */
480 thismse = vfp->svf(y - y_stride, y_stride, 4, 4, z, b->src_stride, &sse);
481 break;
482 case 2:
483 this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
484 this_mv.as_mv.row += 4;
485 /* "halfpix" horizontal/vertical variance */
486 thismse = vfp->svf(y - 1, y_stride, 4, 4, z, b->src_stride, &sse);
487 break;
488 case 3:
489 default:
490 this_mv.as_mv.col += 4;
491 this_mv.as_mv.row += 4;
492 /* "halfpix" horizontal/vertical variance */
493 thismse = vfp->svf(y, y_stride, 4, 4, z, b->src_stride, &sse);
494 break;
495 }
496
497 diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
498
499 if (diag < bestmse) {
500 *bestmv = this_mv;
501 bestmse = diag;
502 *distortion = thismse;
503 *sse1 = sse;
504 }
505
506 /* time to check quarter pels. */
507 if (bestmv->as_mv.row < startmv.as_mv.row) y -= y_stride;
508
509 if (bestmv->as_mv.col < startmv.as_mv.col) y--;
510
511 startmv = *bestmv;
512
513 /* go left then right and check error */
514 this_mv.as_mv.row = startmv.as_mv.row;
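  /* After the half-pel stage the components are multiples of 4, so a
   * quarter-pel step left either stays on the same integer column
   * (col & 7 == 4) or crosses into the previous one (col & 7 == 0), hence
   * the y - 1 pointer and subpel offset 6 in the else branch below.
   */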

  if (startmv.as_mv.col & 7) {
    this_mv.as_mv.col = startmv.as_mv.col - 2;
    thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7,
                       this_mv.as_mv.row & 7, z, b->src_stride, &sse);
  } else {
    this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
    thismse = vfp->svf(y - 1, y_stride, 6, this_mv.as_mv.row & 7, z,
                       b->src_stride, &sse);
  }

  left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (left < bestmse) {
    *bestmv = this_mv;
    bestmse = left;
    *distortion = thismse;
    *sse1 = sse;
  }

  this_mv.as_mv.col += 4;
  thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7,
                     z, b->src_stride, &sse);
  right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (right < bestmse) {
    *bestmv = this_mv;
    bestmse = right;
    *distortion = thismse;
    *sse1 = sse;
  }

  /* go up then down and check error */
  this_mv.as_mv.col = startmv.as_mv.col;

  if (startmv.as_mv.row & 7) {
    this_mv.as_mv.row = startmv.as_mv.row - 2;
    thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7,
                       this_mv.as_mv.row & 7, z, b->src_stride, &sse);
  } else {
    this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;
    thismse = vfp->svf(y - y_stride, y_stride, this_mv.as_mv.col & 7, 6, z,
                       b->src_stride, &sse);
  }

  up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (up < bestmse) {
    *bestmv = this_mv;
    bestmse = up;
    *distortion = thismse;
    *sse1 = sse;
  }

  this_mv.as_mv.row += 4;
  thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7,
                     z, b->src_stride, &sse);
  down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (down < bestmse) {
    *bestmv = this_mv;
    bestmse = down;
    *distortion = thismse;
    *sse1 = sse;
  }

  /* now check 1 more diagonal */
  whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);

  this_mv = startmv;

  switch (whichdir) {
    case 0:

      if (startmv.as_mv.row & 7) {
        this_mv.as_mv.row -= 2;

        if (startmv.as_mv.col & 7) {
          this_mv.as_mv.col -= 2;
          thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7,
                             this_mv.as_mv.row & 7, z, b->src_stride, &sse);
        } else {
          this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
          thismse = vfp->svf(y - 1, y_stride, 6, this_mv.as_mv.row & 7, z,
                             b->src_stride, &sse);
        }
      } else {
        this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;

        if (startmv.as_mv.col & 7) {
          this_mv.as_mv.col -= 2;
          thismse = vfp->svf(y - y_stride, y_stride, this_mv.as_mv.col & 7, 6,
                             z, b->src_stride, &sse);
        } else {
          this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
          thismse = vfp->svf(y - y_stride - 1, y_stride, 6, 6, z, b->src_stride,
                             &sse);
        }
      }

      break;
    case 1:
      this_mv.as_mv.col += 2;

      if (startmv.as_mv.row & 7) {
        this_mv.as_mv.row -= 2;
        thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7,
                           this_mv.as_mv.row & 7, z, b->src_stride, &sse);
      } else {
        this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;
        thismse = vfp->svf(y - y_stride, y_stride, this_mv.as_mv.col & 7, 6, z,
                           b->src_stride, &sse);
      }

      break;
    case 2:
      this_mv.as_mv.row += 2;

      if (startmv.as_mv.col & 7) {
        this_mv.as_mv.col -= 2;
        thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7,
                           this_mv.as_mv.row & 7, z, b->src_stride, &sse);
      } else {
        this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
        thismse = vfp->svf(y - 1, y_stride, 6, this_mv.as_mv.row & 7, z,
                           b->src_stride, &sse);
      }

      break;
    case 3:
      this_mv.as_mv.col += 2;
      this_mv.as_mv.row += 2;
      thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7,
                         this_mv.as_mv.row & 7, z, b->src_stride, &sse);
      break;
  }

  diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (diag < bestmse) {
    *bestmv = this_mv;
    bestmse = diag;
    *distortion = thismse;
    *sse1 = sse;
  }

  return bestmse;
}

int vp8_find_best_half_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
                                  int_mv *bestmv, int_mv *ref_mv,
                                  int error_per_bit,
                                  const vp8_variance_fn_ptr_t *vfp,
                                  int *mvcost[2], int *distortion,
                                  unsigned int *sse1) {
  int bestmse = INT_MAX;
  int_mv startmv;
  int_mv this_mv;
  unsigned char *z = (*(b->base_src) + b->src);
  int left, right, up, down, diag;
  unsigned int sse;
  int whichdir;
  int thismse;
  int y_stride;
  int pre_stride = x->e_mbd.pre.y_stride;
  unsigned char *base_pre = x->e_mbd.pre.y_buffer;

#if VPX_ARCH_X86 || VPX_ARCH_X86_64
  MACROBLOCKD *xd = &x->e_mbd;
  unsigned char *y_0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride +
                       bestmv->as_mv.col;
  unsigned char *y;

  y_stride = 32;
  /* Copy 18 rows x 32 cols area to intermediate buffer before searching. */
  vfp->copymem(y_0 - 1 - pre_stride, pre_stride, xd->y_buf, y_stride, 18);
  y = xd->y_buf + y_stride + 1;
#else
  unsigned char *y = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride +
                     bestmv->as_mv.col;
  y_stride = pre_stride;
#endif

  /* central mv */
  bestmv->as_mv.row = clamp(bestmv->as_mv.row * 8, SHRT_MIN, SHRT_MAX);
  bestmv->as_mv.col = clamp(bestmv->as_mv.col * 8, SHRT_MIN, SHRT_MAX);
  startmv = *bestmv;

  /* calculate central point error */
  bestmse = vfp->vf(y, y_stride, z, b->src_stride, sse1);
  *distortion = bestmse;
  bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);

  /* go left then right and check error */
  this_mv.as_mv.row = startmv.as_mv.row;
  this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4);
  /* "halfpix" horizontal variance */
  thismse = vfp->svf(y - 1, y_stride, 4, 0, z, b->src_stride, &sse);
  left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (left < bestmse) {
    *bestmv = this_mv;
    bestmse = left;
    *distortion = thismse;
    *sse1 = sse;
  }

  this_mv.as_mv.col += 8;
  /* "halfpix" horizontal variance */
  thismse = vfp->svf(y, y_stride, 4, 0, z, b->src_stride, &sse);
  right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (right < bestmse) {
    *bestmv = this_mv;
    bestmse = right;
    *distortion = thismse;
    *sse1 = sse;
  }

  /* go up then down and check error */
  this_mv.as_mv.col = startmv.as_mv.col;
  this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4);
  /* "halfpix" vertical variance */
  thismse = vfp->svf(y - y_stride, y_stride, 0, 4, z, b->src_stride, &sse);
  up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (up < bestmse) {
    *bestmv = this_mv;
    bestmse = up;
    *distortion = thismse;
    *sse1 = sse;
  }

  this_mv.as_mv.row += 8;
  /* "halfpix" vertical variance */
  thismse = vfp->svf(y, y_stride, 0, 4, z, b->src_stride, &sse);
  down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (down < bestmse) {
    *bestmv = this_mv;
    bestmse = down;
    *distortion = thismse;
    *sse1 = sse;
  }

  /* now check 1 more diagonal */
  whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
  this_mv = startmv;

  switch (whichdir) {
    case 0:
      this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
      this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
      /* "halfpix" horizontal/vertical variance */
      thismse =
          vfp->svf(y - 1 - y_stride, y_stride, 4, 4, z, b->src_stride, &sse);
      break;
    case 1:
      this_mv.as_mv.col += 4;
      this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
      /* "halfpix" horizontal/vertical variance */
      thismse = vfp->svf(y - y_stride, y_stride, 4, 4, z, b->src_stride, &sse);
      break;
    case 2:
      this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
      this_mv.as_mv.row += 4;
      /* "halfpix" horizontal/vertical variance */
      thismse = vfp->svf(y - 1, y_stride, 4, 4, z, b->src_stride, &sse);
      break;
    case 3:
    default:
      this_mv.as_mv.col += 4;
      this_mv.as_mv.row += 4;
      /* "halfpix" horizontal/vertical variance */
      thismse = vfp->svf(y, y_stride, 4, 4, z, b->src_stride, &sse);
      break;
  }

  diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (diag < bestmse) {
    *bestmv = this_mv;
    bestmse = diag;
    *distortion = thismse;
    *sse1 = sse;
  }

  return bestmse;
}

#define CHECK_BOUNDS(range) \
  do { \
    all_in = 1; \
    all_in &= ((br - range) >= x->mv_row_min); \
    all_in &= ((br + range) <= x->mv_row_max); \
    all_in &= ((bc - range) >= x->mv_col_min); \
    all_in &= ((bc + range) <= x->mv_col_max); \
  } while (0)

#define CHECK_POINT \
  { \
    if (this_mv.as_mv.col < x->mv_col_min) continue; \
    if (this_mv.as_mv.col > x->mv_col_max) continue; \
    if (this_mv.as_mv.row < x->mv_row_min) continue; \
    if (this_mv.as_mv.row > x->mv_row_max) continue; \
  }

#define CHECK_BETTER \
  do { \
    if (thissad < bestsad) { \
      thissad += \
          mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit); \
      if (thissad < bestsad) { \
        bestsad = thissad; \
        best_site = i; \
      } \
    } \
  } while (0)
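
/* The double thissad < bestsad test in CHECK_BETTER skips the
 * mvsad_err_cost() lookup whenever the raw SAD already exceeds the
 * current best.
 */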

static const MV next_chkpts[6][3] = {
  { { -2, 0 }, { -1, -2 }, { 1, -2 } }, { { -1, -2 }, { 1, -2 }, { 2, 0 } },
  { { 1, -2 }, { 2, 0 }, { 1, 2 } },    { { 2, 0 }, { 1, 2 }, { -1, 2 } },
  { { 1, 2 }, { -1, 2 }, { -2, 0 } },   { { -1, 2 }, { -2, 0 }, { -1, -2 } }
};
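/* next_chkpts[k] lists the three hexagon points that become new after the
 * centre moves in direction k; the other three were already evaluated in
 * the previous iteration.
 */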

int vp8_hex_search(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
                   int_mv *best_mv, int search_param, int sad_per_bit,
                   const vp8_variance_fn_ptr_t *vfp, int *mvsadcost[2],
                   int_mv *center_mv) {
  MV hex[6] = {
    { -1, -2 }, { 1, -2 }, { 2, 0 }, { 1, 2 }, { -1, 2 }, { -2, 0 }
  };
  MV neighbors[4] = { { 0, -1 }, { -1, 0 }, { 1, 0 }, { 0, 1 } };
  int i, j;

  unsigned char *what = (*(b->base_src) + b->src);
  int what_stride = b->src_stride;
  int pre_stride = x->e_mbd.pre.y_stride;
  unsigned char *base_pre = x->e_mbd.pre.y_buffer;

  int in_what_stride = pre_stride;
  int br, bc;
  int_mv this_mv;
  unsigned int bestsad;
  unsigned int thissad;
  unsigned char *base_offset;
  unsigned char *this_offset;
  int k = -1;
  int all_in;
  int best_site = -1;
  int hex_range = 127;
  int dia_range = 8;

  int_mv fcenter_mv;
  fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
  fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;

  /* adjust ref_mv to make sure it is within MV range */
  vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min,
               x->mv_row_max);
  br = ref_mv->as_mv.row;
  bc = ref_mv->as_mv.col;

  /* Work out the start point for the search */
  base_offset = (unsigned char *)(base_pre + d->offset);
  this_offset = base_offset + (br * (pre_stride)) + bc;
  this_mv.as_mv.row = br;
  this_mv.as_mv.col = bc;
  bestsad = vfp->sdf(what, what_stride, this_offset, in_what_stride) +
            mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);

#if CONFIG_MULTI_RES_ENCODING
  /* Lower search range based on prediction info */
  if (search_param >= 6)
    goto cal_neighbors;
  else if (search_param >= 5)
    hex_range = 4;
  else if (search_param >= 4)
    hex_range = 6;
  else if (search_param >= 3)
    hex_range = 15;
  else if (search_param >= 2)
    hex_range = 31;
  else if (search_param >= 1)
    hex_range = 63;

  dia_range = 8;
#else
  (void)search_param;
#endif

  /* hex search */
  CHECK_BOUNDS(2);

  if (all_in) {
    for (i = 0; i < 6; ++i) {
      this_mv.as_mv.row = br + hex[i].row;
      this_mv.as_mv.col = bc + hex[i].col;
      this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) +
                    this_mv.as_mv.col;
      thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
      CHECK_BETTER;
    }
  } else {
    for (i = 0; i < 6; ++i) {
      this_mv.as_mv.row = br + hex[i].row;
      this_mv.as_mv.col = bc + hex[i].col;
      CHECK_POINT
      this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) +
                    this_mv.as_mv.col;
      thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
      CHECK_BETTER;
    }
  }

  if (best_site == -1) {
    goto cal_neighbors;
  } else {
    br += hex[best_site].row;
    bc += hex[best_site].col;
    k = best_site;
  }

  for (j = 1; j < hex_range; ++j) {
    best_site = -1;
    CHECK_BOUNDS(2);

    if (all_in) {
      for (i = 0; i < 3; ++i) {
        this_mv.as_mv.row = br + next_chkpts[k][i].row;
        this_mv.as_mv.col = bc + next_chkpts[k][i].col;
        this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) +
                      this_mv.as_mv.col;
        thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
        CHECK_BETTER;
      }
    } else {
      for (i = 0; i < 3; ++i) {
        this_mv.as_mv.row = br + next_chkpts[k][i].row;
        this_mv.as_mv.col = bc + next_chkpts[k][i].col;
        CHECK_POINT
        this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) +
                      this_mv.as_mv.col;
        thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
        CHECK_BETTER;
      }
    }

    if (best_site == -1) {
      break;
    } else {
      br += next_chkpts[k][best_site].row;
      bc += next_chkpts[k][best_site].col;
      k += 5 + best_site;
      if (k >= 12) {
        k -= 12;
      } else if (k >= 6) {
        k -= 6;
      }
    }
  }

  /* check 4 1-away neighbors */
cal_neighbors:
  for (j = 0; j < dia_range; ++j) {
    best_site = -1;
    CHECK_BOUNDS(1);

    if (all_in) {
      for (i = 0; i < 4; ++i) {
        this_mv.as_mv.row = br + neighbors[i].row;
        this_mv.as_mv.col = bc + neighbors[i].col;
        this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) +
                      this_mv.as_mv.col;
        thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
        CHECK_BETTER;
      }
    } else {
      for (i = 0; i < 4; ++i) {
        this_mv.as_mv.row = br + neighbors[i].row;
        this_mv.as_mv.col = bc + neighbors[i].col;
        CHECK_POINT
        this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) +
                      this_mv.as_mv.col;
        thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
        CHECK_BETTER;
      }
    }

    if (best_site == -1) {
      break;
    } else {
      br += neighbors[best_site].row;
      bc += neighbors[best_site].col;
    }
  }

  best_mv->as_mv.row = br;
  best_mv->as_mv.col = bc;

  return bestsad;
}
#undef CHECK_BOUNDS
#undef CHECK_POINT
#undef CHECK_BETTER

int vp8_diamond_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
                             int_mv *best_mv, int search_param, int sad_per_bit,
                             int *num00, vp8_variance_fn_ptr_t *fn_ptr,
                             int *mvcost[2], int_mv *center_mv) {
  int i, j, step;

  unsigned char *what = (*(b->base_src) + b->src);
  int what_stride = b->src_stride;
  unsigned char *in_what;
  int pre_stride = x->e_mbd.pre.y_stride;
  unsigned char *base_pre = x->e_mbd.pre.y_buffer;
  int in_what_stride = pre_stride;
  unsigned char *best_address;

  int tot_steps;
  int_mv this_mv;

  unsigned int bestsad;
  unsigned int thissad;
  int best_site = 0;
  int last_site = 0;

  int ref_row;
  int ref_col;
  int this_row_offset;
  int this_col_offset;
  search_site *ss;

  unsigned char *check_here;

  int *mvsadcost[2];
  int_mv fcenter_mv;

  mvsadcost[0] = x->mvsadcost[0];
  mvsadcost[1] = x->mvsadcost[1];
  fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
  fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;

  vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min,
               x->mv_row_max);
  ref_row = ref_mv->as_mv.row;
  ref_col = ref_mv->as_mv.col;
  *num00 = 0;
  best_mv->as_mv.row = ref_row;
  best_mv->as_mv.col = ref_col;

  /* Work out the start point for the search */
  in_what = (unsigned char *)(base_pre + d->offset + (ref_row * pre_stride) +
                              ref_col);
  best_address = in_what;

  /* Check the starting position */
  bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride) +
            mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);

  /* search_param determines the length of the initial step and hence the
   * number of iterations: 0 = initial step (MAX_FIRST_STEP) pel,
   * 1 = (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc.
   */
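  /* ss[0] is the zero-offset centre site, so candidate indexing starts at
   * 1; each pass of the step loop consumes one ring of searches_per_step
   * sites at half the previous radius.
   */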
  ss = &x->ss[search_param * x->searches_per_step];
  tot_steps = (x->ss_count / x->searches_per_step) - search_param;

  i = 1;

  for (step = 0; step < tot_steps; ++step) {
    for (j = 0; j < x->searches_per_step; ++j) {
      /* Trap illegal vectors */
      this_row_offset = best_mv->as_mv.row + ss[i].mv.row;
      this_col_offset = best_mv->as_mv.col + ss[i].mv.col;

      if ((this_col_offset > x->mv_col_min) &&
          (this_col_offset < x->mv_col_max) &&
          (this_row_offset > x->mv_row_min) &&
          (this_row_offset < x->mv_row_max)) {
        check_here = ss[i].offset + best_address;
        thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);

        if (thissad < bestsad) {
          this_mv.as_mv.row = this_row_offset;
          this_mv.as_mv.col = this_col_offset;
          thissad +=
              mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);

          if (thissad < bestsad) {
            bestsad = thissad;
            best_site = i;
          }
        }
      }

      i++;
    }

    if (best_site != last_site) {
      best_mv->as_mv.row += ss[best_site].mv.row;
      best_mv->as_mv.col += ss[best_site].mv.col;
      best_address += ss[best_site].offset;
      last_site = best_site;
    } else if (best_address == in_what) {
      (*num00)++;
    }
  }

  this_mv.as_mv.row = clamp(best_mv->as_mv.row * 8, SHRT_MIN, SHRT_MAX);
  this_mv.as_mv.col = clamp(best_mv->as_mv.col * 8, SHRT_MIN, SHRT_MAX);

  return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad) +
         mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
}

#if HAVE_SSE2 || HAVE_MSA || HAVE_LSX
int vp8_diamond_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
                             int_mv *best_mv, int search_param, int sad_per_bit,
                             int *num00, vp8_variance_fn_ptr_t *fn_ptr,
                             int *mvcost[2], int_mv *center_mv) {
  int i, j, step;

  unsigned char *what = (*(b->base_src) + b->src);
  int what_stride = b->src_stride;
  unsigned char *in_what;
  int pre_stride = x->e_mbd.pre.y_stride;
  unsigned char *base_pre = x->e_mbd.pre.y_buffer;
  int in_what_stride = pre_stride;
  unsigned char *best_address;

  int tot_steps;
  int_mv this_mv;

  unsigned int bestsad;
  unsigned int thissad;
  int best_site = 0;
  int last_site = 0;

  int ref_row;
  int ref_col;
  int this_row_offset;
  int this_col_offset;
  search_site *ss;

  unsigned char *check_here;

  int *mvsadcost[2];
  int_mv fcenter_mv;

  mvsadcost[0] = x->mvsadcost[0];
  mvsadcost[1] = x->mvsadcost[1];
  fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
  fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;

  vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min,
               x->mv_row_max);
  ref_row = ref_mv->as_mv.row;
  ref_col = ref_mv->as_mv.col;
  *num00 = 0;
  best_mv->as_mv.row = ref_row;
  best_mv->as_mv.col = ref_col;

  /* Work out the start point for the search */
  in_what = (unsigned char *)(base_pre + d->offset + (ref_row * pre_stride) +
                              ref_col);
  best_address = in_what;

  /* Check the starting position */
  bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride) +
            mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);

  /* search_param determines the length of the initial step and hence the
   * number of iterations: 0 = initial step (MAX_FIRST_STEP) pel,
   * 1 = (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc.
   */
  ss = &x->ss[search_param * x->searches_per_step];
  tot_steps = (x->ss_count / x->searches_per_step) - search_param;

  i = 1;

  for (step = 0; step < tot_steps; ++step) {
    int all_in = 1, t;

    /* To know whether all neighbour points are within the bounds, four
     * boundary checks are enough, instead of checking four bounds for
     * each point.
     */
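    /* For the site layouts built above, ss[i] / ss[i + 1] carry the
     * extreme row offsets (-Len / +Len) and ss[i + 2] / ss[i + 3] the
     * extreme column offsets, so these four tests bound every site in
     * the ring.
     */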
    all_in &= ((best_mv->as_mv.row + ss[i].mv.row) > x->mv_row_min);
    all_in &= ((best_mv->as_mv.row + ss[i + 1].mv.row) < x->mv_row_max);
    all_in &= ((best_mv->as_mv.col + ss[i + 2].mv.col) > x->mv_col_min);
    all_in &= ((best_mv->as_mv.col + ss[i + 3].mv.col) < x->mv_col_max);

    if (all_in) {
      unsigned int sad_array[4];

      for (j = 0; j < x->searches_per_step; j += 4) {
        const unsigned char *block_offset[4];

        for (t = 0; t < 4; ++t) {
          block_offset[t] = ss[i + t].offset + best_address;
        }

        fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride,
                       sad_array);

        for (t = 0; t < 4; t++, i++) {
          if (sad_array[t] < bestsad) {
            this_mv.as_mv.row = best_mv->as_mv.row + ss[i].mv.row;
            this_mv.as_mv.col = best_mv->as_mv.col + ss[i].mv.col;
            sad_array[t] +=
                mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);

            if (sad_array[t] < bestsad) {
              bestsad = sad_array[t];
              best_site = i;
            }
          }
        }
      }
    } else {
      for (j = 0; j < x->searches_per_step; ++j) {
        /* Trap illegal vectors */
        this_row_offset = best_mv->as_mv.row + ss[i].mv.row;
        this_col_offset = best_mv->as_mv.col + ss[i].mv.col;

        if ((this_col_offset > x->mv_col_min) &&
            (this_col_offset < x->mv_col_max) &&
            (this_row_offset > x->mv_row_min) &&
            (this_row_offset < x->mv_row_max)) {
          check_here = ss[i].offset + best_address;
          thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);

          if (thissad < bestsad) {
            this_mv.as_mv.row = this_row_offset;
            this_mv.as_mv.col = this_col_offset;
            thissad +=
                mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);

            if (thissad < bestsad) {
              bestsad = thissad;
              best_site = i;
            }
          }
        }
        i++;
      }
    }

    if (best_site != last_site) {
      best_mv->as_mv.row += ss[best_site].mv.row;
      best_mv->as_mv.col += ss[best_site].mv.col;
      best_address += ss[best_site].offset;
      last_site = best_site;
    } else if (best_address == in_what) {
      (*num00)++;
    }
  }

  this_mv.as_mv.row = clamp(best_mv->as_mv.row * 8, SHRT_MIN, SHRT_MAX);
  this_mv.as_mv.col = clamp(best_mv->as_mv.col * 8, SHRT_MIN, SHRT_MAX);

  return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad) +
         mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
}
#endif  // HAVE_SSE2 || HAVE_MSA || HAVE_LSX

int vp8_full_search_sad(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
                        int sad_per_bit, int distance,
                        vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2],
                        int_mv *center_mv) {
  unsigned char *what = (*(b->base_src) + b->src);
  int what_stride = b->src_stride;
  unsigned char *in_what;
  int pre_stride = x->e_mbd.pre.y_stride;
  unsigned char *base_pre = x->e_mbd.pre.y_buffer;
  int in_what_stride = pre_stride;
  int mv_stride = pre_stride;
  unsigned char *bestaddress;
  int_mv *best_mv = &d->bmi.mv;
  int_mv this_mv;
  unsigned int bestsad;
  unsigned int thissad;
  int r, c;

  unsigned char *check_here;

  int ref_row = ref_mv->as_mv.row;
  int ref_col = ref_mv->as_mv.col;

  int row_min = ref_row - distance;
  int row_max = ref_row + distance;
  int col_min = ref_col - distance;
  int col_max = ref_col + distance;

  int *mvsadcost[2];
  int_mv fcenter_mv;

  mvsadcost[0] = x->mvsadcost[0];
  mvsadcost[1] = x->mvsadcost[1];
  fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
  fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;

  /* Work out the mid point for the search */
  in_what = base_pre + d->offset;
  bestaddress = in_what + (ref_row * pre_stride) + ref_col;

  best_mv->as_mv.row = ref_row;
  best_mv->as_mv.col = ref_col;

  /* Baseline value at the centre */
  bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride) +
            mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);

  /* Apply further limits to prevent us from searching using vectors that
   * stretch beyond the UMV border
   */
  if (col_min < x->mv_col_min) col_min = x->mv_col_min;

  if (col_max > x->mv_col_max) col_max = x->mv_col_max;

  if (row_min < x->mv_row_min) row_min = x->mv_row_min;

  if (row_max > x->mv_row_max) row_max = x->mv_row_max;

  for (r = row_min; r < row_max; ++r) {
    this_mv.as_mv.row = r;
    check_here = r * mv_stride + in_what + col_min;

    for (c = col_min; c < col_max; ++c) {
      thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);

      if (thissad < bestsad) {
        this_mv.as_mv.col = c;
        thissad +=
            mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);

        if (thissad < bestsad) {
          bestsad = thissad;
          best_mv->as_mv.row = r;
          best_mv->as_mv.col = c;
          bestaddress = check_here;
        }
      }

      check_here++;
    }
  }

  this_mv.as_mv.row = clamp(best_mv->as_mv.row * 8, SHRT_MIN, SHRT_MAX);
  this_mv.as_mv.col = clamp(best_mv->as_mv.col * 8, SHRT_MIN, SHRT_MAX);

  return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, &thissad) +
         mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
}

int vp8_refining_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
                              int_mv *ref_mv, int error_per_bit,
                              int search_range, vp8_variance_fn_ptr_t *fn_ptr,
                              int *mvcost[2], int_mv *center_mv) {
  MV neighbors[4] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 } };
  int i, j;
  short this_row_offset, this_col_offset;

  int what_stride = b->src_stride;
  int pre_stride = x->e_mbd.pre.y_stride;
  unsigned char *base_pre = x->e_mbd.pre.y_buffer;
  int in_what_stride = pre_stride;
  unsigned char *what = (*(b->base_src) + b->src);
  unsigned char *best_address =
      (unsigned char *)(base_pre + d->offset +
                        (ref_mv->as_mv.row * pre_stride) + ref_mv->as_mv.col);
  unsigned char *check_here;
  int_mv this_mv;
  unsigned int bestsad;
  unsigned int thissad;

  int *mvsadcost[2];
  int_mv fcenter_mv;

  mvsadcost[0] = x->mvsadcost[0];
  mvsadcost[1] = x->mvsadcost[1];
  fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
  fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;

  bestsad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride) +
            mvsad_err_cost(ref_mv, &fcenter_mv, mvsadcost, error_per_bit);

  for (i = 0; i < search_range; ++i) {
    int best_site = -1;

    for (j = 0; j < 4; ++j) {
      this_row_offset = ref_mv->as_mv.row + neighbors[j].row;
      this_col_offset = ref_mv->as_mv.col + neighbors[j].col;

      if ((this_col_offset > x->mv_col_min) &&
          (this_col_offset < x->mv_col_max) &&
          (this_row_offset > x->mv_row_min) &&
          (this_row_offset < x->mv_row_max)) {
        check_here = (neighbors[j].row) * in_what_stride + neighbors[j].col +
                     best_address;
        thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);

        if (thissad < bestsad) {
          this_mv.as_mv.row = this_row_offset;
          this_mv.as_mv.col = this_col_offset;
          thissad +=
              mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit);

          if (thissad < bestsad) {
            bestsad = thissad;
            best_site = j;
          }
        }
      }
    }

    if (best_site == -1) {
      break;
    } else {
      ref_mv->as_mv.row += neighbors[best_site].row;
      ref_mv->as_mv.col += neighbors[best_site].col;
      best_address += (neighbors[best_site].row) * in_what_stride +
                      neighbors[best_site].col;
    }
  }

  this_mv.as_mv.row = clamp(ref_mv->as_mv.row * 8, SHRT_MIN, SHRT_MAX);
  this_mv.as_mv.col = clamp(ref_mv->as_mv.col * 8, SHRT_MIN, SHRT_MAX);

  return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad) +
         mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
}

#if HAVE_SSE2 || HAVE_MSA
int vp8_refining_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
                              int_mv *ref_mv, int error_per_bit,
                              int search_range, vp8_variance_fn_ptr_t *fn_ptr,
                              int *mvcost[2], int_mv *center_mv) {
  MV neighbors[4] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 } };
  int i, j;
  short this_row_offset, this_col_offset;

  int what_stride = b->src_stride;
  int pre_stride = x->e_mbd.pre.y_stride;
  unsigned char *base_pre = x->e_mbd.pre.y_buffer;
  int in_what_stride = pre_stride;
  unsigned char *what = (*(b->base_src) + b->src);
  unsigned char *best_address =
      (unsigned char *)(base_pre + d->offset +
                        (ref_mv->as_mv.row * pre_stride) + ref_mv->as_mv.col);
  unsigned char *check_here;
  int_mv this_mv;
  unsigned int bestsad;
  unsigned int thissad;

  int *mvsadcost[2];
  int_mv fcenter_mv;

  mvsadcost[0] = x->mvsadcost[0];
  mvsadcost[1] = x->mvsadcost[1];
  fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
  fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;

  bestsad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride) +
            mvsad_err_cost(ref_mv, &fcenter_mv, mvsadcost, error_per_bit);

  for (i = 0; i < search_range; ++i) {
    int best_site = -1;
    int all_in = 1;

    all_in &= ((ref_mv->as_mv.row - 1) > x->mv_row_min);
    all_in &= ((ref_mv->as_mv.row + 1) < x->mv_row_max);
    all_in &= ((ref_mv->as_mv.col - 1) > x->mv_col_min);
    all_in &= ((ref_mv->as_mv.col + 1) < x->mv_col_max);

    if (all_in) {
      unsigned int sad_array[4];
      const unsigned char *block_offset[4];
      block_offset[0] = best_address - in_what_stride;
      block_offset[1] = best_address - 1;
      block_offset[2] = best_address + 1;
      block_offset[3] = best_address + in_what_stride;
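      /* block_offset[] is laid out to match neighbors[]: up, left, right,
       * down. */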

      fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride,
                     sad_array);

      for (j = 0; j < 4; ++j) {
        if (sad_array[j] < bestsad) {
          this_mv.as_mv.row = ref_mv->as_mv.row + neighbors[j].row;
          this_mv.as_mv.col = ref_mv->as_mv.col + neighbors[j].col;
          sad_array[j] +=
              mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit);

          if (sad_array[j] < bestsad) {
            bestsad = sad_array[j];
            best_site = j;
          }
        }
      }
    } else {
      for (j = 0; j < 4; ++j) {
        this_row_offset = ref_mv->as_mv.row + neighbors[j].row;
        this_col_offset = ref_mv->as_mv.col + neighbors[j].col;

        if ((this_col_offset > x->mv_col_min) &&
            (this_col_offset < x->mv_col_max) &&
            (this_row_offset > x->mv_row_min) &&
            (this_row_offset < x->mv_row_max)) {
          check_here = (neighbors[j].row) * in_what_stride + neighbors[j].col +
                       best_address;
          thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);

          if (thissad < bestsad) {
            this_mv.as_mv.row = this_row_offset;
            this_mv.as_mv.col = this_col_offset;
            thissad +=
                mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit);

            if (thissad < bestsad) {
              bestsad = thissad;
              best_site = j;
            }
          }
        }
      }
    }

    if (best_site == -1) {
      break;
    } else {
      ref_mv->as_mv.row += neighbors[best_site].row;
      ref_mv->as_mv.col += neighbors[best_site].col;
      best_address += (neighbors[best_site].row) * in_what_stride +
                      neighbors[best_site].col;
    }
  }

  this_mv.as_mv.row = clamp(ref_mv->as_mv.row * 8, SHRT_MIN, SHRT_MAX);
  this_mv.as_mv.col = clamp(ref_mv->as_mv.col * 8, SHRT_MIN, SHRT_MAX);

  return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad) +
         mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
}
#endif  // HAVE_SSE2 || HAVE_MSA
