1 /*
2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
3 *
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 */
11 #include <math.h>
12 #include <limits.h>
13
14 #include "config/aom_config.h"
15
16 #include "aom_dsp/mathutils.h"
17 #include "aom_mem/aom_mem.h"
18
19 #include "av1/common/av1_common_int.h"
20 #include "av1/encoder/encoder.h"
21 #include "av1/encoder/optical_flow.h"
22 #include "av1/encoder/sparse_linear_solver.h"
23 #include "av1/encoder/reconinter_enc.h"
24
25 #if CONFIG_OPTICAL_FLOW_API
26
av1_init_opfl_params(OPFL_PARAMS * opfl_params)27 void av1_init_opfl_params(OPFL_PARAMS *opfl_params) {
28 opfl_params->pyramid_levels = OPFL_PYRAMID_LEVELS;
29 opfl_params->warping_steps = OPFL_WARPING_STEPS;
30 opfl_params->lk_params = NULL;
31 }
32
av1_init_lk_params(LK_PARAMS * lk_params)33 void av1_init_lk_params(LK_PARAMS *lk_params) {
34 lk_params->window_size = OPFL_WINDOW_SIZE;
35 }
36
37 // Helper function to determine whether a frame is encoded with high bit-depth.
is_frame_high_bitdepth(const YV12_BUFFER_CONFIG * frame)38 static inline int is_frame_high_bitdepth(const YV12_BUFFER_CONFIG *frame) {
39 return (frame->flags & YV12_FLAG_HIGHBITDEPTH) ? 1 : 0;
40 }
41
42 // Helper function to determine whether optical flow method is sparse.
is_sparse(const OPFL_PARAMS * opfl_params)43 static inline int is_sparse(const OPFL_PARAMS *opfl_params) {
44 return (opfl_params->flags & OPFL_FLAG_SPARSE) ? 1 : 0;
45 }
46
47 static void gradients_over_window(const YV12_BUFFER_CONFIG *frame,
48 const YV12_BUFFER_CONFIG *ref_frame,
49 const double x_coord, const double y_coord,
50 const int window_size, const int bit_depth,
51 double *ix, double *iy, double *it,
52 LOCALMV *mv);
53
54 // coefficients for bilinear interpolation on unit square
pixel_interp(const double x,const double y,const double b00,const double b01,const double b10,const double b11)55 static int pixel_interp(const double x, const double y, const double b00,
56 const double b01, const double b10, const double b11) {
57 const int xint = (int)x;
58 const int yint = (int)y;
59 const double xdec = x - xint;
60 const double ydec = y - yint;
61 const double a = (1 - xdec) * (1 - ydec);
62 const double b = xdec * (1 - ydec);
63 const double c = (1 - xdec) * ydec;
64 const double d = xdec * ydec;
65 // if x, y are already integers, this results to b00
66 int interp = (int)round(a * b00 + b * b01 + c * b10 + d * b11);
67 return interp;
68 }
69
70 // Scharr filter to compute spatial gradient
spatial_gradient(const YV12_BUFFER_CONFIG * frame,const int x_coord,const int y_coord,const int direction,double * derivative)71 static void spatial_gradient(const YV12_BUFFER_CONFIG *frame, const int x_coord,
72 const int y_coord, const int direction,
73 double *derivative) {
74 double *filter;
75 // Scharr filters
76 double gx[9] = { -3, 0, 3, -10, 0, 10, -3, 0, 3 };
77 double gy[9] = { -3, -10, -3, 0, 0, 0, 3, 10, 3 };
78 if (direction == 0) { // x direction
79 filter = gx;
80 } else { // y direction
81 filter = gy;
82 }
83 int idx = 0;
84 double d = 0;
85 for (int yy = -1; yy <= 1; yy++) {
86 for (int xx = -1; xx <= 1; xx++) {
87 d += filter[idx] *
88 frame->y_buffer[(y_coord + yy) * frame->y_stride + (x_coord + xx)];
89 idx++;
90 }
91 }
92 // normalization scaling factor for scharr
93 *derivative = d / 32.0;
94 }
95
96 // Determine the spatial gradient at subpixel locations
97 // For example, when reducing images for pyramidal LK,
98 // corners found in original image may be at subpixel locations.
gradient_interp(double * fullpel_deriv,const double x_coord,const double y_coord,const int w,const int h,double * derivative)99 static void gradient_interp(double *fullpel_deriv, const double x_coord,
100 const double y_coord, const int w, const int h,
101 double *derivative) {
102 const int xint = (int)x_coord;
103 const int yint = (int)y_coord;
104 double interp;
105 if (xint + 1 > w - 1 || yint + 1 > h - 1) {
106 interp = fullpel_deriv[yint * w + xint];
107 } else {
108 interp = pixel_interp(x_coord, y_coord, fullpel_deriv[yint * w + xint],
109 fullpel_deriv[yint * w + (xint + 1)],
110 fullpel_deriv[(yint + 1) * w + xint],
111 fullpel_deriv[(yint + 1) * w + (xint + 1)]);
112 }
113
114 *derivative = interp;
115 }
116
temporal_gradient(const YV12_BUFFER_CONFIG * frame,const YV12_BUFFER_CONFIG * frame2,const double x_coord,const double y_coord,const int bit_depth,double * derivative,LOCALMV * mv)117 static void temporal_gradient(const YV12_BUFFER_CONFIG *frame,
118 const YV12_BUFFER_CONFIG *frame2,
119 const double x_coord, const double y_coord,
120 const int bit_depth, double *derivative,
121 LOCALMV *mv) {
122 const int w = 2;
123 const int h = 2;
124 uint8_t pred1[4];
125 uint8_t pred2[4];
126
127 const int y = (int)y_coord;
128 const int x = (int)x_coord;
129 const double ydec = y_coord - y;
130 const double xdec = x_coord - x;
131 const int is_intrabc = 0; // Is intra-copied?
132 const int is_high_bitdepth = is_frame_high_bitdepth(frame2);
133 const int subsampling_x = 0, subsampling_y = 0; // for y-buffer
134 const int_interpfilters interp_filters =
135 av1_broadcast_interp_filter(MULTITAP_SHARP);
136 const int plane = 0; // y-plane
137 const struct buf_2d ref_buf2 = { NULL, frame2->y_buffer, frame2->y_crop_width,
138 frame2->y_crop_height, frame2->y_stride };
139 struct scale_factors scale;
140 av1_setup_scale_factors_for_frame(&scale, frame->y_crop_width,
141 frame->y_crop_height, frame->y_crop_width,
142 frame->y_crop_height);
143 InterPredParams inter_pred_params;
144 av1_init_inter_params(&inter_pred_params, w, h, y, x, subsampling_x,
145 subsampling_y, bit_depth, is_high_bitdepth, is_intrabc,
146 &scale, &ref_buf2, interp_filters);
147 inter_pred_params.interp_filter_params[0] =
148 &av1_interp_filter_params_list[interp_filters.as_filters.x_filter];
149 inter_pred_params.interp_filter_params[1] =
150 &av1_interp_filter_params_list[interp_filters.as_filters.y_filter];
151 inter_pred_params.conv_params = get_conv_params(0, plane, bit_depth);
152 MV newmv = { .row = (int16_t)round((mv->row + xdec) * 8),
153 .col = (int16_t)round((mv->col + ydec) * 8) };
154 av1_enc_build_one_inter_predictor(pred2, w, &newmv, &inter_pred_params);
155 const struct buf_2d ref_buf1 = { NULL, frame->y_buffer, frame->y_crop_width,
156 frame->y_crop_height, frame->y_stride };
157 av1_init_inter_params(&inter_pred_params, w, h, y, x, subsampling_x,
158 subsampling_y, bit_depth, is_high_bitdepth, is_intrabc,
159 &scale, &ref_buf1, interp_filters);
160 inter_pred_params.interp_filter_params[0] =
161 &av1_interp_filter_params_list[interp_filters.as_filters.x_filter];
162 inter_pred_params.interp_filter_params[1] =
163 &av1_interp_filter_params_list[interp_filters.as_filters.y_filter];
164 inter_pred_params.conv_params = get_conv_params(0, plane, bit_depth);
165 MV zeroMV = { .row = (int16_t)round(xdec * 8),
166 .col = (int16_t)round(ydec * 8) };
167 av1_enc_build_one_inter_predictor(pred1, w, &zeroMV, &inter_pred_params);
168
169 *derivative = pred2[0] - pred1[0];
170 }
171
172 // Numerical differentiate over window_size x window_size surrounding (x,y)
173 // location. Alters ix, iy, it to contain numerical partial derivatives
gradients_over_window(const YV12_BUFFER_CONFIG * frame,const YV12_BUFFER_CONFIG * ref_frame,const double x_coord,const double y_coord,const int window_size,const int bit_depth,double * ix,double * iy,double * it,LOCALMV * mv)174 static void gradients_over_window(const YV12_BUFFER_CONFIG *frame,
175 const YV12_BUFFER_CONFIG *ref_frame,
176 const double x_coord, const double y_coord,
177 const int window_size, const int bit_depth,
178 double *ix, double *iy, double *it,
179 LOCALMV *mv) {
180 const double left = x_coord - window_size / 2.0;
181 const double top = y_coord - window_size / 2.0;
182 // gradient operators need pixel before and after (start at 1)
183 const double x_start = AOMMAX(1, left);
184 const double y_start = AOMMAX(1, top);
185 const int frame_height = frame->y_crop_height;
186 const int frame_width = frame->y_crop_width;
187 double deriv_x;
188 double deriv_y;
189 double deriv_t;
190
191 const double x_end = AOMMIN(x_coord + window_size / 2.0, frame_width - 2);
192 const double y_end = AOMMIN(y_coord + window_size / 2.0, frame_height - 2);
193 const int xs = (int)AOMMAX(1, x_start - 1);
194 const int ys = (int)AOMMAX(1, y_start - 1);
195 const int xe = (int)AOMMIN(x_end + 2, frame_width - 2);
196 const int ye = (int)AOMMIN(y_end + 2, frame_height - 2);
197 // with normalization, gradients may be double values
198 double *fullpel_dx = aom_malloc((ye - ys) * (xe - xs) * sizeof(deriv_x));
199 double *fullpel_dy = aom_malloc((ye - ys) * (xe - xs) * sizeof(deriv_y));
200 if (!fullpel_dx || !fullpel_dy) {
201 aom_free(fullpel_dx);
202 aom_free(fullpel_dy);
203 return;
204 }
205
206 // TODO(any): This could be more efficient in the case that x_coord
207 // and y_coord are integers.. but it may look more messy.
208
209 // calculate spatial gradients at full pixel locations
210 for (int j = ys; j < ye; j++) {
211 for (int i = xs; i < xe; i++) {
212 spatial_gradient(frame, i, j, 0, &deriv_x);
213 spatial_gradient(frame, i, j, 1, &deriv_y);
214 int idx = (j - ys) * (xe - xs) + (i - xs);
215 fullpel_dx[idx] = deriv_x;
216 fullpel_dy[idx] = deriv_y;
217 }
218 }
219 // compute numerical differentiation for every pixel in window
220 // (this potentially includes subpixels)
221 for (double j = y_start; j < y_end; j++) {
222 for (double i = x_start; i < x_end; i++) {
223 temporal_gradient(frame, ref_frame, i, j, bit_depth, &deriv_t, mv);
224 gradient_interp(fullpel_dx, i - xs, j - ys, xe - xs, ye - ys, &deriv_x);
225 gradient_interp(fullpel_dy, i - xs, j - ys, xe - xs, ye - ys, &deriv_y);
226 int idx = (int)(j - top) * window_size + (int)(i - left);
227 ix[idx] = deriv_x;
228 iy[idx] = deriv_y;
229 it[idx] = deriv_t;
230 }
231 }
232 // TODO(any): to avoid setting deriv arrays to zero for every iteration,
233 // could instead pass these two values back through function call
234 // int first_idx = (int)(y_start - top) * window_size + (int)(x_start - left);
235 // int width = window_size - ((int)(x_start - left) + (int)(left + window_size
236 // - x_end));
237
238 aom_free(fullpel_dx);
239 aom_free(fullpel_dy);
240 }
241
242 // To compute eigenvalues of 2x2 matrix: Solve for lambda where
243 // Determinant(matrix - lambda*identity) == 0
eigenvalues_2x2(const double * matrix,double * eig)244 static void eigenvalues_2x2(const double *matrix, double *eig) {
245 const double a = 1;
246 const double b = -1 * matrix[0] - matrix[3];
247 const double c = -1 * matrix[1] * matrix[2] + matrix[0] * matrix[3];
248 // quadratic formula
249 const double discriminant = b * b - 4 * a * c;
250 eig[0] = (-b - sqrt(discriminant)) / (2.0 * a);
251 eig[1] = (-b + sqrt(discriminant)) / (2.0 * a);
252 // double check that eigenvalues are ordered by magnitude
253 if (fabs(eig[0]) > fabs(eig[1])) {
254 double tmp = eig[0];
255 eig[0] = eig[1];
256 eig[1] = tmp;
257 }
258 }
259
260 // Shi-Tomasi corner detection criteria
corner_score(const YV12_BUFFER_CONFIG * frame_to_filter,const YV12_BUFFER_CONFIG * ref_frame,const int x,const int y,double * i_x,double * i_y,double * i_t,const int n,const int bit_depth)261 static double corner_score(const YV12_BUFFER_CONFIG *frame_to_filter,
262 const YV12_BUFFER_CONFIG *ref_frame, const int x,
263 const int y, double *i_x, double *i_y, double *i_t,
264 const int n, const int bit_depth) {
265 double eig[2];
266 LOCALMV mv = { .row = 0, .col = 0 };
267 // TODO(any): technically, ref_frame and i_t are not used by corner score
268 // so these could be replaced by dummy variables,
269 // or change this to spatial gradient function over window only
270 gradients_over_window(frame_to_filter, ref_frame, x, y, n, bit_depth, i_x,
271 i_y, i_t, &mv);
272 double Mres1[1] = { 0 }, Mres2[1] = { 0 }, Mres3[1] = { 0 };
273 multiply_mat(i_x, i_x, Mres1, 1, n * n, 1);
274 multiply_mat(i_x, i_y, Mres2, 1, n * n, 1);
275 multiply_mat(i_y, i_y, Mres3, 1, n * n, 1);
276 double M[4] = { Mres1[0], Mres2[0], Mres2[0], Mres3[0] };
277 eigenvalues_2x2(M, eig);
278 return fabs(eig[0]);
279 }
280
281 // Finds corners in frame_to_filter
282 // For less strict requirements (i.e. more corners), decrease threshold
detect_corners(const YV12_BUFFER_CONFIG * frame_to_filter,const YV12_BUFFER_CONFIG * ref_frame,const int maxcorners,int * ref_corners,const int bit_depth)283 static int detect_corners(const YV12_BUFFER_CONFIG *frame_to_filter,
284 const YV12_BUFFER_CONFIG *ref_frame,
285 const int maxcorners, int *ref_corners,
286 const int bit_depth) {
287 const int frame_height = frame_to_filter->y_crop_height;
288 const int frame_width = frame_to_filter->y_crop_width;
289 // TODO(any): currently if maxcorners is decreased, then it only means
290 // corners will be omited from bottom-right of image. if maxcorners
291 // is actually used, then this algorithm would need to re-iterate
292 // and choose threshold based on that
293 assert(maxcorners == frame_height * frame_width);
294 int countcorners = 0;
295 const double threshold = 0.1;
296 double score;
297 const int n = 3;
298 double i_x[9] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 };
299 double i_y[9] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 };
300 double i_t[9] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 };
301 const int fromedge = n;
302 double max_score = corner_score(frame_to_filter, ref_frame, fromedge,
303 fromedge, i_x, i_y, i_t, n, bit_depth);
304 // rough estimate of max corner score in image
305 for (int x = fromedge; x < frame_width - fromedge; x += 1) {
306 for (int y = fromedge; y < frame_height - fromedge; y += frame_height / 5) {
307 for (int i = 0; i < n * n; i++) {
308 i_x[i] = 0;
309 i_y[i] = 0;
310 i_t[i] = 0;
311 }
312 score = corner_score(frame_to_filter, ref_frame, x, y, i_x, i_y, i_t, n,
313 bit_depth);
314 if (score > max_score) {
315 max_score = score;
316 }
317 }
318 }
319 // score all the points and choose corners over threshold
320 for (int x = fromedge; x < frame_width - fromedge; x += 1) {
321 for (int y = fromedge;
322 (y < frame_height - fromedge) && countcorners < maxcorners; y += 1) {
323 for (int i = 0; i < n * n; i++) {
324 i_x[i] = 0;
325 i_y[i] = 0;
326 i_t[i] = 0;
327 }
328 score = corner_score(frame_to_filter, ref_frame, x, y, i_x, i_y, i_t, n,
329 bit_depth);
330 if (score > threshold * max_score) {
331 ref_corners[countcorners * 2] = x;
332 ref_corners[countcorners * 2 + 1] = y;
333 countcorners++;
334 }
335 }
336 }
337 return countcorners;
338 }
339
340 // weights is an nxn matrix. weights is filled with a gaussian function,
341 // with independent variable: distance from the center point.
gaussian(const double sigma,const int n,const int normalize,double * weights)342 static void gaussian(const double sigma, const int n, const int normalize,
343 double *weights) {
344 double total_weight = 0;
345 for (int j = 0; j < n; j++) {
346 for (int i = 0; i < n; i++) {
347 double distance = sqrt(pow(n / 2 - i, 2) + pow(n / 2 - j, 2));
348 double weight = exp(-0.5 * pow(distance / sigma, 2));
349 weights[j * n + i] = weight;
350 total_weight += weight;
351 }
352 }
353 if (normalize == 1) {
354 for (int j = 0; j < n; j++) {
355 weights[j] = weights[j] / total_weight;
356 }
357 }
358 }
359
convolve(const double * filter,const int * img,const int size)360 static double convolve(const double *filter, const int *img, const int size) {
361 double result = 0;
362 for (int i = 0; i < size; i++) {
363 result += filter[i] * img[i];
364 }
365 return result;
366 }
367
368 // Applies a Gaussian low-pass smoothing filter to produce
369 // a corresponding lower resolution image with halved dimensions
reduce(uint8_t * img,int height,int width,int stride,uint8_t * reduced_img)370 static void reduce(uint8_t *img, int height, int width, int stride,
371 uint8_t *reduced_img) {
372 const int new_width = width / 2;
373 const int window_size = 5;
374 const double gaussian_filter[25] = {
375 1. / 256, 1.0 / 64, 3. / 128, 1. / 64, 1. / 256, 1. / 64, 1. / 16,
376 3. / 32, 1. / 16, 1. / 64, 3. / 128, 3. / 32, 9. / 64, 3. / 32,
377 3. / 128, 1. / 64, 1. / 16, 3. / 32, 1. / 16, 1. / 64, 1. / 256,
378 1. / 64, 3. / 128, 1. / 64, 1. / 256
379 };
380 // filter is 5x5 so need prev and forward 2 pixels
381 int img_section[25];
382 for (int y = 0; y < height - 1; y += 2) {
383 for (int x = 0; x < width - 1; x += 2) {
384 int i = 0;
385 for (int yy = y - window_size / 2; yy <= y + window_size / 2; yy++) {
386 for (int xx = x - window_size / 2; xx <= x + window_size / 2; xx++) {
387 int yvalue = yy;
388 int xvalue = xx;
389 // copied pixels outside the boundary
390 if (yvalue < 0) yvalue = 0;
391 if (xvalue < 0) xvalue = 0;
392 if (yvalue >= height) yvalue = height - 1;
393 if (xvalue >= width) xvalue = width - 1;
394 img_section[i++] = img[yvalue * stride + xvalue];
395 }
396 }
397 reduced_img[(y / 2) * new_width + (x / 2)] = (uint8_t)convolve(
398 gaussian_filter, img_section, window_size * window_size);
399 }
400 }
401 }
402
cmpfunc(const void * a,const void * b)403 static int cmpfunc(const void *a, const void *b) {
404 return (*(int *)a - *(int *)b);
405 }
filter_mvs(const MV_FILTER_TYPE mv_filter,const int frame_height,const int frame_width,LOCALMV * localmvs,MV * mvs)406 static void filter_mvs(const MV_FILTER_TYPE mv_filter, const int frame_height,
407 const int frame_width, LOCALMV *localmvs, MV *mvs) {
408 const int n = 5; // window size
409 // for smoothing filter
410 const double gaussian_filter[25] = {
411 1. / 256, 1. / 64, 3. / 128, 1. / 64, 1. / 256, 1. / 64, 1. / 16,
412 3. / 32, 1. / 16, 1. / 64, 3. / 128, 3. / 32, 9. / 64, 3. / 32,
413 3. / 128, 1. / 64, 1. / 16, 3. / 32, 1. / 16, 1. / 64, 1. / 256,
414 1. / 64, 3. / 128, 1. / 64, 1. / 256
415 };
416 // for median filter
417 int mvrows[25];
418 int mvcols[25];
419 if (mv_filter != MV_FILTER_NONE) {
420 for (int y = 0; y < frame_height; y++) {
421 for (int x = 0; x < frame_width; x++) {
422 int center_idx = y * frame_width + x;
423 int i = 0;
424 double filtered_row = 0;
425 double filtered_col = 0;
426 for (int yy = y - n / 2; yy <= y + n / 2; yy++) {
427 for (int xx = x - n / 2; xx <= x + n / 2; xx++) {
428 int yvalue = yy;
429 int xvalue = xx;
430 // copied pixels outside the boundary
431 if (yvalue < 0) yvalue = 0;
432 if (xvalue < 0) xvalue = 0;
433 if (yvalue >= frame_height) yvalue = frame_height - 1;
434 if (xvalue >= frame_width) xvalue = frame_width - 1;
435 int index = yvalue * frame_width + xvalue;
436 if (mv_filter == MV_FILTER_SMOOTH) {
437 filtered_row += mvs[index].row * gaussian_filter[i];
438 filtered_col += mvs[index].col * gaussian_filter[i];
439 } else if (mv_filter == MV_FILTER_MEDIAN) {
440 mvrows[i] = mvs[index].row;
441 mvcols[i] = mvs[index].col;
442 }
443 i++;
444 }
445 }
446
447 MV mv = mvs[center_idx];
448 if (mv_filter == MV_FILTER_SMOOTH) {
449 mv.row = (int16_t)filtered_row;
450 mv.col = (int16_t)filtered_col;
451 } else if (mv_filter == MV_FILTER_MEDIAN) {
452 qsort(mvrows, 25, sizeof(mv.row), cmpfunc);
453 qsort(mvcols, 25, sizeof(mv.col), cmpfunc);
454 mv.row = mvrows[25 / 2];
455 mv.col = mvcols[25 / 2];
456 }
457 LOCALMV localmv = { .row = ((double)mv.row) / 8,
458 .col = ((double)mv.row) / 8 };
459 localmvs[y * frame_width + x] = localmv;
460 // if mvs array is immediately updated here, then the result may
461 // propagate to other pixels.
462 }
463 }
464 for (int i = 0; i < frame_height * frame_width; i++) {
465 MV mv = { .row = (int16_t)round(8 * localmvs[i].row),
466 .col = (int16_t)round(8 * localmvs[i].col) };
467 mvs[i] = mv;
468 }
469 }
470 }
471
472 // Computes optical flow at a single pyramid level,
473 // using Lucas-Kanade algorithm.
474 // Modifies mvs array.
lucas_kanade(const YV12_BUFFER_CONFIG * from_frame,const YV12_BUFFER_CONFIG * to_frame,const int level,const LK_PARAMS * lk_params,const int num_ref_corners,int * ref_corners,const int mv_stride,const int bit_depth,LOCALMV * mvs)475 static void lucas_kanade(const YV12_BUFFER_CONFIG *from_frame,
476 const YV12_BUFFER_CONFIG *to_frame, const int level,
477 const LK_PARAMS *lk_params, const int num_ref_corners,
478 int *ref_corners, const int mv_stride,
479 const int bit_depth, LOCALMV *mvs) {
480 assert(lk_params->window_size > 0 && lk_params->window_size % 2 == 0);
481 const int n = lk_params->window_size;
482 // algorithm is sensitive to window size
483 double *i_x = (double *)aom_malloc(n * n * sizeof(*i_x));
484 double *i_y = (double *)aom_malloc(n * n * sizeof(*i_y));
485 double *i_t = (double *)aom_malloc(n * n * sizeof(*i_t));
486 double *weights = (double *)aom_malloc(n * n * sizeof(*weights));
487 if (!i_x || !i_y || !i_t || !weights) goto free_lk_buf;
488
489 const int expand_multiplier = (int)pow(2, level);
490 double sigma = 0.2 * n;
491 // normalizing doesn't really affect anything since it's applied
492 // to every component of M and b
493 gaussian(sigma, n, 0, weights);
494 for (int i = 0; i < num_ref_corners; i++) {
495 const double x_coord = 1.0 * ref_corners[i * 2] / expand_multiplier;
496 const double y_coord = 1.0 * ref_corners[i * 2 + 1] / expand_multiplier;
497 int highres_x = ref_corners[i * 2];
498 int highres_y = ref_corners[i * 2 + 1];
499 int mv_idx = highres_y * (mv_stride) + highres_x;
500 LOCALMV mv_old = mvs[mv_idx];
501 mv_old.row = mv_old.row / expand_multiplier;
502 mv_old.col = mv_old.col / expand_multiplier;
503 // using this instead of memset, since it's not completely
504 // clear if zero memset works on double arrays
505 for (int j = 0; j < n * n; j++) {
506 i_x[j] = 0;
507 i_y[j] = 0;
508 i_t[j] = 0;
509 }
510 gradients_over_window(from_frame, to_frame, x_coord, y_coord, n, bit_depth,
511 i_x, i_y, i_t, &mv_old);
512 double Mres1[1] = { 0 }, Mres2[1] = { 0 }, Mres3[1] = { 0 };
513 double bres1[1] = { 0 }, bres2[1] = { 0 };
514 for (int j = 0; j < n * n; j++) {
515 Mres1[0] += weights[j] * i_x[j] * i_x[j];
516 Mres2[0] += weights[j] * i_x[j] * i_y[j];
517 Mres3[0] += weights[j] * i_y[j] * i_y[j];
518 bres1[0] += weights[j] * i_x[j] * i_t[j];
519 bres2[0] += weights[j] * i_y[j] * i_t[j];
520 }
521 double M[4] = { Mres1[0], Mres2[0], Mres2[0], Mres3[0] };
522 double b[2] = { -1 * bres1[0], -1 * bres2[0] };
523 double eig[2] = { 1, 1 };
524 eigenvalues_2x2(M, eig);
525 double threshold = 0.1;
526 if (fabs(eig[0]) > threshold) {
527 // if M is not invertible, then displacement
528 // will default to zeros
529 double u[2] = { 0, 0 };
530 linsolve(2, M, 2, b, u);
531 int mult = 1;
532 if (level != 0)
533 mult = expand_multiplier; // mv doubles when resolution doubles
534 LOCALMV mv = { .row = (mult * (u[0] + mv_old.row)),
535 .col = (mult * (u[1] + mv_old.col)) };
536 mvs[mv_idx] = mv;
537 mvs[mv_idx] = mv;
538 }
539 }
540 free_lk_buf:
541 aom_free(weights);
542 aom_free(i_t);
543 aom_free(i_x);
544 aom_free(i_y);
545 }
546
547 // Warp the src_frame to warper_frame according to mvs.
548 // mvs point to src_frame
warp_back_frame(YV12_BUFFER_CONFIG * warped_frame,const YV12_BUFFER_CONFIG * src_frame,const LOCALMV * mvs,int mv_stride)549 static void warp_back_frame(YV12_BUFFER_CONFIG *warped_frame,
550 const YV12_BUFFER_CONFIG *src_frame,
551 const LOCALMV *mvs, int mv_stride) {
552 int w, h;
553 const int fw = src_frame->y_crop_width;
554 const int fh = src_frame->y_crop_height;
555 const int src_fs = src_frame->y_stride, warped_fs = warped_frame->y_stride;
556 const uint8_t *src_buf = src_frame->y_buffer;
557 uint8_t *warped_buf = warped_frame->y_buffer;
558 double temp;
559 for (h = 0; h < fh; h++) {
560 for (w = 0; w < fw; w++) {
561 double cord_x = (double)w + mvs[h * mv_stride + w].col;
562 double cord_y = (double)h + mvs[h * mv_stride + w].row;
563 cord_x = fclamp(cord_x, 0, (double)(fw - 1));
564 cord_y = fclamp(cord_y, 0, (double)(fh - 1));
565 const int floorx = (int)floor(cord_x);
566 const int floory = (int)floor(cord_y);
567 const double fracx = cord_x - (double)floorx;
568 const double fracy = cord_y - (double)floory;
569
570 temp = 0;
571 for (int hh = 0; hh < 2; hh++) {
572 const double weighth = hh ? (fracy) : (1 - fracy);
573 for (int ww = 0; ww < 2; ww++) {
574 const double weightw = ww ? (fracx) : (1 - fracx);
575 int y = floory + hh;
576 int x = floorx + ww;
577 y = clamp(y, 0, fh - 1);
578 x = clamp(x, 0, fw - 1);
579 temp += (double)src_buf[y * src_fs + x] * weightw * weighth;
580 }
581 }
582 warped_buf[h * warped_fs + w] = (uint8_t)round(temp);
583 }
584 }
585 }
586
587 // Same as warp_back_frame, but using a better interpolation filter.
warp_back_frame_intp(YV12_BUFFER_CONFIG * warped_frame,const YV12_BUFFER_CONFIG * src_frame,const LOCALMV * mvs,int mv_stride)588 static void warp_back_frame_intp(YV12_BUFFER_CONFIG *warped_frame,
589 const YV12_BUFFER_CONFIG *src_frame,
590 const LOCALMV *mvs, int mv_stride) {
591 int w, h;
592 const int fw = src_frame->y_crop_width;
593 const int fh = src_frame->y_crop_height;
594 const int warped_fs = warped_frame->y_stride;
595 uint8_t *warped_buf = warped_frame->y_buffer;
596 const int blk = 2;
597 uint8_t temp_blk[4];
598
599 const int is_intrabc = 0; // Is intra-copied?
600 const int is_high_bitdepth = is_frame_high_bitdepth(src_frame);
601 const int subsampling_x = 0, subsampling_y = 0; // for y-buffer
602 const int_interpfilters interp_filters =
603 av1_broadcast_interp_filter(MULTITAP_SHARP2);
604 const int plane = 0; // y-plane
605 const struct buf_2d ref_buf2 = { NULL, src_frame->y_buffer,
606 src_frame->y_crop_width,
607 src_frame->y_crop_height,
608 src_frame->y_stride };
609 const int bit_depth = src_frame->bit_depth;
610 struct scale_factors scale;
611 av1_setup_scale_factors_for_frame(
612 &scale, src_frame->y_crop_width, src_frame->y_crop_height,
613 src_frame->y_crop_width, src_frame->y_crop_height);
614
615 for (h = 0; h < fh; h++) {
616 for (w = 0; w < fw; w++) {
617 InterPredParams inter_pred_params;
618 av1_init_inter_params(&inter_pred_params, blk, blk, h, w, subsampling_x,
619 subsampling_y, bit_depth, is_high_bitdepth,
620 is_intrabc, &scale, &ref_buf2, interp_filters);
621 inter_pred_params.interp_filter_params[0] =
622 &av1_interp_filter_params_list[interp_filters.as_filters.x_filter];
623 inter_pred_params.interp_filter_params[1] =
624 &av1_interp_filter_params_list[interp_filters.as_filters.y_filter];
625 inter_pred_params.conv_params = get_conv_params(0, plane, bit_depth);
626 MV newmv = { .row = (int16_t)round((mvs[h * mv_stride + w].row) * 8),
627 .col = (int16_t)round((mvs[h * mv_stride + w].col) * 8) };
628 av1_enc_build_one_inter_predictor(temp_blk, blk, &newmv,
629 &inter_pred_params);
630 warped_buf[h * warped_fs + w] = temp_blk[0];
631 }
632 }
633 }
634
635 #define DERIVATIVE_FILTER_LENGTH 7
636 double filter[DERIVATIVE_FILTER_LENGTH] = { -1.0 / 60, 9.0 / 60, -45.0 / 60, 0,
637 45.0 / 60, -9.0 / 60, 1.0 / 60 };
638
639 // Get gradient of the whole frame
get_frame_gradients(const YV12_BUFFER_CONFIG * from_frame,const YV12_BUFFER_CONFIG * to_frame,double * ix,double * iy,double * it,int grad_stride)640 static void get_frame_gradients(const YV12_BUFFER_CONFIG *from_frame,
641 const YV12_BUFFER_CONFIG *to_frame, double *ix,
642 double *iy, double *it, int grad_stride) {
643 int w, h, k, idx;
644 const int fw = from_frame->y_crop_width;
645 const int fh = from_frame->y_crop_height;
646 const int from_fs = from_frame->y_stride, to_fs = to_frame->y_stride;
647 const uint8_t *from_buf = from_frame->y_buffer;
648 const uint8_t *to_buf = to_frame->y_buffer;
649
650 const int lh = DERIVATIVE_FILTER_LENGTH;
651 const int hleft = (lh - 1) / 2;
652
653 for (h = 0; h < fh; h++) {
654 for (w = 0; w < fw; w++) {
655 // x
656 ix[h * grad_stride + w] = 0;
657 for (k = 0; k < lh; k++) {
658 // if we want to make this block dependent, need to extend the
659 // boundaries using other initializations.
660 idx = w + k - hleft;
661 idx = clamp(idx, 0, fw - 1);
662 ix[h * grad_stride + w] += filter[k] * 0.5 *
663 ((double)from_buf[h * from_fs + idx] +
664 (double)to_buf[h * to_fs + idx]);
665 }
666 // y
667 iy[h * grad_stride + w] = 0;
668 for (k = 0; k < lh; k++) {
669 // if we want to make this block dependent, need to extend the
670 // boundaries using other initializations.
671 idx = h + k - hleft;
672 idx = clamp(idx, 0, fh - 1);
673 iy[h * grad_stride + w] += filter[k] * 0.5 *
674 ((double)from_buf[idx * from_fs + w] +
675 (double)to_buf[idx * to_fs + w]);
676 }
677 // t
678 it[h * grad_stride + w] =
679 (double)to_buf[h * to_fs + w] - (double)from_buf[h * from_fs + w];
680 }
681 }
682 }
683
684 // Solve for linear equations given by the H-S method
solve_horn_schunck(const double * ix,const double * iy,const double * it,int grad_stride,int width,int height,const LOCALMV * init_mvs,int init_mv_stride,LOCALMV * mvs,int mv_stride)685 static void solve_horn_schunck(const double *ix, const double *iy,
686 const double *it, int grad_stride, int width,
687 int height, const LOCALMV *init_mvs,
688 int init_mv_stride, LOCALMV *mvs,
689 int mv_stride) {
690 // TODO(bohanli): May just need to allocate the buffers once per optical flow
691 // calculation
692 int *row_pos = aom_calloc(width * height * 28, sizeof(*row_pos));
693 int *col_pos = aom_calloc(width * height * 28, sizeof(*col_pos));
694 double *values = aom_calloc(width * height * 28, sizeof(*values));
695 double *mv_vec = aom_calloc(width * height * 2, sizeof(*mv_vec));
696 double *mv_init_vec = aom_calloc(width * height * 2, sizeof(*mv_init_vec));
697 double *temp_b = aom_calloc(width * height * 2, sizeof(*temp_b));
698 double *b = aom_calloc(width * height * 2, sizeof(*b));
699 if (!row_pos || !col_pos || !values || !mv_vec || !mv_init_vec || !temp_b ||
700 !b) {
701 goto free_hs_solver_buf;
702 }
703
704 // the location idx for neighboring pixels, k < 4 are the 4 direct neighbors
705 const int check_locs_y[12] = { 0, 0, -1, 1, -1, -1, 1, 1, 0, 0, -2, 2 };
706 const int check_locs_x[12] = { -1, 1, 0, 0, -1, 1, -1, 1, -2, 2, 0, 0 };
707
708 int h, w, checkh, checkw, k, ret;
709 const int offset = height * width;
710 SPARSE_MTX A;
711 int c = 0;
712 const double lambda = 100;
713
714 for (w = 0; w < width; w++) {
715 for (h = 0; h < height; h++) {
716 mv_init_vec[w * height + h] = init_mvs[h * init_mv_stride + w].col;
717 mv_init_vec[w * height + h + offset] =
718 init_mvs[h * init_mv_stride + w].row;
719 }
720 }
721
722 // get matrix A
723 for (w = 0; w < width; w++) {
724 for (h = 0; h < height; h++) {
725 int center_num_direct = 4;
726 const int center_idx = w * height + h;
727 if (w == 0 || w == width - 1) center_num_direct--;
728 if (h == 0 || h == height - 1) center_num_direct--;
729 // diagonal entry for this row from the center pixel
730 double cor_w = center_num_direct * center_num_direct + center_num_direct;
731 row_pos[c] = center_idx;
732 col_pos[c] = center_idx;
733 values[c] = lambda * cor_w;
734 c++;
735 row_pos[c] = center_idx + offset;
736 col_pos[c] = center_idx + offset;
737 values[c] = lambda * cor_w;
738 c++;
739 // other entries from direct neighbors
740 for (k = 0; k < 4; k++) {
741 checkh = h + check_locs_y[k];
742 checkw = w + check_locs_x[k];
743 if (checkh < 0 || checkh >= height || checkw < 0 || checkw >= width) {
744 continue;
745 }
746 int this_idx = checkw * height + checkh;
747 int this_num_direct = 4;
748 if (checkw == 0 || checkw == width - 1) this_num_direct--;
749 if (checkh == 0 || checkh == height - 1) this_num_direct--;
750 cor_w = -center_num_direct - this_num_direct;
751 row_pos[c] = center_idx;
752 col_pos[c] = this_idx;
753 values[c] = lambda * cor_w;
754 c++;
755 row_pos[c] = center_idx + offset;
756 col_pos[c] = this_idx + offset;
757 values[c] = lambda * cor_w;
758 c++;
759 }
760 // entries from neighbors on the diagonal corners
761 for (k = 4; k < 8; k++) {
762 checkh = h + check_locs_y[k];
763 checkw = w + check_locs_x[k];
764 if (checkh < 0 || checkh >= height || checkw < 0 || checkw >= width) {
765 continue;
766 }
767 int this_idx = checkw * height + checkh;
768 cor_w = 2;
769 row_pos[c] = center_idx;
770 col_pos[c] = this_idx;
771 values[c] = lambda * cor_w;
772 c++;
773 row_pos[c] = center_idx + offset;
774 col_pos[c] = this_idx + offset;
775 values[c] = lambda * cor_w;
776 c++;
777 }
778 // entries from neighbors with dist of 2
779 for (k = 8; k < 12; k++) {
780 checkh = h + check_locs_y[k];
781 checkw = w + check_locs_x[k];
782 if (checkh < 0 || checkh >= height || checkw < 0 || checkw >= width) {
783 continue;
784 }
785 int this_idx = checkw * height + checkh;
786 cor_w = 1;
787 row_pos[c] = center_idx;
788 col_pos[c] = this_idx;
789 values[c] = lambda * cor_w;
790 c++;
791 row_pos[c] = center_idx + offset;
792 col_pos[c] = this_idx + offset;
793 values[c] = lambda * cor_w;
794 c++;
795 }
796 }
797 }
798 ret = av1_init_sparse_mtx(row_pos, col_pos, values, c, 2 * width * height,
799 2 * width * height, &A);
800 if (ret < 0) goto free_hs_solver_buf;
801 // subtract init mv part from b
802 av1_mtx_vect_multi_left(&A, mv_init_vec, temp_b, 2 * width * height);
803 for (int i = 0; i < 2 * width * height; i++) {
804 b[i] = -temp_b[i];
805 }
806 av1_free_sparse_mtx_elems(&A);
807
808 // add cross terms to A and modify b with ExEt / EyEt
809 for (w = 0; w < width; w++) {
810 for (h = 0; h < height; h++) {
811 int curidx = w * height + h;
812 // modify b
813 b[curidx] += -ix[h * grad_stride + w] * it[h * grad_stride + w];
814 b[curidx + offset] += -iy[h * grad_stride + w] * it[h * grad_stride + w];
815 // add cross terms to A
816 row_pos[c] = curidx;
817 col_pos[c] = curidx + offset;
818 values[c] = ix[h * grad_stride + w] * iy[h * grad_stride + w];
819 c++;
820 row_pos[c] = curidx + offset;
821 col_pos[c] = curidx;
822 values[c] = ix[h * grad_stride + w] * iy[h * grad_stride + w];
823 c++;
824 }
825 }
826 // Add diagonal terms to A
827 for (int i = 0; i < c; i++) {
828 if (row_pos[i] == col_pos[i]) {
829 if (row_pos[i] < offset) {
830 w = row_pos[i] / height;
831 h = row_pos[i] % height;
832 values[i] += pow(ix[h * grad_stride + w], 2);
833 } else {
834 w = (row_pos[i] - offset) / height;
835 h = (row_pos[i] - offset) % height;
836 values[i] += pow(iy[h * grad_stride + w], 2);
837 }
838 }
839 }
840
841 ret = av1_init_sparse_mtx(row_pos, col_pos, values, c, 2 * width * height,
842 2 * width * height, &A);
843 if (ret < 0) goto free_hs_solver_buf;
844
845 // solve for the mvs
846 ret = av1_conjugate_gradient_sparse(&A, b, 2 * width * height, mv_vec);
847 if (ret < 0) goto free_hs_solver_buf;
848
849 // copy mvs
850 for (w = 0; w < width; w++) {
851 for (h = 0; h < height; h++) {
852 mvs[h * mv_stride + w].col = mv_vec[w * height + h];
853 mvs[h * mv_stride + w].row = mv_vec[w * height + h + offset];
854 }
855 }
856 free_hs_solver_buf:
857 aom_free(row_pos);
858 aom_free(col_pos);
859 aom_free(values);
860 aom_free(mv_vec);
861 aom_free(mv_init_vec);
862 aom_free(b);
863 aom_free(temp_b);
864 av1_free_sparse_mtx_elems(&A);
865 }
866
867 // Calculate optical flow from from_frame to to_frame using the H-S method.
horn_schunck(const YV12_BUFFER_CONFIG * from_frame,const YV12_BUFFER_CONFIG * to_frame,const int level,const int mv_stride,const int mv_height,const int mv_width,const OPFL_PARAMS * opfl_params,LOCALMV * mvs)868 static void horn_schunck(const YV12_BUFFER_CONFIG *from_frame,
869 const YV12_BUFFER_CONFIG *to_frame, const int level,
870 const int mv_stride, const int mv_height,
871 const int mv_width, const OPFL_PARAMS *opfl_params,
872 LOCALMV *mvs) {
873 // mvs are always on level 0, here we define two new mv arrays that is of size
874 // of this level.
875 const int fw = from_frame->y_crop_width;
876 const int fh = from_frame->y_crop_height;
877 const int factor = (int)pow(2, level);
878 int w, h, k, init_mv_stride;
879 LOCALMV *init_mvs = NULL, *refine_mvs = NULL;
880 double *ix = NULL, *iy = NULL, *it = NULL;
881 YV12_BUFFER_CONFIG temp_frame;
882 temp_frame.y_buffer = NULL;
883 if (level == 0) {
884 init_mvs = mvs;
885 init_mv_stride = mv_stride;
886 } else {
887 init_mvs = aom_calloc(fw * fh, sizeof(*mvs));
888 if (!init_mvs) goto free_hs_buf;
889 init_mv_stride = fw;
890 for (h = 0; h < fh; h++) {
891 for (w = 0; w < fw; w++) {
892 init_mvs[h * init_mv_stride + w].row =
893 mvs[h * factor * mv_stride + w * factor].row / (double)factor;
894 init_mvs[h * init_mv_stride + w].col =
895 mvs[h * factor * mv_stride + w * factor].col / (double)factor;
896 }
897 }
898 }
899 refine_mvs = aom_calloc(fw * fh, sizeof(*mvs));
900 if (!refine_mvs) goto free_hs_buf;
901 // temp frame for warping
902 temp_frame.y_buffer =
903 (uint8_t *)aom_calloc(fh * fw, sizeof(*temp_frame.y_buffer));
904 if (!temp_frame.y_buffer) goto free_hs_buf;
905 temp_frame.y_crop_height = fh;
906 temp_frame.y_crop_width = fw;
907 temp_frame.y_stride = fw;
908 // gradient buffers
909 ix = aom_calloc(fw * fh, sizeof(*ix));
910 iy = aom_calloc(fw * fh, sizeof(*iy));
911 it = aom_calloc(fw * fh, sizeof(*it));
912 if (!ix || !iy || !it) goto free_hs_buf;
913 // For each warping step
914 for (k = 0; k < opfl_params->warping_steps; k++) {
915 // warp from_frame with init_mv
916 if (level == 0) {
917 warp_back_frame_intp(&temp_frame, to_frame, init_mvs, init_mv_stride);
918 } else {
919 warp_back_frame(&temp_frame, to_frame, init_mvs, init_mv_stride);
920 }
921 // calculate frame gradients
922 get_frame_gradients(from_frame, &temp_frame, ix, iy, it, fw);
923 // form linear equations and solve mvs
924 solve_horn_schunck(ix, iy, it, fw, fw, fh, init_mvs, init_mv_stride,
925 refine_mvs, fw);
926 // update init_mvs
927 for (h = 0; h < fh; h++) {
928 for (w = 0; w < fw; w++) {
929 init_mvs[h * init_mv_stride + w].col += refine_mvs[h * fw + w].col;
930 init_mvs[h * init_mv_stride + w].row += refine_mvs[h * fw + w].row;
931 }
932 }
933 }
934 // copy back the mvs if needed
935 if (level != 0) {
936 for (h = 0; h < mv_height; h++) {
937 for (w = 0; w < mv_width; w++) {
938 mvs[h * mv_stride + w].row =
939 init_mvs[h / factor * init_mv_stride + w / factor].row *
940 (double)factor;
941 mvs[h * mv_stride + w].col =
942 init_mvs[h / factor * init_mv_stride + w / factor].col *
943 (double)factor;
944 }
945 }
946 }
947 free_hs_buf:
948 if (level != 0) aom_free(init_mvs);
949 aom_free(refine_mvs);
950 aom_free(temp_frame.y_buffer);
951 aom_free(ix);
952 aom_free(iy);
953 aom_free(it);
954 }
955
956 // Apply optical flow iteratively at each pyramid level
pyramid_optical_flow(const YV12_BUFFER_CONFIG * from_frame,const YV12_BUFFER_CONFIG * to_frame,const int bit_depth,const OPFL_PARAMS * opfl_params,const OPTFLOW_METHOD method,LOCALMV * mvs)957 static void pyramid_optical_flow(const YV12_BUFFER_CONFIG *from_frame,
958 const YV12_BUFFER_CONFIG *to_frame,
959 const int bit_depth,
960 const OPFL_PARAMS *opfl_params,
961 const OPTFLOW_METHOD method, LOCALMV *mvs) {
962 assert(opfl_params->pyramid_levels > 0 &&
963 opfl_params->pyramid_levels <= MAX_PYRAMID_LEVELS);
964 int levels = opfl_params->pyramid_levels;
965 const int frame_height = from_frame->y_crop_height;
966 const int frame_width = from_frame->y_crop_width;
967 if ((frame_height / pow(2.0, levels - 1) < 50 ||
968 frame_height / pow(2.0, levels - 1) < 50) &&
969 levels > 1)
970 levels = levels - 1;
971 uint8_t *images1[MAX_PYRAMID_LEVELS] = { NULL };
972 uint8_t *images2[MAX_PYRAMID_LEVELS] = { NULL };
973 int *ref_corners = NULL;
974
975 images1[0] = from_frame->y_buffer;
976 images2[0] = to_frame->y_buffer;
977 YV12_BUFFER_CONFIG *buffers1 = aom_malloc(levels * sizeof(*buffers1));
978 YV12_BUFFER_CONFIG *buffers2 = aom_malloc(levels * sizeof(*buffers2));
979 if (!buffers1 || !buffers2) goto free_pyramid_buf;
980 buffers1[0] = *from_frame;
981 buffers2[0] = *to_frame;
982 int fw = frame_width;
983 int fh = frame_height;
984 for (int i = 1; i < levels; i++) {
985 // TODO(bohanli): may need to extend buffers for better interpolation SIMD
986 images1[i] = (uint8_t *)aom_calloc(fh / 2 * fw / 2, sizeof(*images1[i]));
987 images2[i] = (uint8_t *)aom_calloc(fh / 2 * fw / 2, sizeof(*images2[i]));
988 if (!images1[i] || !images2[i]) goto free_pyramid_buf;
989 int stride;
990 if (i == 1)
991 stride = from_frame->y_stride;
992 else
993 stride = fw;
994 reduce(images1[i - 1], fh, fw, stride, images1[i]);
995 reduce(images2[i - 1], fh, fw, stride, images2[i]);
996 fh /= 2;
997 fw /= 2;
998 YV12_BUFFER_CONFIG a = { .y_buffer = images1[i],
999 .y_crop_width = fw,
1000 .y_crop_height = fh,
1001 .y_stride = fw };
1002 YV12_BUFFER_CONFIG b = { .y_buffer = images2[i],
1003 .y_crop_width = fw,
1004 .y_crop_height = fh,
1005 .y_stride = fw };
1006 buffers1[i] = a;
1007 buffers2[i] = b;
1008 }
1009 // Compute corners for specific frame
1010 int num_ref_corners = 0;
1011 if (is_sparse(opfl_params)) {
1012 int maxcorners = from_frame->y_crop_width * from_frame->y_crop_height;
1013 ref_corners = aom_malloc(maxcorners * 2 * sizeof(*ref_corners));
1014 if (!ref_corners) goto free_pyramid_buf;
1015 num_ref_corners = detect_corners(from_frame, to_frame, maxcorners,
1016 ref_corners, bit_depth);
1017 }
1018 const int stop_level = 0;
1019 for (int i = levels - 1; i >= stop_level; i--) {
1020 if (method == LUCAS_KANADE) {
1021 assert(is_sparse(opfl_params));
1022 lucas_kanade(&buffers1[i], &buffers2[i], i, opfl_params->lk_params,
1023 num_ref_corners, ref_corners, buffers1[0].y_crop_width,
1024 bit_depth, mvs);
1025 } else if (method == HORN_SCHUNCK) {
1026 assert(!is_sparse(opfl_params));
1027 horn_schunck(&buffers1[i], &buffers2[i], i, buffers1[0].y_crop_width,
1028 buffers1[0].y_crop_height, buffers1[0].y_crop_width,
1029 opfl_params, mvs);
1030 }
1031 }
1032 free_pyramid_buf:
1033 for (int i = 1; i < levels; i++) {
1034 aom_free(images1[i]);
1035 aom_free(images2[i]);
1036 }
1037 aom_free(ref_corners);
1038 aom_free(buffers1);
1039 aom_free(buffers2);
1040 }
1041 // Computes optical flow by applying algorithm at
1042 // multiple pyramid levels of images (lower-resolution, smoothed images)
1043 // This accounts for larger motions.
1044 // Inputs:
1045 // from_frame Frame buffer.
1046 // to_frame: Frame buffer. MVs point from_frame -> to_frame.
1047 // from_frame_idx: Index of from_frame.
1048 // to_frame_idx: Index of to_frame. Return all zero MVs when idx are equal.
1049 // bit_depth:
1050 // opfl_params: contains algorithm-specific parameters.
1051 // mv_filter: MV_FILTER_NONE, MV_FILTER_SMOOTH, or MV_FILTER_MEDIAN.
1052 // method: LUCAS_KANADE, HORN_SCHUNCK
1053 // mvs: pointer to MVs. Contains initialization, and modified
1054 // based on optical flow. Must have
1055 // dimensions = from_frame->y_crop_width * from_frame->y_crop_height
av1_optical_flow(const YV12_BUFFER_CONFIG * from_frame,const YV12_BUFFER_CONFIG * to_frame,const int from_frame_idx,const int to_frame_idx,const int bit_depth,const OPFL_PARAMS * opfl_params,const MV_FILTER_TYPE mv_filter,const OPTFLOW_METHOD method,MV * mvs)1056 void av1_optical_flow(const YV12_BUFFER_CONFIG *from_frame,
1057 const YV12_BUFFER_CONFIG *to_frame,
1058 const int from_frame_idx, const int to_frame_idx,
1059 const int bit_depth, const OPFL_PARAMS *opfl_params,
1060 const MV_FILTER_TYPE mv_filter,
1061 const OPTFLOW_METHOD method, MV *mvs) {
1062 const int frame_height = from_frame->y_crop_height;
1063 const int frame_width = from_frame->y_crop_width;
1064 // TODO(any): deal with the case where frames are not of the same dimensions
1065 assert(frame_height == to_frame->y_crop_height &&
1066 frame_width == to_frame->y_crop_width);
1067 if (from_frame_idx == to_frame_idx) {
1068 // immediately return all zero mvs when frame indices are equal
1069 for (int yy = 0; yy < frame_height; yy++) {
1070 for (int xx = 0; xx < frame_width; xx++) {
1071 MV mv = { .row = 0, .col = 0 };
1072 mvs[yy * frame_width + xx] = mv;
1073 }
1074 }
1075 return;
1076 }
1077
1078 // Initialize double mvs based on input parameter mvs array
1079 LOCALMV *localmvs =
1080 aom_malloc(frame_height * frame_width * sizeof(*localmvs));
1081 if (!localmvs) return;
1082
1083 filter_mvs(MV_FILTER_SMOOTH, frame_height, frame_width, localmvs, mvs);
1084
1085 for (int i = 0; i < frame_width * frame_height; i++) {
1086 MV mv = mvs[i];
1087 LOCALMV localmv = { .row = ((double)mv.row) / 8,
1088 .col = ((double)mv.col) / 8 };
1089 localmvs[i] = localmv;
1090 }
1091 // Apply optical flow algorithm
1092 pyramid_optical_flow(from_frame, to_frame, bit_depth, opfl_params, method,
1093 localmvs);
1094
1095 // Update original mvs array
1096 for (int j = 0; j < frame_height; j++) {
1097 for (int i = 0; i < frame_width; i++) {
1098 int idx = j * frame_width + i;
1099 if (j + localmvs[idx].row < 0 || j + localmvs[idx].row >= frame_height ||
1100 i + localmvs[idx].col < 0 || i + localmvs[idx].col >= frame_width) {
1101 continue;
1102 }
1103 MV mv = { .row = (int16_t)round(8 * localmvs[idx].row),
1104 .col = (int16_t)round(8 * localmvs[idx].col) };
1105 mvs[idx] = mv;
1106 }
1107 }
1108
1109 filter_mvs(mv_filter, frame_height, frame_width, localmvs, mvs);
1110
1111 aom_free(localmvs);
1112 }
1113 #endif
1114