xref: /aosp_15_r20/external/libgav1/src/tile/prediction.cc (revision 095378508e87ed692bf8dfeb34008b65b3735891)
// Copyright 2019 The libgav1 Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14*09537850SAkhilesh Sanikop 
15*09537850SAkhilesh Sanikop #include <algorithm>
16*09537850SAkhilesh Sanikop #include <array>
17*09537850SAkhilesh Sanikop #include <cassert>
18*09537850SAkhilesh Sanikop #include <cstddef>
19*09537850SAkhilesh Sanikop #include <cstdint>
20*09537850SAkhilesh Sanikop #include <cstdlib>
21*09537850SAkhilesh Sanikop #include <cstring>
22*09537850SAkhilesh Sanikop #include <memory>
23*09537850SAkhilesh Sanikop 
24*09537850SAkhilesh Sanikop #include "src/buffer_pool.h"
25*09537850SAkhilesh Sanikop #include "src/dsp/constants.h"
26*09537850SAkhilesh Sanikop #include "src/dsp/dsp.h"
27*09537850SAkhilesh Sanikop #include "src/motion_vector.h"
28*09537850SAkhilesh Sanikop #include "src/obu_parser.h"
29*09537850SAkhilesh Sanikop #include "src/prediction_mask.h"
30*09537850SAkhilesh Sanikop #include "src/tile.h"
31*09537850SAkhilesh Sanikop #include "src/utils/array_2d.h"
32*09537850SAkhilesh Sanikop #include "src/utils/bit_mask_set.h"
33*09537850SAkhilesh Sanikop #include "src/utils/block_parameters_holder.h"
34*09537850SAkhilesh Sanikop #include "src/utils/common.h"
35*09537850SAkhilesh Sanikop #include "src/utils/constants.h"
36*09537850SAkhilesh Sanikop #include "src/utils/logging.h"
37*09537850SAkhilesh Sanikop #include "src/utils/memory.h"
38*09537850SAkhilesh Sanikop #include "src/utils/types.h"
39*09537850SAkhilesh Sanikop #include "src/warp_prediction.h"
40*09537850SAkhilesh Sanikop #include "src/yuv_buffer.h"
41*09537850SAkhilesh Sanikop 
42*09537850SAkhilesh Sanikop namespace libgav1 {
43*09537850SAkhilesh Sanikop namespace {
44*09537850SAkhilesh Sanikop 
45*09537850SAkhilesh Sanikop // Import all the constants in the anonymous namespace.
46*09537850SAkhilesh Sanikop #include "src/inter_intra_masks.inc"
47*09537850SAkhilesh Sanikop 
48*09537850SAkhilesh Sanikop // Precision bits when scaling reference frames.
49*09537850SAkhilesh Sanikop constexpr int kReferenceScaleShift = 14;
50*09537850SAkhilesh Sanikop constexpr int kAngleStep = 3;
51*09537850SAkhilesh Sanikop constexpr int kPredictionModeToAngle[kIntraPredictionModesUV] = {
52*09537850SAkhilesh Sanikop     0, 90, 180, 45, 135, 113, 157, 203, 67, 0, 0, 0, 0};
53*09537850SAkhilesh Sanikop 
54*09537850SAkhilesh Sanikop // The following modes need both the left_column and top_row for intra
55*09537850SAkhilesh Sanikop // prediction. For directional modes left/top requirement is inferred based on
56*09537850SAkhilesh Sanikop // the prediction angle. For Dc modes, left/top requirement is inferred based on
57*09537850SAkhilesh Sanikop // whether or not left/top is available.
58*09537850SAkhilesh Sanikop constexpr BitMaskSet kNeedsLeftAndTop(kPredictionModeSmooth,
59*09537850SAkhilesh Sanikop                                       kPredictionModeSmoothHorizontal,
60*09537850SAkhilesh Sanikop                                       kPredictionModeSmoothVertical,
61*09537850SAkhilesh Sanikop                                       kPredictionModePaeth);
62*09537850SAkhilesh Sanikop 
GetDirectionalIntraPredictorDerivative(const int angle)63*09537850SAkhilesh Sanikop int16_t GetDirectionalIntraPredictorDerivative(const int angle) {
64*09537850SAkhilesh Sanikop   assert(angle >= 3);
65*09537850SAkhilesh Sanikop   assert(angle <= 87);
66*09537850SAkhilesh Sanikop   return kDirectionalIntraPredictorDerivative[DivideBy2(angle) - 1];
67*09537850SAkhilesh Sanikop }
68*09537850SAkhilesh Sanikop 
69*09537850SAkhilesh Sanikop // Maps the block_size to an index as follows:
70*09537850SAkhilesh Sanikop //  kBlock8x8 => 0.
71*09537850SAkhilesh Sanikop //  kBlock8x16 => 1.
72*09537850SAkhilesh Sanikop //  kBlock8x32 => 2.
73*09537850SAkhilesh Sanikop //  kBlock16x8 => 3.
74*09537850SAkhilesh Sanikop //  kBlock16x16 => 4.
75*09537850SAkhilesh Sanikop //  kBlock16x32 => 5.
76*09537850SAkhilesh Sanikop //  kBlock32x8 => 6.
77*09537850SAkhilesh Sanikop //  kBlock32x16 => 7.
78*09537850SAkhilesh Sanikop //  kBlock32x32 => 8.
GetWedgeBlockSizeIndex(BlockSize block_size)79*09537850SAkhilesh Sanikop int GetWedgeBlockSizeIndex(BlockSize block_size) {
80*09537850SAkhilesh Sanikop   assert(block_size >= kBlock8x8);
81*09537850SAkhilesh Sanikop   return block_size - kBlock8x8 - static_cast<int>(block_size >= kBlock16x8) -
82*09537850SAkhilesh Sanikop          static_cast<int>(block_size >= kBlock32x8);
83*09537850SAkhilesh Sanikop }
84*09537850SAkhilesh Sanikop 
85*09537850SAkhilesh Sanikop // Maps a dimension of 4, 8, 16 and 32 to indices 0, 1, 2 and 3 respectively.
GetInterIntraMaskLookupIndex(int dimension)86*09537850SAkhilesh Sanikop int GetInterIntraMaskLookupIndex(int dimension) {
87*09537850SAkhilesh Sanikop   assert(dimension == 4 || dimension == 8 || dimension == 16 ||
88*09537850SAkhilesh Sanikop          dimension == 32);
89*09537850SAkhilesh Sanikop   return FloorLog2(dimension) - 2;
90*09537850SAkhilesh Sanikop }
91*09537850SAkhilesh Sanikop 
// 7.11.2.9.
// Returns the intra edge filter strength (0 to 3) for a block of
// |width| x |height|. The result depends on width + height, on whether
// |filter_type| is zero, and on the magnitude of |delta|; the sign of |delta|
// is ignored. A return value of 0 means no filtering.
int GetIntraEdgeFilterStrength(int width, int height, int filter_type,
                               int delta) {
  const int block_wh = width + height;
  const int d = std::abs(delta);
  if (filter_type != 0) {
    if (block_wh > 24) return 3;
    if (block_wh > 16) return (d >= 4) ? 3 : 0;
    if (block_wh > 8) return (d >= 48) ? 2 : (d >= 20) ? 1 : 0;
    return (d >= 64) ? 2 : (d >= 40) ? 1 : 0;
  }
  // filter_type == 0.
  if (block_wh > 32) return 3;
  if (block_wh > 24) return (d >= 32) ? 3 : (d >= 4) ? 2 : 1;
  if (block_wh > 16) {
    return (d >= 32) ? 3 : (d >= 16) ? 2 : (d >= 8) ? 1 : 0;
  }
  if (block_wh > 8) return (d >= 40) ? 1 : 0;
  return (d >= 56) ? 1 : 0;
}
128*09537850SAkhilesh Sanikop 
// 7.11.2.10.
// Returns true if the intra edge should be upsampled for a block of
// |width| x |height|. Upsampling is never used when the magnitude of |delta|
// is 40 or more; otherwise it is used when width + height is at most 8
// (|filter_type| == 1) or at most 16 (any other |filter_type|).
bool DoIntraEdgeUpsampling(int width, int height, int filter_type, int delta) {
  const int d = std::abs(delta);
  // This function should not be called when the prediction angle is 90 or 180.
  assert(d != 0);
  if (d >= 40) return false;
  const int max_block_wh = (filter_type == 1) ? 8 : 16;
  return width + height <= max_block_wh;
}
138*09537850SAkhilesh Sanikop 
139*09537850SAkhilesh Sanikop constexpr uint8_t kQuantizedDistanceWeight[4][2] = {
140*09537850SAkhilesh Sanikop     {2, 3}, {2, 5}, {2, 7}, {1, kMaxFrameDistance}};
141*09537850SAkhilesh Sanikop 
142*09537850SAkhilesh Sanikop constexpr uint8_t kQuantizedDistanceLookup[4][2] = {
143*09537850SAkhilesh Sanikop     {9, 7}, {11, 5}, {12, 4}, {13, 3}};
144*09537850SAkhilesh Sanikop 
GetDistanceWeights(const int distance[2],int weight[2])145*09537850SAkhilesh Sanikop void GetDistanceWeights(const int distance[2], int weight[2]) {
146*09537850SAkhilesh Sanikop   // Note: distance[0] and distance[1] correspond to relative distance
147*09537850SAkhilesh Sanikop   // between current frame and reference frame [1] and [0], respectively.
148*09537850SAkhilesh Sanikop   const int order = static_cast<int>(distance[0] <= distance[1]);
149*09537850SAkhilesh Sanikop   if (distance[0] == 0 || distance[1] == 0) {
150*09537850SAkhilesh Sanikop     weight[0] = kQuantizedDistanceLookup[3][order];
151*09537850SAkhilesh Sanikop     weight[1] = kQuantizedDistanceLookup[3][1 - order];
152*09537850SAkhilesh Sanikop   } else {
153*09537850SAkhilesh Sanikop     int i;
154*09537850SAkhilesh Sanikop     for (i = 0; i < 3; ++i) {
155*09537850SAkhilesh Sanikop       const int weight_0 = kQuantizedDistanceWeight[i][order];
156*09537850SAkhilesh Sanikop       const int weight_1 = kQuantizedDistanceWeight[i][1 - order];
157*09537850SAkhilesh Sanikop       if (order == 0) {
158*09537850SAkhilesh Sanikop         if (distance[0] * weight_0 < distance[1] * weight_1) break;
159*09537850SAkhilesh Sanikop       } else {
160*09537850SAkhilesh Sanikop         if (distance[0] * weight_0 > distance[1] * weight_1) break;
161*09537850SAkhilesh Sanikop       }
162*09537850SAkhilesh Sanikop     }
163*09537850SAkhilesh Sanikop     weight[0] = kQuantizedDistanceLookup[i][order];
164*09537850SAkhilesh Sanikop     weight[1] = kQuantizedDistanceLookup[i][1 - order];
165*09537850SAkhilesh Sanikop   }
166*09537850SAkhilesh Sanikop }
167*09537850SAkhilesh Sanikop 
GetIntraPredictor(PredictionMode mode,bool has_left,bool has_top)168*09537850SAkhilesh Sanikop dsp::IntraPredictor GetIntraPredictor(PredictionMode mode, bool has_left,
169*09537850SAkhilesh Sanikop                                       bool has_top) {
170*09537850SAkhilesh Sanikop   if (mode == kPredictionModeDc) {
171*09537850SAkhilesh Sanikop     if (has_left && has_top) {
172*09537850SAkhilesh Sanikop       return dsp::kIntraPredictorDc;
173*09537850SAkhilesh Sanikop     }
174*09537850SAkhilesh Sanikop     if (has_left) {
175*09537850SAkhilesh Sanikop       return dsp::kIntraPredictorDcLeft;
176*09537850SAkhilesh Sanikop     }
177*09537850SAkhilesh Sanikop     if (has_top) {
178*09537850SAkhilesh Sanikop       return dsp::kIntraPredictorDcTop;
179*09537850SAkhilesh Sanikop     }
180*09537850SAkhilesh Sanikop     return dsp::kIntraPredictorDcFill;
181*09537850SAkhilesh Sanikop   }
182*09537850SAkhilesh Sanikop   switch (mode) {
183*09537850SAkhilesh Sanikop     case kPredictionModePaeth:
184*09537850SAkhilesh Sanikop       return dsp::kIntraPredictorPaeth;
185*09537850SAkhilesh Sanikop     case kPredictionModeSmooth:
186*09537850SAkhilesh Sanikop       return dsp::kIntraPredictorSmooth;
187*09537850SAkhilesh Sanikop     case kPredictionModeSmoothVertical:
188*09537850SAkhilesh Sanikop       return dsp::kIntraPredictorSmoothVertical;
189*09537850SAkhilesh Sanikop     case kPredictionModeSmoothHorizontal:
190*09537850SAkhilesh Sanikop       return dsp::kIntraPredictorSmoothHorizontal;
191*09537850SAkhilesh Sanikop     default:
192*09537850SAkhilesh Sanikop       return dsp::kNumIntraPredictors;
193*09537850SAkhilesh Sanikop   }
194*09537850SAkhilesh Sanikop }
195*09537850SAkhilesh Sanikop 
GetStartPoint(Array2DView<uint8_t> * const buffer,const int plane,const int x,const int y,const int bitdepth)196*09537850SAkhilesh Sanikop uint8_t* GetStartPoint(Array2DView<uint8_t>* const buffer, const int plane,
197*09537850SAkhilesh Sanikop                        const int x, const int y, const int bitdepth) {
198*09537850SAkhilesh Sanikop #if LIBGAV1_MAX_BITDEPTH >= 10
199*09537850SAkhilesh Sanikop   if (bitdepth > 8) {
200*09537850SAkhilesh Sanikop     Array2DView<uint16_t> buffer16(
201*09537850SAkhilesh Sanikop         buffer[plane].rows(), buffer[plane].columns() / sizeof(uint16_t),
202*09537850SAkhilesh Sanikop         reinterpret_cast<uint16_t*>(&buffer[plane][0][0]));
203*09537850SAkhilesh Sanikop     return reinterpret_cast<uint8_t*>(&buffer16[y][x]);
204*09537850SAkhilesh Sanikop   }
205*09537850SAkhilesh Sanikop #endif  // LIBGAV1_MAX_BITDEPTH >= 10
206*09537850SAkhilesh Sanikop   static_cast<void>(bitdepth);
207*09537850SAkhilesh Sanikop   return &buffer[plane][y][x];
208*09537850SAkhilesh Sanikop }
209*09537850SAkhilesh Sanikop 
GetPixelPositionFromHighScale(int start,int step,int offset)210*09537850SAkhilesh Sanikop int GetPixelPositionFromHighScale(int start, int step, int offset) {
211*09537850SAkhilesh Sanikop   return (start + step * offset) >> kScaleSubPixelBits;
212*09537850SAkhilesh Sanikop }
213*09537850SAkhilesh Sanikop 
// Picks the entry of |dsp|.mask_blend to use. Inter-intra prediction without
// a wedge mask always uses the entry at index 0; every other case indexes the
// table by |subsampling_x| + |subsampling_y| and |is_inter_intra|.
dsp::MaskBlendFunc GetMaskBlendFunc(const dsp::Dsp& dsp, bool is_inter_intra,
                                    bool is_wedge_inter_intra,
                                    int subsampling_x, int subsampling_y) {
  if (is_inter_intra && !is_wedge_inter_intra) {
    return dsp.mask_blend[0][/*is_inter_intra=*/true];
  }
  const int subsampling_index = subsampling_x + subsampling_y;
  return dsp.mask_blend[subsampling_index][is_inter_intra];
}
221*09537850SAkhilesh Sanikop 
222*09537850SAkhilesh Sanikop }  // namespace
223*09537850SAkhilesh Sanikop 
224*09537850SAkhilesh Sanikop template <typename Pixel>
IntraPrediction(const Block & block,Plane plane,int x,int y,bool has_left,bool has_top,bool has_top_right,bool has_bottom_left,PredictionMode mode,TransformSize tx_size)225*09537850SAkhilesh Sanikop void Tile::IntraPrediction(const Block& block, Plane plane, int x, int y,
226*09537850SAkhilesh Sanikop                            bool has_left, bool has_top, bool has_top_right,
227*09537850SAkhilesh Sanikop                            bool has_bottom_left, PredictionMode mode,
228*09537850SAkhilesh Sanikop                            TransformSize tx_size) {
229*09537850SAkhilesh Sanikop   const int width = kTransformWidth[tx_size];
230*09537850SAkhilesh Sanikop   const int height = kTransformHeight[tx_size];
231*09537850SAkhilesh Sanikop   const int x_shift = subsampling_x_[plane];
232*09537850SAkhilesh Sanikop   const int y_shift = subsampling_y_[plane];
233*09537850SAkhilesh Sanikop   const int max_x = (MultiplyBy4(frame_header_.columns4x4) >> x_shift) - 1;
234*09537850SAkhilesh Sanikop   const int max_y = (MultiplyBy4(frame_header_.rows4x4) >> y_shift) - 1;
235*09537850SAkhilesh Sanikop   // For performance reasons, do not initialize the following two buffers.
236*09537850SAkhilesh Sanikop   alignas(kMaxAlignment) Pixel top_row_data[160];
237*09537850SAkhilesh Sanikop   alignas(kMaxAlignment) Pixel left_column_data[160];
238*09537850SAkhilesh Sanikop #if LIBGAV1_MSAN
239*09537850SAkhilesh Sanikop   if (IsDirectionalMode(mode)) {
240*09537850SAkhilesh Sanikop     memset(top_row_data, 0, sizeof(top_row_data));
241*09537850SAkhilesh Sanikop     memset(left_column_data, 0, sizeof(left_column_data));
242*09537850SAkhilesh Sanikop   }
243*09537850SAkhilesh Sanikop #endif
244*09537850SAkhilesh Sanikop   // Some predictors use |top_row_data| and |left_column_data| with a negative
245*09537850SAkhilesh Sanikop   // offset to access pixels to the top-left of the current block. So have some
246*09537850SAkhilesh Sanikop   // space before the arrays to allow populating those without having to move
247*09537850SAkhilesh Sanikop   // the rest of the array.
248*09537850SAkhilesh Sanikop   Pixel* const top_row = top_row_data + 16;
249*09537850SAkhilesh Sanikop   Pixel* const left_column = left_column_data + 16;
250*09537850SAkhilesh Sanikop   const int bitdepth = sequence_header_.color_config.bitdepth;
251*09537850SAkhilesh Sanikop   const int top_and_left_size = width + height;
252*09537850SAkhilesh Sanikop   const bool is_directional_mode = IsDirectionalMode(mode);
253*09537850SAkhilesh Sanikop   const PredictionParameters& prediction_parameters =
254*09537850SAkhilesh Sanikop       *block.bp->prediction_parameters;
255*09537850SAkhilesh Sanikop   const bool use_filter_intra =
256*09537850SAkhilesh Sanikop       (plane == kPlaneY && prediction_parameters.use_filter_intra);
257*09537850SAkhilesh Sanikop   const int prediction_angle =
258*09537850SAkhilesh Sanikop       is_directional_mode
259*09537850SAkhilesh Sanikop           ? kPredictionModeToAngle[mode] +
260*09537850SAkhilesh Sanikop                 prediction_parameters.angle_delta[GetPlaneType(plane)] *
261*09537850SAkhilesh Sanikop                     kAngleStep
262*09537850SAkhilesh Sanikop           : 0;
263*09537850SAkhilesh Sanikop   // Directional prediction requires buffers larger than the width or height.
264*09537850SAkhilesh Sanikop   const int top_size = is_directional_mode ? top_and_left_size : width;
265*09537850SAkhilesh Sanikop   const int left_size = is_directional_mode ? top_and_left_size : height;
266*09537850SAkhilesh Sanikop   const int top_right_size =
267*09537850SAkhilesh Sanikop       is_directional_mode ? (has_top_right ? 2 : 1) * width : width;
268*09537850SAkhilesh Sanikop   const int bottom_left_size =
269*09537850SAkhilesh Sanikop       is_directional_mode ? (has_bottom_left ? 2 : 1) * height : height;
270*09537850SAkhilesh Sanikop 
271*09537850SAkhilesh Sanikop   Array2DView<Pixel> buffer(buffer_[plane].rows(),
272*09537850SAkhilesh Sanikop                             buffer_[plane].columns() / sizeof(Pixel),
273*09537850SAkhilesh Sanikop                             reinterpret_cast<Pixel*>(&buffer_[plane][0][0]));
274*09537850SAkhilesh Sanikop   const bool needs_top = use_filter_intra || kNeedsLeftAndTop.Contains(mode) ||
275*09537850SAkhilesh Sanikop                          (is_directional_mode && prediction_angle < 180) ||
276*09537850SAkhilesh Sanikop                          (mode == kPredictionModeDc && has_top);
277*09537850SAkhilesh Sanikop   const bool needs_left = use_filter_intra || kNeedsLeftAndTop.Contains(mode) ||
278*09537850SAkhilesh Sanikop                           (is_directional_mode && prediction_angle > 90) ||
279*09537850SAkhilesh Sanikop                           (mode == kPredictionModeDc && has_left);
280*09537850SAkhilesh Sanikop 
281*09537850SAkhilesh Sanikop   const Pixel* top_row_src = buffer[y - 1];
282*09537850SAkhilesh Sanikop 
283*09537850SAkhilesh Sanikop   // Determine if we need to retrieve the top row from
284*09537850SAkhilesh Sanikop   // |intra_prediction_buffer_|.
285*09537850SAkhilesh Sanikop   if ((needs_top || needs_left) && use_intra_prediction_buffer_) {
286*09537850SAkhilesh Sanikop     // Superblock index of block.row4x4. block.row4x4 is always in luma
287*09537850SAkhilesh Sanikop     // dimension (no subsampling).
288*09537850SAkhilesh Sanikop     const int current_superblock_index =
289*09537850SAkhilesh Sanikop         block.row4x4 >> (sequence_header_.use_128x128_superblock ? 5 : 4);
290*09537850SAkhilesh Sanikop     // Superblock index of y - 1. y is in the plane dimension (chroma planes
291*09537850SAkhilesh Sanikop     // could be subsampled).
292*09537850SAkhilesh Sanikop     const int plane_shift = (sequence_header_.use_128x128_superblock ? 7 : 6) -
293*09537850SAkhilesh Sanikop                             subsampling_y_[plane];
294*09537850SAkhilesh Sanikop     const int top_row_superblock_index = (y - 1) >> plane_shift;
295*09537850SAkhilesh Sanikop     // If the superblock index of y - 1 is not that of the current superblock,
296*09537850SAkhilesh Sanikop     // then we will have to retrieve the top row from the
297*09537850SAkhilesh Sanikop     // |intra_prediction_buffer_|.
298*09537850SAkhilesh Sanikop     if (current_superblock_index != top_row_superblock_index) {
299*09537850SAkhilesh Sanikop       top_row_src = reinterpret_cast<const Pixel*>(
300*09537850SAkhilesh Sanikop           (*intra_prediction_buffer_)[plane].get());
301*09537850SAkhilesh Sanikop     }
302*09537850SAkhilesh Sanikop   }
303*09537850SAkhilesh Sanikop 
304*09537850SAkhilesh Sanikop   if (needs_top) {
305*09537850SAkhilesh Sanikop     // Compute top_row.
306*09537850SAkhilesh Sanikop     if (has_top || has_left) {
307*09537850SAkhilesh Sanikop       const int left_index = has_left ? x - 1 : x;
308*09537850SAkhilesh Sanikop       top_row[-1] = has_top ? top_row_src[left_index] : buffer[y][left_index];
309*09537850SAkhilesh Sanikop     } else {
310*09537850SAkhilesh Sanikop       top_row[-1] = 1 << (bitdepth - 1);
311*09537850SAkhilesh Sanikop     }
312*09537850SAkhilesh Sanikop     if (!has_top && has_left) {
313*09537850SAkhilesh Sanikop       Memset(top_row, buffer[y][x - 1], top_size);
314*09537850SAkhilesh Sanikop     } else if (!has_top && !has_left) {
315*09537850SAkhilesh Sanikop       Memset(top_row, (1 << (bitdepth - 1)) - 1, top_size);
316*09537850SAkhilesh Sanikop     } else {
317*09537850SAkhilesh Sanikop       const int top_limit = std::min(max_x - x + 1, top_right_size);
318*09537850SAkhilesh Sanikop       memcpy(top_row, &top_row_src[x], top_limit * sizeof(Pixel));
319*09537850SAkhilesh Sanikop       // Even though it is safe to call Memset with a size of 0, accessing
320*09537850SAkhilesh Sanikop       // top_row_src[top_limit - x + 1] is not allowed when this condition is
321*09537850SAkhilesh Sanikop       // false.
322*09537850SAkhilesh Sanikop       if (top_size - top_limit > 0) {
323*09537850SAkhilesh Sanikop         Memset(top_row + top_limit, top_row_src[top_limit + x - 1],
324*09537850SAkhilesh Sanikop                top_size - top_limit);
325*09537850SAkhilesh Sanikop       }
326*09537850SAkhilesh Sanikop     }
327*09537850SAkhilesh Sanikop   }
328*09537850SAkhilesh Sanikop   if (needs_left) {
329*09537850SAkhilesh Sanikop     // Compute left_column.
330*09537850SAkhilesh Sanikop     if (has_top || has_left) {
331*09537850SAkhilesh Sanikop       const int left_index = has_left ? x - 1 : x;
332*09537850SAkhilesh Sanikop       left_column[-1] =
333*09537850SAkhilesh Sanikop           has_top ? top_row_src[left_index] : buffer[y][left_index];
334*09537850SAkhilesh Sanikop     } else {
335*09537850SAkhilesh Sanikop       left_column[-1] = 1 << (bitdepth - 1);
336*09537850SAkhilesh Sanikop     }
337*09537850SAkhilesh Sanikop     if (!has_left && has_top) {
338*09537850SAkhilesh Sanikop       Memset(left_column, top_row_src[x], left_size);
339*09537850SAkhilesh Sanikop     } else if (!has_left && !has_top) {
340*09537850SAkhilesh Sanikop       Memset(left_column, (1 << (bitdepth - 1)) + 1, left_size);
341*09537850SAkhilesh Sanikop     } else {
342*09537850SAkhilesh Sanikop       const int left_limit = std::min(max_y - y + 1, bottom_left_size);
343*09537850SAkhilesh Sanikop       for (int i = 0; i < left_limit; ++i) {
344*09537850SAkhilesh Sanikop         left_column[i] = buffer[y + i][x - 1];
345*09537850SAkhilesh Sanikop       }
346*09537850SAkhilesh Sanikop       // Even though it is safe to call Memset with a size of 0, accessing
347*09537850SAkhilesh Sanikop       // buffer[left_limit - y + 1][x - 1] is not allowed when this condition is
348*09537850SAkhilesh Sanikop       // false.
349*09537850SAkhilesh Sanikop       if (left_size - left_limit > 0) {
350*09537850SAkhilesh Sanikop         Memset(left_column + left_limit, buffer[left_limit + y - 1][x - 1],
351*09537850SAkhilesh Sanikop                left_size - left_limit);
352*09537850SAkhilesh Sanikop       }
353*09537850SAkhilesh Sanikop     }
354*09537850SAkhilesh Sanikop   }
355*09537850SAkhilesh Sanikop   Pixel* const dest = &buffer[y][x];
356*09537850SAkhilesh Sanikop   const ptrdiff_t dest_stride = buffer_[plane].columns();
357*09537850SAkhilesh Sanikop   if (use_filter_intra) {
358*09537850SAkhilesh Sanikop     dsp_.filter_intra_predictor(dest, dest_stride, top_row, left_column,
359*09537850SAkhilesh Sanikop                                 prediction_parameters.filter_intra_mode, width,
360*09537850SAkhilesh Sanikop                                 height);
361*09537850SAkhilesh Sanikop   } else if (is_directional_mode) {
362*09537850SAkhilesh Sanikop     DirectionalPrediction(block, plane, x, y, has_left, has_top, needs_left,
363*09537850SAkhilesh Sanikop                           needs_top, prediction_angle, width, height, max_x,
364*09537850SAkhilesh Sanikop                           max_y, tx_size, top_row, left_column);
365*09537850SAkhilesh Sanikop   } else {
366*09537850SAkhilesh Sanikop     const dsp::IntraPredictor predictor =
367*09537850SAkhilesh Sanikop         GetIntraPredictor(mode, has_left, has_top);
368*09537850SAkhilesh Sanikop     assert(predictor != dsp::kNumIntraPredictors);
369*09537850SAkhilesh Sanikop     dsp_.intra_predictors[tx_size][predictor](dest, dest_stride, top_row,
370*09537850SAkhilesh Sanikop                                               left_column);
371*09537850SAkhilesh Sanikop   }
372*09537850SAkhilesh Sanikop }
373*09537850SAkhilesh Sanikop 
374*09537850SAkhilesh Sanikop template void Tile::IntraPrediction<uint8_t>(const Block& block, Plane plane,
375*09537850SAkhilesh Sanikop                                              int x, int y, bool has_left,
376*09537850SAkhilesh Sanikop                                              bool has_top, bool has_top_right,
377*09537850SAkhilesh Sanikop                                              bool has_bottom_left,
378*09537850SAkhilesh Sanikop                                              PredictionMode mode,
379*09537850SAkhilesh Sanikop                                              TransformSize tx_size);
380*09537850SAkhilesh Sanikop #if LIBGAV1_MAX_BITDEPTH >= 10
381*09537850SAkhilesh Sanikop template void Tile::IntraPrediction<uint16_t>(const Block& block, Plane plane,
382*09537850SAkhilesh Sanikop                                               int x, int y, bool has_left,
383*09537850SAkhilesh Sanikop                                               bool has_top, bool has_top_right,
384*09537850SAkhilesh Sanikop                                               bool has_bottom_left,
385*09537850SAkhilesh Sanikop                                               PredictionMode mode,
386*09537850SAkhilesh Sanikop                                               TransformSize tx_size);
387*09537850SAkhilesh Sanikop #endif
388*09537850SAkhilesh Sanikop 
GetIntraEdgeFilterType(const Block & block,Plane plane) const389*09537850SAkhilesh Sanikop int Tile::GetIntraEdgeFilterType(const Block& block, Plane plane) const {
390*09537850SAkhilesh Sanikop   bool top;
391*09537850SAkhilesh Sanikop   bool left;
392*09537850SAkhilesh Sanikop   if (plane == kPlaneY) {
393*09537850SAkhilesh Sanikop     top = block.top_available[kPlaneY] &&
394*09537850SAkhilesh Sanikop           kPredictionModeSmoothMask.Contains(block.bp_top->y_mode);
395*09537850SAkhilesh Sanikop     left = block.left_available[kPlaneY] &&
396*09537850SAkhilesh Sanikop            kPredictionModeSmoothMask.Contains(block.bp_left->y_mode);
397*09537850SAkhilesh Sanikop   } else {
398*09537850SAkhilesh Sanikop     top = block.top_available[plane] &&
399*09537850SAkhilesh Sanikop           block.bp->prediction_parameters->chroma_top_uses_smooth_prediction;
400*09537850SAkhilesh Sanikop     left = block.left_available[plane] &&
401*09537850SAkhilesh Sanikop            block.bp->prediction_parameters->chroma_left_uses_smooth_prediction;
402*09537850SAkhilesh Sanikop   }
403*09537850SAkhilesh Sanikop   return static_cast<int>(top || left);
404*09537850SAkhilesh Sanikop }
405*09537850SAkhilesh Sanikop 
406*09537850SAkhilesh Sanikop template <typename Pixel>
DirectionalPrediction(const Block & block,Plane plane,int x,int y,bool has_left,bool has_top,bool needs_left,bool needs_top,int prediction_angle,int width,int height,int max_x,int max_y,TransformSize tx_size,Pixel * const top_row,Pixel * const left_column)407*09537850SAkhilesh Sanikop void Tile::DirectionalPrediction(const Block& block, Plane plane, int x, int y,
408*09537850SAkhilesh Sanikop                                  bool has_left, bool has_top, bool needs_left,
409*09537850SAkhilesh Sanikop                                  bool needs_top, int prediction_angle,
410*09537850SAkhilesh Sanikop                                  int width, int height, int max_x, int max_y,
411*09537850SAkhilesh Sanikop                                  TransformSize tx_size, Pixel* const top_row,
412*09537850SAkhilesh Sanikop                                  Pixel* const left_column) {
413*09537850SAkhilesh Sanikop   Array2DView<Pixel> buffer(buffer_[plane].rows(),
414*09537850SAkhilesh Sanikop                             buffer_[plane].columns() / sizeof(Pixel),
415*09537850SAkhilesh Sanikop                             reinterpret_cast<Pixel*>(&buffer_[plane][0][0]));
416*09537850SAkhilesh Sanikop   Pixel* const dest = &buffer[y][x];
417*09537850SAkhilesh Sanikop   const ptrdiff_t stride = buffer_[plane].columns();
418*09537850SAkhilesh Sanikop   if (prediction_angle == 90) {
419*09537850SAkhilesh Sanikop     dsp_.intra_predictors[tx_size][dsp::kIntraPredictorVertical](
420*09537850SAkhilesh Sanikop         dest, stride, top_row, left_column);
421*09537850SAkhilesh Sanikop     return;
422*09537850SAkhilesh Sanikop   }
423*09537850SAkhilesh Sanikop   if (prediction_angle == 180) {
424*09537850SAkhilesh Sanikop     dsp_.intra_predictors[tx_size][dsp::kIntraPredictorHorizontal](
425*09537850SAkhilesh Sanikop         dest, stride, top_row, left_column);
426*09537850SAkhilesh Sanikop     return;
427*09537850SAkhilesh Sanikop   }
428*09537850SAkhilesh Sanikop 
429*09537850SAkhilesh Sanikop   bool upsampled_top = false;
430*09537850SAkhilesh Sanikop   bool upsampled_left = false;
431*09537850SAkhilesh Sanikop   if (sequence_header_.enable_intra_edge_filter) {
432*09537850SAkhilesh Sanikop     const int filter_type = GetIntraEdgeFilterType(block, plane);
433*09537850SAkhilesh Sanikop     if (prediction_angle > 90 && prediction_angle < 180 &&
434*09537850SAkhilesh Sanikop         (width + height) >= 24) {
435*09537850SAkhilesh Sanikop       // 7.11.2.7.
436*09537850SAkhilesh Sanikop       left_column[-1] = top_row[-1] = RightShiftWithRounding(
437*09537850SAkhilesh Sanikop           left_column[0] * 5 + top_row[-1] * 6 + top_row[0] * 5, 4);
438*09537850SAkhilesh Sanikop     }
439*09537850SAkhilesh Sanikop     if (has_top && needs_top) {
440*09537850SAkhilesh Sanikop       const int strength = GetIntraEdgeFilterStrength(
441*09537850SAkhilesh Sanikop           width, height, filter_type, prediction_angle - 90);
442*09537850SAkhilesh Sanikop       if (strength > 0) {
443*09537850SAkhilesh Sanikop         const int num_pixels = std::min(width, max_x - x + 1) +
444*09537850SAkhilesh Sanikop                                ((prediction_angle < 90) ? height : 0) + 1;
445*09537850SAkhilesh Sanikop         dsp_.intra_edge_filter(top_row - 1, num_pixels, strength);
446*09537850SAkhilesh Sanikop       }
447*09537850SAkhilesh Sanikop     }
448*09537850SAkhilesh Sanikop     if (has_left && needs_left) {
449*09537850SAkhilesh Sanikop       const int strength = GetIntraEdgeFilterStrength(
450*09537850SAkhilesh Sanikop           width, height, filter_type, prediction_angle - 180);
451*09537850SAkhilesh Sanikop       if (strength > 0) {
452*09537850SAkhilesh Sanikop         const int num_pixels = std::min(height, max_y - y + 1) +
453*09537850SAkhilesh Sanikop                                ((prediction_angle > 180) ? width : 0) + 1;
454*09537850SAkhilesh Sanikop         dsp_.intra_edge_filter(left_column - 1, num_pixels, strength);
455*09537850SAkhilesh Sanikop       }
456*09537850SAkhilesh Sanikop     }
457*09537850SAkhilesh Sanikop     upsampled_top = DoIntraEdgeUpsampling(width, height, filter_type,
458*09537850SAkhilesh Sanikop                                           prediction_angle - 90);
459*09537850SAkhilesh Sanikop     if (upsampled_top && needs_top) {
460*09537850SAkhilesh Sanikop       const int num_pixels = width + ((prediction_angle < 90) ? height : 0);
461*09537850SAkhilesh Sanikop       dsp_.intra_edge_upsampler(top_row, num_pixels);
462*09537850SAkhilesh Sanikop     }
463*09537850SAkhilesh Sanikop     upsampled_left = DoIntraEdgeUpsampling(width, height, filter_type,
464*09537850SAkhilesh Sanikop                                            prediction_angle - 180);
465*09537850SAkhilesh Sanikop     if (upsampled_left && needs_left) {
466*09537850SAkhilesh Sanikop       const int num_pixels = height + ((prediction_angle > 180) ? width : 0);
467*09537850SAkhilesh Sanikop       dsp_.intra_edge_upsampler(left_column, num_pixels);
468*09537850SAkhilesh Sanikop     }
469*09537850SAkhilesh Sanikop   }
470*09537850SAkhilesh Sanikop 
471*09537850SAkhilesh Sanikop   if (prediction_angle < 90) {
472*09537850SAkhilesh Sanikop     const int dx = GetDirectionalIntraPredictorDerivative(prediction_angle);
473*09537850SAkhilesh Sanikop     dsp_.directional_intra_predictor_zone1(dest, stride, top_row, width, height,
474*09537850SAkhilesh Sanikop                                            dx, upsampled_top);
475*09537850SAkhilesh Sanikop   } else if (prediction_angle < 180) {
476*09537850SAkhilesh Sanikop     const int dx =
477*09537850SAkhilesh Sanikop         GetDirectionalIntraPredictorDerivative(180 - prediction_angle);
478*09537850SAkhilesh Sanikop     const int dy =
479*09537850SAkhilesh Sanikop         GetDirectionalIntraPredictorDerivative(prediction_angle - 90);
480*09537850SAkhilesh Sanikop     dsp_.directional_intra_predictor_zone2(dest, stride, top_row, left_column,
481*09537850SAkhilesh Sanikop                                            width, height, dx, dy, upsampled_top,
482*09537850SAkhilesh Sanikop                                            upsampled_left);
483*09537850SAkhilesh Sanikop   } else {
484*09537850SAkhilesh Sanikop     assert(prediction_angle < 270);
485*09537850SAkhilesh Sanikop     const int dy =
486*09537850SAkhilesh Sanikop         GetDirectionalIntraPredictorDerivative(270 - prediction_angle);
487*09537850SAkhilesh Sanikop     dsp_.directional_intra_predictor_zone3(dest, stride, left_column, width,
488*09537850SAkhilesh Sanikop                                            height, dy, upsampled_left);
489*09537850SAkhilesh Sanikop   }
490*09537850SAkhilesh Sanikop }
491*09537850SAkhilesh Sanikop 
492*09537850SAkhilesh Sanikop template <typename Pixel>
PalettePrediction(const Block & block,const Plane plane,const int start_x,const int start_y,const int x,const int y,const TransformSize tx_size)493*09537850SAkhilesh Sanikop void Tile::PalettePrediction(const Block& block, const Plane plane,
494*09537850SAkhilesh Sanikop                              const int start_x, const int start_y, const int x,
495*09537850SAkhilesh Sanikop                              const int y, const TransformSize tx_size) {
496*09537850SAkhilesh Sanikop   const int tx_width = kTransformWidth[tx_size];
497*09537850SAkhilesh Sanikop   const int tx_height = kTransformHeight[tx_size];
498*09537850SAkhilesh Sanikop   const uint16_t* const palette =
499*09537850SAkhilesh Sanikop       block.bp->prediction_parameters->palette_mode_info.color[plane];
500*09537850SAkhilesh Sanikop   const PlaneType plane_type = GetPlaneType(plane);
501*09537850SAkhilesh Sanikop   const int x4 = MultiplyBy4(x);
502*09537850SAkhilesh Sanikop   const int y4 = MultiplyBy4(y);
503*09537850SAkhilesh Sanikop   Array2DView<Pixel> buffer(buffer_[plane].rows(),
504*09537850SAkhilesh Sanikop                             buffer_[plane].columns() / sizeof(Pixel),
505*09537850SAkhilesh Sanikop                             reinterpret_cast<Pixel*>(&buffer_[plane][0][0]));
506*09537850SAkhilesh Sanikop   for (int row = 0; row < tx_height; ++row) {
507*09537850SAkhilesh Sanikop     assert(block.bp->prediction_parameters
508*09537850SAkhilesh Sanikop                ->color_index_map[plane_type][y4 + row] != nullptr);
509*09537850SAkhilesh Sanikop     for (int column = 0; column < tx_width; ++column) {
510*09537850SAkhilesh Sanikop       buffer[start_y + row][start_x + column] =
511*09537850SAkhilesh Sanikop           palette[block.bp->prediction_parameters
512*09537850SAkhilesh Sanikop                       ->color_index_map[plane_type][y4 + row][x4 + column]];
513*09537850SAkhilesh Sanikop     }
514*09537850SAkhilesh Sanikop   }
515*09537850SAkhilesh Sanikop }
516*09537850SAkhilesh Sanikop 
517*09537850SAkhilesh Sanikop template void Tile::PalettePrediction<uint8_t>(
518*09537850SAkhilesh Sanikop     const Block& block, const Plane plane, const int start_x, const int start_y,
519*09537850SAkhilesh Sanikop     const int x, const int y, const TransformSize tx_size);
520*09537850SAkhilesh Sanikop #if LIBGAV1_MAX_BITDEPTH >= 10
521*09537850SAkhilesh Sanikop template void Tile::PalettePrediction<uint16_t>(
522*09537850SAkhilesh Sanikop     const Block& block, const Plane plane, const int start_x, const int start_y,
523*09537850SAkhilesh Sanikop     const int x, const int y, const TransformSize tx_size);
524*09537850SAkhilesh Sanikop #endif
525*09537850SAkhilesh Sanikop 
526*09537850SAkhilesh Sanikop template <typename Pixel>
ChromaFromLumaPrediction(const Block & block,const Plane plane,const int start_x,const int start_y,const TransformSize tx_size)527*09537850SAkhilesh Sanikop void Tile::ChromaFromLumaPrediction(const Block& block, const Plane plane,
528*09537850SAkhilesh Sanikop                                     const int start_x, const int start_y,
529*09537850SAkhilesh Sanikop                                     const TransformSize tx_size) {
530*09537850SAkhilesh Sanikop   const int subsampling_x = subsampling_x_[plane];
531*09537850SAkhilesh Sanikop   const int subsampling_y = subsampling_y_[plane];
532*09537850SAkhilesh Sanikop   const PredictionParameters& prediction_parameters =
533*09537850SAkhilesh Sanikop       *block.bp->prediction_parameters;
534*09537850SAkhilesh Sanikop   Array2DView<Pixel> y_buffer(
535*09537850SAkhilesh Sanikop       buffer_[kPlaneY].rows(), buffer_[kPlaneY].columns() / sizeof(Pixel),
536*09537850SAkhilesh Sanikop       reinterpret_cast<Pixel*>(&buffer_[kPlaneY][0][0]));
537*09537850SAkhilesh Sanikop   if (!block.scratch_buffer->cfl_luma_buffer_valid) {
538*09537850SAkhilesh Sanikop     const int luma_x = start_x << subsampling_x;
539*09537850SAkhilesh Sanikop     const int luma_y = start_y << subsampling_y;
540*09537850SAkhilesh Sanikop     dsp_.cfl_subsamplers[tx_size][subsampling_x + subsampling_y](
541*09537850SAkhilesh Sanikop         block.scratch_buffer->cfl_luma_buffer,
542*09537850SAkhilesh Sanikop         prediction_parameters.max_luma_width - luma_x,
543*09537850SAkhilesh Sanikop         prediction_parameters.max_luma_height - luma_y,
544*09537850SAkhilesh Sanikop         reinterpret_cast<uint8_t*>(&y_buffer[luma_y][luma_x]),
545*09537850SAkhilesh Sanikop         buffer_[kPlaneY].columns());
546*09537850SAkhilesh Sanikop     block.scratch_buffer->cfl_luma_buffer_valid = true;
547*09537850SAkhilesh Sanikop   }
548*09537850SAkhilesh Sanikop   Array2DView<Pixel> buffer(buffer_[plane].rows(),
549*09537850SAkhilesh Sanikop                             buffer_[plane].columns() / sizeof(Pixel),
550*09537850SAkhilesh Sanikop                             reinterpret_cast<Pixel*>(&buffer_[plane][0][0]));
551*09537850SAkhilesh Sanikop   dsp_.cfl_intra_predictors[tx_size](
552*09537850SAkhilesh Sanikop       reinterpret_cast<uint8_t*>(&buffer[start_y][start_x]),
553*09537850SAkhilesh Sanikop       buffer_[plane].columns(), block.scratch_buffer->cfl_luma_buffer,
554*09537850SAkhilesh Sanikop       (plane == kPlaneU) ? prediction_parameters.cfl_alpha_u
555*09537850SAkhilesh Sanikop                          : prediction_parameters.cfl_alpha_v);
556*09537850SAkhilesh Sanikop }
557*09537850SAkhilesh Sanikop 
558*09537850SAkhilesh Sanikop template void Tile::ChromaFromLumaPrediction<uint8_t>(
559*09537850SAkhilesh Sanikop     const Block& block, const Plane plane, const int start_x, const int start_y,
560*09537850SAkhilesh Sanikop     const TransformSize tx_size);
561*09537850SAkhilesh Sanikop #if LIBGAV1_MAX_BITDEPTH >= 10
562*09537850SAkhilesh Sanikop template void Tile::ChromaFromLumaPrediction<uint16_t>(
563*09537850SAkhilesh Sanikop     const Block& block, const Plane plane, const int start_x, const int start_y,
564*09537850SAkhilesh Sanikop     const TransformSize tx_size);
565*09537850SAkhilesh Sanikop #endif
566*09537850SAkhilesh Sanikop 
InterIntraPrediction(uint16_t * const prediction_0,const uint8_t * const prediction_mask,const ptrdiff_t prediction_mask_stride,const PredictionParameters & prediction_parameters,const int prediction_width,const int prediction_height,const int subsampling_x,const int subsampling_y,uint8_t * const dest,const ptrdiff_t dest_stride)567*09537850SAkhilesh Sanikop void Tile::InterIntraPrediction(
568*09537850SAkhilesh Sanikop     uint16_t* const prediction_0, const uint8_t* const prediction_mask,
569*09537850SAkhilesh Sanikop     const ptrdiff_t prediction_mask_stride,
570*09537850SAkhilesh Sanikop     const PredictionParameters& prediction_parameters,
571*09537850SAkhilesh Sanikop     const int prediction_width, const int prediction_height,
572*09537850SAkhilesh Sanikop     const int subsampling_x, const int subsampling_y, uint8_t* const dest,
573*09537850SAkhilesh Sanikop     const ptrdiff_t dest_stride) {
574*09537850SAkhilesh Sanikop   assert(prediction_mask != nullptr);
575*09537850SAkhilesh Sanikop   assert(prediction_parameters.compound_prediction_type ==
576*09537850SAkhilesh Sanikop              kCompoundPredictionTypeIntra ||
577*09537850SAkhilesh Sanikop          prediction_parameters.compound_prediction_type ==
578*09537850SAkhilesh Sanikop              kCompoundPredictionTypeWedge);
579*09537850SAkhilesh Sanikop   // The first buffer of InterIntra is from inter prediction.
580*09537850SAkhilesh Sanikop   // The second buffer is from intra prediction.
581*09537850SAkhilesh Sanikop #if LIBGAV1_MAX_BITDEPTH >= 10
582*09537850SAkhilesh Sanikop   if (sequence_header_.color_config.bitdepth > 8) {
583*09537850SAkhilesh Sanikop     GetMaskBlendFunc(dsp_, /*is_inter_intra=*/true,
584*09537850SAkhilesh Sanikop                      prediction_parameters.is_wedge_inter_intra, subsampling_x,
585*09537850SAkhilesh Sanikop                      subsampling_y)(
586*09537850SAkhilesh Sanikop         prediction_0, reinterpret_cast<uint16_t*>(dest),
587*09537850SAkhilesh Sanikop         dest_stride / sizeof(uint16_t), prediction_mask, prediction_mask_stride,
588*09537850SAkhilesh Sanikop         prediction_width, prediction_height, dest, dest_stride);
589*09537850SAkhilesh Sanikop     return;
590*09537850SAkhilesh Sanikop   }
591*09537850SAkhilesh Sanikop #endif
592*09537850SAkhilesh Sanikop   const int function_index = prediction_parameters.is_wedge_inter_intra
593*09537850SAkhilesh Sanikop                                  ? subsampling_x + subsampling_y
594*09537850SAkhilesh Sanikop                                  : 0;
595*09537850SAkhilesh Sanikop   // |is_inter_intra| prediction values are stored in a Pixel buffer but it is
596*09537850SAkhilesh Sanikop   // currently declared as a uint16_t buffer.
597*09537850SAkhilesh Sanikop   // TODO(johannkoenig): convert the prediction buffer to a uint8_t buffer and
598*09537850SAkhilesh Sanikop   // remove the reinterpret_cast.
599*09537850SAkhilesh Sanikop   dsp_.inter_intra_mask_blend_8bpp[function_index](
600*09537850SAkhilesh Sanikop       reinterpret_cast<uint8_t*>(prediction_0), dest, dest_stride,
601*09537850SAkhilesh Sanikop       prediction_mask, prediction_mask_stride, prediction_width,
602*09537850SAkhilesh Sanikop       prediction_height);
603*09537850SAkhilesh Sanikop }
604*09537850SAkhilesh Sanikop 
CompoundInterPrediction(const Block & block,const uint8_t * const prediction_mask,const ptrdiff_t prediction_mask_stride,const int prediction_width,const int prediction_height,const int subsampling_x,const int subsampling_y,const int candidate_row,const int candidate_column,uint8_t * dest,const ptrdiff_t dest_stride)605*09537850SAkhilesh Sanikop void Tile::CompoundInterPrediction(
606*09537850SAkhilesh Sanikop     const Block& block, const uint8_t* const prediction_mask,
607*09537850SAkhilesh Sanikop     const ptrdiff_t prediction_mask_stride, const int prediction_width,
608*09537850SAkhilesh Sanikop     const int prediction_height, const int subsampling_x,
609*09537850SAkhilesh Sanikop     const int subsampling_y, const int candidate_row,
610*09537850SAkhilesh Sanikop     const int candidate_column, uint8_t* dest, const ptrdiff_t dest_stride) {
611*09537850SAkhilesh Sanikop   const PredictionParameters& prediction_parameters =
612*09537850SAkhilesh Sanikop       *block.bp->prediction_parameters;
613*09537850SAkhilesh Sanikop 
614*09537850SAkhilesh Sanikop   void* prediction[2];
615*09537850SAkhilesh Sanikop #if LIBGAV1_MAX_BITDEPTH >= 10
616*09537850SAkhilesh Sanikop   const int bitdepth = sequence_header_.color_config.bitdepth;
617*09537850SAkhilesh Sanikop   if (bitdepth > 8) {
618*09537850SAkhilesh Sanikop     prediction[0] = block.scratch_buffer->prediction_buffer[0];
619*09537850SAkhilesh Sanikop     prediction[1] = block.scratch_buffer->prediction_buffer[1];
620*09537850SAkhilesh Sanikop   } else {
621*09537850SAkhilesh Sanikop #endif
622*09537850SAkhilesh Sanikop     prediction[0] = block.scratch_buffer->compound_prediction_buffer_8bpp[0];
623*09537850SAkhilesh Sanikop     prediction[1] = block.scratch_buffer->compound_prediction_buffer_8bpp[1];
624*09537850SAkhilesh Sanikop #if LIBGAV1_MAX_BITDEPTH >= 10
625*09537850SAkhilesh Sanikop   }
626*09537850SAkhilesh Sanikop #endif
627*09537850SAkhilesh Sanikop 
628*09537850SAkhilesh Sanikop   switch (prediction_parameters.compound_prediction_type) {
629*09537850SAkhilesh Sanikop     case kCompoundPredictionTypeWedge:
630*09537850SAkhilesh Sanikop     case kCompoundPredictionTypeDiffWeighted:
631*09537850SAkhilesh Sanikop       GetMaskBlendFunc(dsp_, /*is_inter_intra=*/false,
632*09537850SAkhilesh Sanikop                        prediction_parameters.is_wedge_inter_intra,
633*09537850SAkhilesh Sanikop                        subsampling_x, subsampling_y)(
634*09537850SAkhilesh Sanikop           prediction[0], prediction[1],
635*09537850SAkhilesh Sanikop           /*prediction_stride=*/prediction_width, prediction_mask,
636*09537850SAkhilesh Sanikop           prediction_mask_stride, prediction_width, prediction_height, dest,
637*09537850SAkhilesh Sanikop           dest_stride);
638*09537850SAkhilesh Sanikop       break;
639*09537850SAkhilesh Sanikop     case kCompoundPredictionTypeDistance:
640*09537850SAkhilesh Sanikop       DistanceWeightedPrediction(prediction[0], prediction[1], prediction_width,
641*09537850SAkhilesh Sanikop                                  prediction_height, candidate_row,
642*09537850SAkhilesh Sanikop                                  candidate_column, dest, dest_stride);
643*09537850SAkhilesh Sanikop       break;
644*09537850SAkhilesh Sanikop     default:
645*09537850SAkhilesh Sanikop       assert(prediction_parameters.compound_prediction_type ==
646*09537850SAkhilesh Sanikop              kCompoundPredictionTypeAverage);
647*09537850SAkhilesh Sanikop       dsp_.average_blend(prediction[0], prediction[1], prediction_width,
648*09537850SAkhilesh Sanikop                          prediction_height, dest, dest_stride);
649*09537850SAkhilesh Sanikop       break;
650*09537850SAkhilesh Sanikop   }
651*09537850SAkhilesh Sanikop }
652*09537850SAkhilesh Sanikop 
GetWarpParams(const Block & block,const Plane plane,const int prediction_width,const int prediction_height,const PredictionParameters & prediction_parameters,const ReferenceFrameType reference_type,bool * const is_local_valid,GlobalMotion * const global_motion_params,GlobalMotion * const local_warp_params) const653*09537850SAkhilesh Sanikop GlobalMotion* Tile::GetWarpParams(
654*09537850SAkhilesh Sanikop     const Block& block, const Plane plane, const int prediction_width,
655*09537850SAkhilesh Sanikop     const int prediction_height,
656*09537850SAkhilesh Sanikop     const PredictionParameters& prediction_parameters,
657*09537850SAkhilesh Sanikop     const ReferenceFrameType reference_type, bool* const is_local_valid,
658*09537850SAkhilesh Sanikop     GlobalMotion* const global_motion_params,
659*09537850SAkhilesh Sanikop     GlobalMotion* const local_warp_params) const {
660*09537850SAkhilesh Sanikop   if (prediction_width < 8 || prediction_height < 8 ||
661*09537850SAkhilesh Sanikop       frame_header_.force_integer_mv == 1) {
662*09537850SAkhilesh Sanikop     return nullptr;
663*09537850SAkhilesh Sanikop   }
664*09537850SAkhilesh Sanikop   if (plane == kPlaneY) {
665*09537850SAkhilesh Sanikop     *is_local_valid =
666*09537850SAkhilesh Sanikop         prediction_parameters.motion_mode == kMotionModeLocalWarp &&
667*09537850SAkhilesh Sanikop         WarpEstimation(
668*09537850SAkhilesh Sanikop             prediction_parameters.num_warp_samples, DivideBy4(prediction_width),
669*09537850SAkhilesh Sanikop             DivideBy4(prediction_height), block.row4x4, block.column4x4,
670*09537850SAkhilesh Sanikop             block.bp->mv.mv[0], prediction_parameters.warp_estimate_candidates,
671*09537850SAkhilesh Sanikop             local_warp_params) &&
672*09537850SAkhilesh Sanikop         SetupShear(local_warp_params);
673*09537850SAkhilesh Sanikop   }
674*09537850SAkhilesh Sanikop   if (prediction_parameters.motion_mode == kMotionModeLocalWarp &&
675*09537850SAkhilesh Sanikop       *is_local_valid) {
676*09537850SAkhilesh Sanikop     return local_warp_params;
677*09537850SAkhilesh Sanikop   }
678*09537850SAkhilesh Sanikop   if (!IsScaled(reference_type)) {
679*09537850SAkhilesh Sanikop     GlobalMotionTransformationType global_motion_type =
680*09537850SAkhilesh Sanikop         (reference_type != kReferenceFrameIntra)
681*09537850SAkhilesh Sanikop             ? global_motion_params->type
682*09537850SAkhilesh Sanikop             : kNumGlobalMotionTransformationTypes;
683*09537850SAkhilesh Sanikop     const bool is_global_valid =
684*09537850SAkhilesh Sanikop         IsGlobalMvBlock(*block.bp, global_motion_type) &&
685*09537850SAkhilesh Sanikop         SetupShear(global_motion_params);
686*09537850SAkhilesh Sanikop     // Valid global motion type implies reference type can't be intra.
687*09537850SAkhilesh Sanikop     assert(!is_global_valid || reference_type != kReferenceFrameIntra);
688*09537850SAkhilesh Sanikop     if (is_global_valid) return global_motion_params;
689*09537850SAkhilesh Sanikop   }
690*09537850SAkhilesh Sanikop   return nullptr;
691*09537850SAkhilesh Sanikop }
692*09537850SAkhilesh Sanikop 
InterPrediction(const Block & block,const Plane plane,const int x,const int y,const int prediction_width,const int prediction_height,int candidate_row,int candidate_column,bool * const is_local_valid,GlobalMotion * const local_warp_params)693*09537850SAkhilesh Sanikop bool Tile::InterPrediction(const Block& block, const Plane plane, const int x,
694*09537850SAkhilesh Sanikop                            const int y, const int prediction_width,
695*09537850SAkhilesh Sanikop                            const int prediction_height, int candidate_row,
696*09537850SAkhilesh Sanikop                            int candidate_column, bool* const is_local_valid,
697*09537850SAkhilesh Sanikop                            GlobalMotion* const local_warp_params) {
698*09537850SAkhilesh Sanikop   const int bitdepth = sequence_header_.color_config.bitdepth;
699*09537850SAkhilesh Sanikop   const BlockParameters& bp = *block.bp;
700*09537850SAkhilesh Sanikop   const BlockParameters& bp_reference =
701*09537850SAkhilesh Sanikop       *block_parameters_holder_.Find(candidate_row, candidate_column);
702*09537850SAkhilesh Sanikop   const bool is_compound =
703*09537850SAkhilesh Sanikop       bp_reference.reference_frame[1] > kReferenceFrameIntra;
704*09537850SAkhilesh Sanikop   assert(bp.is_inter);
705*09537850SAkhilesh Sanikop   const bool is_inter_intra = bp.reference_frame[1] == kReferenceFrameIntra;
706*09537850SAkhilesh Sanikop 
707*09537850SAkhilesh Sanikop   const PredictionParameters& prediction_parameters =
708*09537850SAkhilesh Sanikop       *block.bp->prediction_parameters;
709*09537850SAkhilesh Sanikop   uint8_t* const dest = GetStartPoint(buffer_, plane, x, y, bitdepth);
710*09537850SAkhilesh Sanikop   const ptrdiff_t dest_stride = buffer_[plane].columns();  // In bytes.
711*09537850SAkhilesh Sanikop   for (int index = 0; index < 1 + static_cast<int>(is_compound); ++index) {
712*09537850SAkhilesh Sanikop     const ReferenceFrameType reference_type =
713*09537850SAkhilesh Sanikop         bp_reference.reference_frame[index];
714*09537850SAkhilesh Sanikop     GlobalMotion global_motion_params =
715*09537850SAkhilesh Sanikop         frame_header_.global_motion[reference_type];
716*09537850SAkhilesh Sanikop     GlobalMotion* warp_params =
717*09537850SAkhilesh Sanikop         GetWarpParams(block, plane, prediction_width, prediction_height,
718*09537850SAkhilesh Sanikop                       prediction_parameters, reference_type, is_local_valid,
719*09537850SAkhilesh Sanikop                       &global_motion_params, local_warp_params);
720*09537850SAkhilesh Sanikop     if (warp_params != nullptr) {
721*09537850SAkhilesh Sanikop       if (!BlockWarpProcess(block, plane, index, x, y, prediction_width,
722*09537850SAkhilesh Sanikop                             prediction_height, warp_params, is_compound,
723*09537850SAkhilesh Sanikop                             is_inter_intra, dest, dest_stride)) {
724*09537850SAkhilesh Sanikop         return false;
725*09537850SAkhilesh Sanikop       }
726*09537850SAkhilesh Sanikop     } else {
727*09537850SAkhilesh Sanikop       const int reference_index =
728*09537850SAkhilesh Sanikop           prediction_parameters.use_intra_block_copy
729*09537850SAkhilesh Sanikop               ? -1
730*09537850SAkhilesh Sanikop               : frame_header_.reference_frame_index[reference_type -
731*09537850SAkhilesh Sanikop                                                     kReferenceFrameLast];
732*09537850SAkhilesh Sanikop       if (!BlockInterPrediction(
733*09537850SAkhilesh Sanikop               block, plane, reference_index, bp_reference.mv.mv[index], x, y,
734*09537850SAkhilesh Sanikop               prediction_width, prediction_height, candidate_row,
735*09537850SAkhilesh Sanikop               candidate_column, block.scratch_buffer->prediction_buffer[index],
736*09537850SAkhilesh Sanikop               is_compound, is_inter_intra, dest, dest_stride)) {
737*09537850SAkhilesh Sanikop         return false;
738*09537850SAkhilesh Sanikop       }
739*09537850SAkhilesh Sanikop     }
740*09537850SAkhilesh Sanikop   }
741*09537850SAkhilesh Sanikop 
742*09537850SAkhilesh Sanikop   const int subsampling_x = subsampling_x_[plane];
743*09537850SAkhilesh Sanikop   const int subsampling_y = subsampling_y_[plane];
744*09537850SAkhilesh Sanikop   ptrdiff_t prediction_mask_stride = 0;
745*09537850SAkhilesh Sanikop   const uint8_t* prediction_mask = nullptr;
746*09537850SAkhilesh Sanikop   if (prediction_parameters.compound_prediction_type ==
747*09537850SAkhilesh Sanikop       kCompoundPredictionTypeWedge) {
748*09537850SAkhilesh Sanikop     const Array2D<uint8_t>& wedge_mask =
749*09537850SAkhilesh Sanikop         wedge_masks_[GetWedgeBlockSizeIndex(block.size)]
750*09537850SAkhilesh Sanikop                     [prediction_parameters.wedge_sign]
751*09537850SAkhilesh Sanikop                     [prediction_parameters.wedge_index];
752*09537850SAkhilesh Sanikop     prediction_mask = wedge_mask[0];
753*09537850SAkhilesh Sanikop     prediction_mask_stride = wedge_mask.columns();
754*09537850SAkhilesh Sanikop   } else if (prediction_parameters.compound_prediction_type ==
755*09537850SAkhilesh Sanikop              kCompoundPredictionTypeIntra) {
756*09537850SAkhilesh Sanikop     // 7.11.3.13. The inter intra masks are precomputed and stored as a set of
757*09537850SAkhilesh Sanikop     // look up tables.
758*09537850SAkhilesh Sanikop     assert(prediction_parameters.inter_intra_mode < kNumInterIntraModes);
759*09537850SAkhilesh Sanikop     prediction_mask =
760*09537850SAkhilesh Sanikop         kInterIntraMasks[prediction_parameters.inter_intra_mode]
761*09537850SAkhilesh Sanikop                         [GetInterIntraMaskLookupIndex(prediction_width)]
762*09537850SAkhilesh Sanikop                         [GetInterIntraMaskLookupIndex(prediction_height)];
763*09537850SAkhilesh Sanikop     prediction_mask_stride = prediction_width;
764*09537850SAkhilesh Sanikop   } else if (prediction_parameters.compound_prediction_type ==
765*09537850SAkhilesh Sanikop              kCompoundPredictionTypeDiffWeighted) {
766*09537850SAkhilesh Sanikop     if (plane == kPlaneY) {
767*09537850SAkhilesh Sanikop       assert(prediction_width >= 8);
768*09537850SAkhilesh Sanikop       assert(prediction_height >= 8);
769*09537850SAkhilesh Sanikop       dsp_.weight_mask[FloorLog2(prediction_width) - 3]
770*09537850SAkhilesh Sanikop                       [FloorLog2(prediction_height) - 3]
771*09537850SAkhilesh Sanikop                       [static_cast<int>(prediction_parameters.mask_is_inverse)](
772*09537850SAkhilesh Sanikop                           block.scratch_buffer->prediction_buffer[0],
773*09537850SAkhilesh Sanikop                           block.scratch_buffer->prediction_buffer[1],
774*09537850SAkhilesh Sanikop                           block.scratch_buffer->weight_mask, block.width);
775*09537850SAkhilesh Sanikop     }
776*09537850SAkhilesh Sanikop     prediction_mask = block.scratch_buffer->weight_mask;
777*09537850SAkhilesh Sanikop     prediction_mask_stride = block.width;
778*09537850SAkhilesh Sanikop   }
779*09537850SAkhilesh Sanikop 
780*09537850SAkhilesh Sanikop   if (is_compound) {
781*09537850SAkhilesh Sanikop     CompoundInterPrediction(block, prediction_mask, prediction_mask_stride,
782*09537850SAkhilesh Sanikop                             prediction_width, prediction_height, subsampling_x,
783*09537850SAkhilesh Sanikop                             subsampling_y, candidate_row, candidate_column,
784*09537850SAkhilesh Sanikop                             dest, dest_stride);
785*09537850SAkhilesh Sanikop   } else if (prediction_parameters.motion_mode == kMotionModeObmc) {
786*09537850SAkhilesh Sanikop     // Obmc mode is allowed only for single reference (!is_compound).
787*09537850SAkhilesh Sanikop     return ObmcPrediction(block, plane, prediction_width, prediction_height);
788*09537850SAkhilesh Sanikop   } else if (is_inter_intra) {
789*09537850SAkhilesh Sanikop     // InterIntra and obmc must be mutually exclusive.
790*09537850SAkhilesh Sanikop     InterIntraPrediction(
791*09537850SAkhilesh Sanikop         block.scratch_buffer->prediction_buffer[0], prediction_mask,
792*09537850SAkhilesh Sanikop         prediction_mask_stride, prediction_parameters, prediction_width,
793*09537850SAkhilesh Sanikop         prediction_height, subsampling_x, subsampling_y, dest, dest_stride);
794*09537850SAkhilesh Sanikop   }
795*09537850SAkhilesh Sanikop   return true;
796*09537850SAkhilesh Sanikop }
797*09537850SAkhilesh Sanikop 
ObmcBlockPrediction(const Block & block,const MotionVector & mv,const Plane plane,const int reference_frame_index,const int width,const int height,const int x,const int y,const int candidate_row,const int candidate_column,const ObmcDirection blending_direction)798*09537850SAkhilesh Sanikop bool Tile::ObmcBlockPrediction(const Block& block, const MotionVector& mv,
799*09537850SAkhilesh Sanikop                                const Plane plane,
800*09537850SAkhilesh Sanikop                                const int reference_frame_index, const int width,
801*09537850SAkhilesh Sanikop                                const int height, const int x, const int y,
802*09537850SAkhilesh Sanikop                                const int candidate_row,
803*09537850SAkhilesh Sanikop                                const int candidate_column,
804*09537850SAkhilesh Sanikop                                const ObmcDirection blending_direction) {
805*09537850SAkhilesh Sanikop   const int bitdepth = sequence_header_.color_config.bitdepth;
806*09537850SAkhilesh Sanikop   // Obmc's prediction needs to be clipped before blending with above/left
807*09537850SAkhilesh Sanikop   // prediction blocks.
808*09537850SAkhilesh Sanikop   // Obmc prediction is used only when is_compound is false. So it is safe to
809*09537850SAkhilesh Sanikop   // use prediction_buffer[1] as a temporary buffer for the Obmc prediction.
810*09537850SAkhilesh Sanikop   static_assert(sizeof(block.scratch_buffer->prediction_buffer[1]) >=
811*09537850SAkhilesh Sanikop                     64 * 64 * sizeof(uint16_t),
812*09537850SAkhilesh Sanikop                 "");
813*09537850SAkhilesh Sanikop   auto* const obmc_buffer =
814*09537850SAkhilesh Sanikop       reinterpret_cast<uint8_t*>(block.scratch_buffer->prediction_buffer[1]);
815*09537850SAkhilesh Sanikop   const ptrdiff_t obmc_buffer_stride =
816*09537850SAkhilesh Sanikop       (bitdepth == 8) ? width : width * sizeof(uint16_t);
817*09537850SAkhilesh Sanikop   if (!BlockInterPrediction(block, plane, reference_frame_index, mv, x, y,
818*09537850SAkhilesh Sanikop                             width, height, candidate_row, candidate_column,
819*09537850SAkhilesh Sanikop                             nullptr, false, false, obmc_buffer,
820*09537850SAkhilesh Sanikop                             obmc_buffer_stride)) {
821*09537850SAkhilesh Sanikop     return false;
822*09537850SAkhilesh Sanikop   }
823*09537850SAkhilesh Sanikop 
824*09537850SAkhilesh Sanikop   uint8_t* const prediction = GetStartPoint(buffer_, plane, x, y, bitdepth);
825*09537850SAkhilesh Sanikop   const ptrdiff_t prediction_stride = buffer_[plane].columns();
826*09537850SAkhilesh Sanikop   dsp_.obmc_blend[blending_direction](prediction, prediction_stride, width,
827*09537850SAkhilesh Sanikop                                       height, obmc_buffer, obmc_buffer_stride);
828*09537850SAkhilesh Sanikop   return true;
829*09537850SAkhilesh Sanikop }
830*09537850SAkhilesh Sanikop 
ObmcPrediction(const Block & block,const Plane plane,const int width,const int height)831*09537850SAkhilesh Sanikop bool Tile::ObmcPrediction(const Block& block, const Plane plane,
832*09537850SAkhilesh Sanikop                           const int width, const int height) {
833*09537850SAkhilesh Sanikop   const int subsampling_x = subsampling_x_[plane];
834*09537850SAkhilesh Sanikop   const int subsampling_y = subsampling_y_[plane];
835*09537850SAkhilesh Sanikop   if (block.top_available[kPlaneY] &&
836*09537850SAkhilesh Sanikop       !IsBlockSmallerThan8x8(block.residual_size[plane])) {
837*09537850SAkhilesh Sanikop     const int num_limit = std::min(uint8_t{4}, k4x4WidthLog2[block.size]);
838*09537850SAkhilesh Sanikop     const int column4x4_max =
839*09537850SAkhilesh Sanikop         std::min(block.column4x4 + block.width4x4, frame_header_.columns4x4);
840*09537850SAkhilesh Sanikop     const int candidate_row = block.row4x4 - 1;
841*09537850SAkhilesh Sanikop     const int block_start_y = MultiplyBy4(block.row4x4) >> subsampling_y;
842*09537850SAkhilesh Sanikop     int column4x4 = block.column4x4;
843*09537850SAkhilesh Sanikop     const int prediction_height = std::min(height >> 1, 32 >> subsampling_y);
844*09537850SAkhilesh Sanikop     for (int i = 0, step; i < num_limit && column4x4 < column4x4_max;
845*09537850SAkhilesh Sanikop          column4x4 += step) {
846*09537850SAkhilesh Sanikop       const int candidate_column = column4x4 | 1;
847*09537850SAkhilesh Sanikop       const BlockParameters& bp_top =
848*09537850SAkhilesh Sanikop           *block_parameters_holder_.Find(candidate_row, candidate_column);
849*09537850SAkhilesh Sanikop       const int candidate_block_size = bp_top.size;
850*09537850SAkhilesh Sanikop       step = Clip3(kNum4x4BlocksWide[candidate_block_size], 2, 16);
851*09537850SAkhilesh Sanikop       if (bp_top.reference_frame[0] > kReferenceFrameIntra) {
852*09537850SAkhilesh Sanikop         i++;
853*09537850SAkhilesh Sanikop         const int candidate_reference_frame_index =
854*09537850SAkhilesh Sanikop             frame_header_.reference_frame_index[bp_top.reference_frame[0] -
855*09537850SAkhilesh Sanikop                                                 kReferenceFrameLast];
856*09537850SAkhilesh Sanikop         const int prediction_width =
857*09537850SAkhilesh Sanikop             std::min(width, MultiplyBy4(step) >> subsampling_x);
858*09537850SAkhilesh Sanikop         if (!ObmcBlockPrediction(
859*09537850SAkhilesh Sanikop                 block, bp_top.mv.mv[0], plane, candidate_reference_frame_index,
860*09537850SAkhilesh Sanikop                 prediction_width, prediction_height,
861*09537850SAkhilesh Sanikop                 MultiplyBy4(column4x4) >> subsampling_x, block_start_y,
862*09537850SAkhilesh Sanikop                 candidate_row, candidate_column, kObmcDirectionVertical)) {
863*09537850SAkhilesh Sanikop           return false;
864*09537850SAkhilesh Sanikop         }
865*09537850SAkhilesh Sanikop       }
866*09537850SAkhilesh Sanikop     }
867*09537850SAkhilesh Sanikop   }
868*09537850SAkhilesh Sanikop 
869*09537850SAkhilesh Sanikop   if (block.left_available[kPlaneY]) {
870*09537850SAkhilesh Sanikop     const int num_limit = std::min(uint8_t{4}, k4x4HeightLog2[block.size]);
871*09537850SAkhilesh Sanikop     const int row4x4_max =
872*09537850SAkhilesh Sanikop         std::min(block.row4x4 + block.height4x4, frame_header_.rows4x4);
873*09537850SAkhilesh Sanikop     const int candidate_column = block.column4x4 - 1;
874*09537850SAkhilesh Sanikop     int row4x4 = block.row4x4;
875*09537850SAkhilesh Sanikop     const int block_start_x = MultiplyBy4(block.column4x4) >> subsampling_x;
876*09537850SAkhilesh Sanikop     const int prediction_width = std::min(width >> 1, 32 >> subsampling_x);
877*09537850SAkhilesh Sanikop     for (int i = 0, step; i < num_limit && row4x4 < row4x4_max;
878*09537850SAkhilesh Sanikop          row4x4 += step) {
879*09537850SAkhilesh Sanikop       const int candidate_row = row4x4 | 1;
880*09537850SAkhilesh Sanikop       const BlockParameters& bp_left =
881*09537850SAkhilesh Sanikop           *block_parameters_holder_.Find(candidate_row, candidate_column);
882*09537850SAkhilesh Sanikop       const int candidate_block_size = bp_left.size;
883*09537850SAkhilesh Sanikop       step = Clip3(kNum4x4BlocksHigh[candidate_block_size], 2, 16);
884*09537850SAkhilesh Sanikop       if (bp_left.reference_frame[0] > kReferenceFrameIntra) {
885*09537850SAkhilesh Sanikop         i++;
886*09537850SAkhilesh Sanikop         const int candidate_reference_frame_index =
887*09537850SAkhilesh Sanikop             frame_header_.reference_frame_index[bp_left.reference_frame[0] -
888*09537850SAkhilesh Sanikop                                                 kReferenceFrameLast];
889*09537850SAkhilesh Sanikop         const int prediction_height =
890*09537850SAkhilesh Sanikop             std::min(height, MultiplyBy4(step) >> subsampling_y);
891*09537850SAkhilesh Sanikop         if (!ObmcBlockPrediction(
892*09537850SAkhilesh Sanikop                 block, bp_left.mv.mv[0], plane, candidate_reference_frame_index,
893*09537850SAkhilesh Sanikop                 prediction_width, prediction_height, block_start_x,
894*09537850SAkhilesh Sanikop                 MultiplyBy4(row4x4) >> subsampling_y, candidate_row,
895*09537850SAkhilesh Sanikop                 candidate_column, kObmcDirectionHorizontal)) {
896*09537850SAkhilesh Sanikop           return false;
897*09537850SAkhilesh Sanikop         }
898*09537850SAkhilesh Sanikop       }
899*09537850SAkhilesh Sanikop     }
900*09537850SAkhilesh Sanikop   }
901*09537850SAkhilesh Sanikop   return true;
902*09537850SAkhilesh Sanikop }
903*09537850SAkhilesh Sanikop 
DistanceWeightedPrediction(void * prediction_0,void * prediction_1,const int width,const int height,const int candidate_row,const int candidate_column,uint8_t * dest,ptrdiff_t dest_stride)904*09537850SAkhilesh Sanikop void Tile::DistanceWeightedPrediction(void* prediction_0, void* prediction_1,
905*09537850SAkhilesh Sanikop                                       const int width, const int height,
906*09537850SAkhilesh Sanikop                                       const int candidate_row,
907*09537850SAkhilesh Sanikop                                       const int candidate_column, uint8_t* dest,
908*09537850SAkhilesh Sanikop                                       ptrdiff_t dest_stride) {
909*09537850SAkhilesh Sanikop   int distance[2];
910*09537850SAkhilesh Sanikop   int weight[2];
911*09537850SAkhilesh Sanikop   for (int reference = 0; reference < 2; ++reference) {
912*09537850SAkhilesh Sanikop     const BlockParameters& bp =
913*09537850SAkhilesh Sanikop         *block_parameters_holder_.Find(candidate_row, candidate_column);
914*09537850SAkhilesh Sanikop     // Note: distance[0] and distance[1] correspond to relative distance
915*09537850SAkhilesh Sanikop     // between current frame and reference frame [1] and [0], respectively.
916*09537850SAkhilesh Sanikop     distance[1 - reference] = std::min(
917*09537850SAkhilesh Sanikop         std::abs(static_cast<int>(
918*09537850SAkhilesh Sanikop             current_frame_.reference_info()
919*09537850SAkhilesh Sanikop                 ->relative_distance_from[bp.reference_frame[reference]])),
920*09537850SAkhilesh Sanikop         static_cast<int>(kMaxFrameDistance));
921*09537850SAkhilesh Sanikop   }
922*09537850SAkhilesh Sanikop   GetDistanceWeights(distance, weight);
923*09537850SAkhilesh Sanikop 
924*09537850SAkhilesh Sanikop   dsp_.distance_weighted_blend(prediction_0, prediction_1, weight[0], weight[1],
925*09537850SAkhilesh Sanikop                                width, height, dest, dest_stride);
926*09537850SAkhilesh Sanikop }
927*09537850SAkhilesh Sanikop 
ScaleMotionVector(const MotionVector & mv,const Plane plane,const int reference_frame_index,const int x,const int y,int * const start_x,int * const start_y,int * const step_x,int * const step_y)928*09537850SAkhilesh Sanikop void Tile::ScaleMotionVector(const MotionVector& mv, const Plane plane,
929*09537850SAkhilesh Sanikop                              const int reference_frame_index, const int x,
930*09537850SAkhilesh Sanikop                              const int y, int* const start_x,
931*09537850SAkhilesh Sanikop                              int* const start_y, int* const step_x,
932*09537850SAkhilesh Sanikop                              int* const step_y) {
933*09537850SAkhilesh Sanikop   const int reference_upscaled_width =
934*09537850SAkhilesh Sanikop       (reference_frame_index == -1)
935*09537850SAkhilesh Sanikop           ? frame_header_.upscaled_width
936*09537850SAkhilesh Sanikop           : reference_frames_[reference_frame_index]->upscaled_width();
937*09537850SAkhilesh Sanikop   const int reference_height =
938*09537850SAkhilesh Sanikop       (reference_frame_index == -1)
939*09537850SAkhilesh Sanikop           ? frame_header_.height
940*09537850SAkhilesh Sanikop           : reference_frames_[reference_frame_index]->frame_height();
941*09537850SAkhilesh Sanikop   assert(2 * frame_header_.width >= reference_upscaled_width &&
942*09537850SAkhilesh Sanikop          2 * frame_header_.height >= reference_height &&
943*09537850SAkhilesh Sanikop          frame_header_.width <= 16 * reference_upscaled_width &&
944*09537850SAkhilesh Sanikop          frame_header_.height <= 16 * reference_height);
945*09537850SAkhilesh Sanikop   const bool is_scaled_x = reference_upscaled_width != frame_header_.width;
946*09537850SAkhilesh Sanikop   const bool is_scaled_y = reference_height != frame_header_.height;
947*09537850SAkhilesh Sanikop   const int half_sample = 1 << (kSubPixelBits - 1);
948*09537850SAkhilesh Sanikop   int orig_x = (x << kSubPixelBits) + ((2 * mv.mv[1]) >> subsampling_x_[plane]);
949*09537850SAkhilesh Sanikop   int orig_y = (y << kSubPixelBits) + ((2 * mv.mv[0]) >> subsampling_y_[plane]);
950*09537850SAkhilesh Sanikop   const int rounding_offset =
951*09537850SAkhilesh Sanikop       DivideBy2(1 << (kScaleSubPixelBits - kSubPixelBits));
952*09537850SAkhilesh Sanikop   if (is_scaled_x) {
953*09537850SAkhilesh Sanikop     const int scale_x = ((reference_upscaled_width << kReferenceScaleShift) +
954*09537850SAkhilesh Sanikop                          DivideBy2(frame_header_.width)) /
955*09537850SAkhilesh Sanikop                         frame_header_.width;
956*09537850SAkhilesh Sanikop     *step_x = RightShiftWithRoundingSigned(
957*09537850SAkhilesh Sanikop         scale_x, kReferenceScaleShift - kScaleSubPixelBits);
958*09537850SAkhilesh Sanikop     orig_x += half_sample;
959*09537850SAkhilesh Sanikop     // When frame size is 4k and above, orig_x can be above 16 bits, scale_x can
960*09537850SAkhilesh Sanikop     // be up to 15 bits. So we use int64_t to hold base_x.
961*09537850SAkhilesh Sanikop     const int64_t base_x = static_cast<int64_t>(orig_x) * scale_x -
962*09537850SAkhilesh Sanikop                            (half_sample << kReferenceScaleShift);
963*09537850SAkhilesh Sanikop     *start_x =
964*09537850SAkhilesh Sanikop         RightShiftWithRoundingSigned(
965*09537850SAkhilesh Sanikop             base_x, kReferenceScaleShift + kSubPixelBits - kScaleSubPixelBits) +
966*09537850SAkhilesh Sanikop         rounding_offset;
967*09537850SAkhilesh Sanikop   } else {
968*09537850SAkhilesh Sanikop     *step_x = 1 << kScaleSubPixelBits;
969*09537850SAkhilesh Sanikop     *start_x = LeftShift(orig_x, 6) + rounding_offset;
970*09537850SAkhilesh Sanikop   }
971*09537850SAkhilesh Sanikop   if (is_scaled_y) {
972*09537850SAkhilesh Sanikop     const int scale_y = ((reference_height << kReferenceScaleShift) +
973*09537850SAkhilesh Sanikop                          DivideBy2(frame_header_.height)) /
974*09537850SAkhilesh Sanikop                         frame_header_.height;
975*09537850SAkhilesh Sanikop     *step_y = RightShiftWithRoundingSigned(
976*09537850SAkhilesh Sanikop         scale_y, kReferenceScaleShift - kScaleSubPixelBits);
977*09537850SAkhilesh Sanikop     orig_y += half_sample;
978*09537850SAkhilesh Sanikop     const int64_t base_y = static_cast<int64_t>(orig_y) * scale_y -
979*09537850SAkhilesh Sanikop                            (half_sample << kReferenceScaleShift);
980*09537850SAkhilesh Sanikop     *start_y =
981*09537850SAkhilesh Sanikop         RightShiftWithRoundingSigned(
982*09537850SAkhilesh Sanikop             base_y, kReferenceScaleShift + kSubPixelBits - kScaleSubPixelBits) +
983*09537850SAkhilesh Sanikop         rounding_offset;
984*09537850SAkhilesh Sanikop   } else {
985*09537850SAkhilesh Sanikop     *step_y = 1 << kScaleSubPixelBits;
986*09537850SAkhilesh Sanikop     *start_y = LeftShift(orig_y, 6) + rounding_offset;
987*09537850SAkhilesh Sanikop   }
988*09537850SAkhilesh Sanikop }
989*09537850SAkhilesh Sanikop 
990*09537850SAkhilesh Sanikop // static.
GetReferenceBlockPosition(const int reference_frame_index,const bool is_scaled,const int width,const int height,const int ref_start_x,const int ref_last_x,const int ref_start_y,const int ref_last_y,const int start_x,const int start_y,const int step_x,const int step_y,const int left_border,const int right_border,const int top_border,const int bottom_border,int * ref_block_start_x,int * ref_block_start_y,int * ref_block_end_x,int * ref_block_end_y)991*09537850SAkhilesh Sanikop bool Tile::GetReferenceBlockPosition(
992*09537850SAkhilesh Sanikop     const int reference_frame_index, const bool is_scaled, const int width,
993*09537850SAkhilesh Sanikop     const int height, const int ref_start_x, const int ref_last_x,
994*09537850SAkhilesh Sanikop     const int ref_start_y, const int ref_last_y, const int start_x,
995*09537850SAkhilesh Sanikop     const int start_y, const int step_x, const int step_y,
996*09537850SAkhilesh Sanikop     const int left_border, const int right_border, const int top_border,
997*09537850SAkhilesh Sanikop     const int bottom_border, int* ref_block_start_x, int* ref_block_start_y,
998*09537850SAkhilesh Sanikop     int* ref_block_end_x, int* ref_block_end_y) {
999*09537850SAkhilesh Sanikop   *ref_block_start_x = GetPixelPositionFromHighScale(start_x, 0, 0);
1000*09537850SAkhilesh Sanikop   *ref_block_start_y = GetPixelPositionFromHighScale(start_y, 0, 0);
1001*09537850SAkhilesh Sanikop   if (reference_frame_index == -1) {
1002*09537850SAkhilesh Sanikop     return false;
1003*09537850SAkhilesh Sanikop   }
1004*09537850SAkhilesh Sanikop   *ref_block_start_x -= kConvolveBorderLeftTop;
1005*09537850SAkhilesh Sanikop   *ref_block_start_y -= kConvolveBorderLeftTop;
1006*09537850SAkhilesh Sanikop   *ref_block_end_x = GetPixelPositionFromHighScale(start_x, step_x, width - 1) +
1007*09537850SAkhilesh Sanikop                      kConvolveBorderRight;
1008*09537850SAkhilesh Sanikop   *ref_block_end_y =
1009*09537850SAkhilesh Sanikop       GetPixelPositionFromHighScale(start_y, step_y, height - 1) +
1010*09537850SAkhilesh Sanikop       kConvolveBorderBottom;
1011*09537850SAkhilesh Sanikop   if (is_scaled) {
1012*09537850SAkhilesh Sanikop     const int block_height =
1013*09537850SAkhilesh Sanikop         (((height - 1) * step_y + (1 << kScaleSubPixelBits) - 1) >>
1014*09537850SAkhilesh Sanikop          kScaleSubPixelBits) +
1015*09537850SAkhilesh Sanikop         kSubPixelTaps;
1016*09537850SAkhilesh Sanikop     *ref_block_end_x += kConvolveScaleBorderRight - kConvolveBorderRight;
1017*09537850SAkhilesh Sanikop     *ref_block_end_y = *ref_block_start_y + block_height - 1;
1018*09537850SAkhilesh Sanikop   }
1019*09537850SAkhilesh Sanikop   // Determines if we need to extend beyond the left/right/top/bottom border.
1020*09537850SAkhilesh Sanikop   return *ref_block_start_x < (ref_start_x - left_border) ||
1021*09537850SAkhilesh Sanikop          *ref_block_end_x > (ref_last_x + right_border) ||
1022*09537850SAkhilesh Sanikop          *ref_block_start_y < (ref_start_y - top_border) ||
1023*09537850SAkhilesh Sanikop          *ref_block_end_y > (ref_last_y + bottom_border);
1024*09537850SAkhilesh Sanikop }
1025*09537850SAkhilesh Sanikop 
1026*09537850SAkhilesh Sanikop // Builds a block as the input for convolve, by copying the content of
1027*09537850SAkhilesh Sanikop // reference frame (either a decoded reference frame, or current frame).
1028*09537850SAkhilesh Sanikop // |block_extended_width| is the combined width of the block and its borders.
1029*09537850SAkhilesh Sanikop template <typename Pixel>
BuildConvolveBlock(const Plane plane,const int reference_frame_index,const bool is_scaled,const int height,const int ref_start_x,const int ref_last_x,const int ref_start_y,const int ref_last_y,const int step_y,const int ref_block_start_x,const int ref_block_end_x,const int ref_block_start_y,uint8_t * block_buffer,ptrdiff_t convolve_buffer_stride,ptrdiff_t block_extended_width)1030*09537850SAkhilesh Sanikop void Tile::BuildConvolveBlock(
1031*09537850SAkhilesh Sanikop     const Plane plane, const int reference_frame_index, const bool is_scaled,
1032*09537850SAkhilesh Sanikop     const int height, const int ref_start_x, const int ref_last_x,
1033*09537850SAkhilesh Sanikop     const int ref_start_y, const int ref_last_y, const int step_y,
1034*09537850SAkhilesh Sanikop     const int ref_block_start_x, const int ref_block_end_x,
1035*09537850SAkhilesh Sanikop     const int ref_block_start_y, uint8_t* block_buffer,
1036*09537850SAkhilesh Sanikop     ptrdiff_t convolve_buffer_stride, ptrdiff_t block_extended_width) {
1037*09537850SAkhilesh Sanikop   const YuvBuffer* const reference_buffer =
1038*09537850SAkhilesh Sanikop       (reference_frame_index == -1)
1039*09537850SAkhilesh Sanikop           ? current_frame_.buffer()
1040*09537850SAkhilesh Sanikop           : reference_frames_[reference_frame_index]->buffer();
1041*09537850SAkhilesh Sanikop   Array2DView<const Pixel> reference_block(
1042*09537850SAkhilesh Sanikop       reference_buffer->height(plane),
1043*09537850SAkhilesh Sanikop       reference_buffer->stride(plane) / sizeof(Pixel),
1044*09537850SAkhilesh Sanikop       reinterpret_cast<const Pixel*>(reference_buffer->data(plane)));
1045*09537850SAkhilesh Sanikop   auto* const block_head = reinterpret_cast<Pixel*>(block_buffer);
1046*09537850SAkhilesh Sanikop   convolve_buffer_stride /= sizeof(Pixel);
1047*09537850SAkhilesh Sanikop   int block_height = height + kConvolveBorderLeftTop + kConvolveBorderBottom;
1048*09537850SAkhilesh Sanikop   if (is_scaled) {
1049*09537850SAkhilesh Sanikop     block_height = (((height - 1) * step_y + (1 << kScaleSubPixelBits) - 1) >>
1050*09537850SAkhilesh Sanikop                     kScaleSubPixelBits) +
1051*09537850SAkhilesh Sanikop                    kSubPixelTaps;
1052*09537850SAkhilesh Sanikop   }
1053*09537850SAkhilesh Sanikop   const int copy_start_x = Clip3(ref_block_start_x, ref_start_x, ref_last_x);
1054*09537850SAkhilesh Sanikop   const int copy_start_y = Clip3(ref_block_start_y, ref_start_y, ref_last_y);
1055*09537850SAkhilesh Sanikop   const int copy_end_x = Clip3(ref_block_end_x, copy_start_x, ref_last_x);
1056*09537850SAkhilesh Sanikop   const int block_width = copy_end_x - copy_start_x + 1;
1057*09537850SAkhilesh Sanikop   const bool extend_left = ref_block_start_x < ref_start_x;
1058*09537850SAkhilesh Sanikop   const bool extend_right = ref_block_end_x > ref_last_x;
1059*09537850SAkhilesh Sanikop   const bool out_of_left = copy_start_x > ref_block_end_x;
1060*09537850SAkhilesh Sanikop   const bool out_of_right = copy_end_x < ref_block_start_x;
1061*09537850SAkhilesh Sanikop   if (out_of_left || out_of_right) {
1062*09537850SAkhilesh Sanikop     const int ref_x = out_of_left ? copy_start_x : copy_end_x;
1063*09537850SAkhilesh Sanikop     Pixel* buf_ptr = block_head;
1064*09537850SAkhilesh Sanikop     for (int y = 0, ref_y = copy_start_y; y < block_height; ++y) {
1065*09537850SAkhilesh Sanikop       Memset(buf_ptr, reference_block[ref_y][ref_x], block_extended_width);
1066*09537850SAkhilesh Sanikop       if (ref_block_start_y + y >= ref_start_y &&
1067*09537850SAkhilesh Sanikop           ref_block_start_y + y < ref_last_y) {
1068*09537850SAkhilesh Sanikop         ++ref_y;
1069*09537850SAkhilesh Sanikop       }
1070*09537850SAkhilesh Sanikop       buf_ptr += convolve_buffer_stride;
1071*09537850SAkhilesh Sanikop     }
1072*09537850SAkhilesh Sanikop   } else {
1073*09537850SAkhilesh Sanikop     Pixel* buf_ptr = block_head;
1074*09537850SAkhilesh Sanikop     const int left_width = copy_start_x - ref_block_start_x;
1075*09537850SAkhilesh Sanikop     for (int y = 0, ref_y = copy_start_y; y < block_height; ++y) {
1076*09537850SAkhilesh Sanikop       if (extend_left) {
1077*09537850SAkhilesh Sanikop         Memset(buf_ptr, reference_block[ref_y][copy_start_x], left_width);
1078*09537850SAkhilesh Sanikop       }
1079*09537850SAkhilesh Sanikop       memcpy(buf_ptr + left_width, &reference_block[ref_y][copy_start_x],
1080*09537850SAkhilesh Sanikop              block_width * sizeof(Pixel));
1081*09537850SAkhilesh Sanikop       if (extend_right) {
1082*09537850SAkhilesh Sanikop         Memset(buf_ptr + left_width + block_width,
1083*09537850SAkhilesh Sanikop                reference_block[ref_y][copy_end_x],
1084*09537850SAkhilesh Sanikop                block_extended_width - left_width - block_width);
1085*09537850SAkhilesh Sanikop       }
1086*09537850SAkhilesh Sanikop       if (ref_block_start_y + y >= ref_start_y &&
1087*09537850SAkhilesh Sanikop           ref_block_start_y + y < ref_last_y) {
1088*09537850SAkhilesh Sanikop         ++ref_y;
1089*09537850SAkhilesh Sanikop       }
1090*09537850SAkhilesh Sanikop       buf_ptr += convolve_buffer_stride;
1091*09537850SAkhilesh Sanikop     }
1092*09537850SAkhilesh Sanikop   }
1093*09537850SAkhilesh Sanikop }
1094*09537850SAkhilesh Sanikop 
BlockInterPrediction(const Block & block,const Plane plane,const int reference_frame_index,const MotionVector & mv,const int x,const int y,const int width,const int height,const int candidate_row,const int candidate_column,uint16_t * const prediction,const bool is_compound,const bool is_inter_intra,uint8_t * const dest,const ptrdiff_t dest_stride)1095*09537850SAkhilesh Sanikop bool Tile::BlockInterPrediction(
1096*09537850SAkhilesh Sanikop     const Block& block, const Plane plane, const int reference_frame_index,
1097*09537850SAkhilesh Sanikop     const MotionVector& mv, const int x, const int y, const int width,
1098*09537850SAkhilesh Sanikop     const int height, const int candidate_row, const int candidate_column,
1099*09537850SAkhilesh Sanikop     uint16_t* const prediction, const bool is_compound,
1100*09537850SAkhilesh Sanikop     const bool is_inter_intra, uint8_t* const dest,
1101*09537850SAkhilesh Sanikop     const ptrdiff_t dest_stride) {
1102*09537850SAkhilesh Sanikop   const BlockParameters& bp =
1103*09537850SAkhilesh Sanikop       *block_parameters_holder_.Find(candidate_row, candidate_column);
1104*09537850SAkhilesh Sanikop   int start_x;
1105*09537850SAkhilesh Sanikop   int start_y;
1106*09537850SAkhilesh Sanikop   int step_x;
1107*09537850SAkhilesh Sanikop   int step_y;
1108*09537850SAkhilesh Sanikop   ScaleMotionVector(mv, plane, reference_frame_index, x, y, &start_x, &start_y,
1109*09537850SAkhilesh Sanikop                     &step_x, &step_y);
1110*09537850SAkhilesh Sanikop   const int horizontal_filter_index = bp.interpolation_filter[1];
1111*09537850SAkhilesh Sanikop   const int vertical_filter_index = bp.interpolation_filter[0];
1112*09537850SAkhilesh Sanikop   const int subsampling_x = subsampling_x_[plane];
1113*09537850SAkhilesh Sanikop   const int subsampling_y = subsampling_y_[plane];
1114*09537850SAkhilesh Sanikop   // reference_frame_index equal to -1 indicates using current frame as
1115*09537850SAkhilesh Sanikop   // reference.
1116*09537850SAkhilesh Sanikop   const YuvBuffer* const reference_buffer =
1117*09537850SAkhilesh Sanikop       (reference_frame_index == -1)
1118*09537850SAkhilesh Sanikop           ? current_frame_.buffer()
1119*09537850SAkhilesh Sanikop           : reference_frames_[reference_frame_index]->buffer();
1120*09537850SAkhilesh Sanikop   const int reference_upscaled_width =
1121*09537850SAkhilesh Sanikop       (reference_frame_index == -1)
1122*09537850SAkhilesh Sanikop           ? MultiplyBy4(frame_header_.columns4x4)
1123*09537850SAkhilesh Sanikop           : reference_frames_[reference_frame_index]->upscaled_width();
1124*09537850SAkhilesh Sanikop   const int reference_height =
1125*09537850SAkhilesh Sanikop       (reference_frame_index == -1)
1126*09537850SAkhilesh Sanikop           ? MultiplyBy4(frame_header_.rows4x4)
1127*09537850SAkhilesh Sanikop           : reference_frames_[reference_frame_index]->frame_height();
1128*09537850SAkhilesh Sanikop   const int ref_start_x = 0;
1129*09537850SAkhilesh Sanikop   const int ref_last_x =
1130*09537850SAkhilesh Sanikop       SubsampledValue(reference_upscaled_width, subsampling_x) - 1;
1131*09537850SAkhilesh Sanikop   const int ref_start_y = 0;
1132*09537850SAkhilesh Sanikop   const int ref_last_y = SubsampledValue(reference_height, subsampling_y) - 1;
1133*09537850SAkhilesh Sanikop 
1134*09537850SAkhilesh Sanikop   const bool is_scaled = (reference_frame_index != -1) &&
1135*09537850SAkhilesh Sanikop                          (frame_header_.width != reference_upscaled_width ||
1136*09537850SAkhilesh Sanikop                           frame_header_.height != reference_height);
1137*09537850SAkhilesh Sanikop   const int bitdepth = sequence_header_.color_config.bitdepth;
1138*09537850SAkhilesh Sanikop   const int pixel_size = (bitdepth == 8) ? sizeof(uint8_t) : sizeof(uint16_t);
1139*09537850SAkhilesh Sanikop   int ref_block_start_x;
1140*09537850SAkhilesh Sanikop   int ref_block_start_y;
1141*09537850SAkhilesh Sanikop   int ref_block_end_x;
1142*09537850SAkhilesh Sanikop   int ref_block_end_y;
1143*09537850SAkhilesh Sanikop   const bool extend_block = GetReferenceBlockPosition(
1144*09537850SAkhilesh Sanikop       reference_frame_index, is_scaled, width, height, ref_start_x, ref_last_x,
1145*09537850SAkhilesh Sanikop       ref_start_y, ref_last_y, start_x, start_y, step_x, step_y,
1146*09537850SAkhilesh Sanikop       reference_buffer->left_border(plane),
1147*09537850SAkhilesh Sanikop       reference_buffer->right_border(plane),
1148*09537850SAkhilesh Sanikop       reference_buffer->top_border(plane),
1149*09537850SAkhilesh Sanikop       reference_buffer->bottom_border(plane), &ref_block_start_x,
1150*09537850SAkhilesh Sanikop       &ref_block_start_y, &ref_block_end_x, &ref_block_end_y);
1151*09537850SAkhilesh Sanikop 
1152*09537850SAkhilesh Sanikop   // In frame parallel mode, ensure that the reference block has been decoded
1153*09537850SAkhilesh Sanikop   // and available for referencing.
1154*09537850SAkhilesh Sanikop   if (reference_frame_index != -1 && frame_parallel_) {
1155*09537850SAkhilesh Sanikop     // For U and V planes with subsampling, we need to multiply the value of
1156*09537850SAkhilesh Sanikop     // ref_block_end_y by 2 since we only track the progress of the Y planes.
1157*09537850SAkhilesh Sanikop     const int reference_y_max = LeftShift(
1158*09537850SAkhilesh Sanikop         std::min(ref_block_end_y + kSubPixelTaps, ref_last_y), subsampling_y);
1159*09537850SAkhilesh Sanikop     if (reference_frame_progress_cache_[reference_frame_index] <
1160*09537850SAkhilesh Sanikop             reference_y_max &&
1161*09537850SAkhilesh Sanikop         !reference_frames_[reference_frame_index]->WaitUntil(
1162*09537850SAkhilesh Sanikop             reference_y_max,
1163*09537850SAkhilesh Sanikop             &reference_frame_progress_cache_[reference_frame_index])) {
1164*09537850SAkhilesh Sanikop       return false;
1165*09537850SAkhilesh Sanikop     }
1166*09537850SAkhilesh Sanikop   }
1167*09537850SAkhilesh Sanikop 
1168*09537850SAkhilesh Sanikop   const uint8_t* block_start = nullptr;
1169*09537850SAkhilesh Sanikop   ptrdiff_t convolve_buffer_stride;
1170*09537850SAkhilesh Sanikop   if (!extend_block) {
1171*09537850SAkhilesh Sanikop     const YuvBuffer* const reference_buffer =
1172*09537850SAkhilesh Sanikop         (reference_frame_index == -1)
1173*09537850SAkhilesh Sanikop             ? current_frame_.buffer()
1174*09537850SAkhilesh Sanikop             : reference_frames_[reference_frame_index]->buffer();
1175*09537850SAkhilesh Sanikop     convolve_buffer_stride = reference_buffer->stride(plane);
1176*09537850SAkhilesh Sanikop     if (reference_frame_index == -1 || is_scaled) {
1177*09537850SAkhilesh Sanikop       block_start = reference_buffer->data(plane) +
1178*09537850SAkhilesh Sanikop                     ref_block_start_y * reference_buffer->stride(plane) +
1179*09537850SAkhilesh Sanikop                     ref_block_start_x * pixel_size;
1180*09537850SAkhilesh Sanikop     } else {
1181*09537850SAkhilesh Sanikop       block_start = reference_buffer->data(plane) +
1182*09537850SAkhilesh Sanikop                     (ref_block_start_y + kConvolveBorderLeftTop) *
1183*09537850SAkhilesh Sanikop                         reference_buffer->stride(plane) +
1184*09537850SAkhilesh Sanikop                     (ref_block_start_x + kConvolveBorderLeftTop) * pixel_size;
1185*09537850SAkhilesh Sanikop     }
1186*09537850SAkhilesh Sanikop   } else {
1187*09537850SAkhilesh Sanikop     const int border_right =
1188*09537850SAkhilesh Sanikop         is_scaled ? kConvolveScaleBorderRight : kConvolveBorderRight;
1189*09537850SAkhilesh Sanikop     // The block width can be at most 2 times as much as current
1190*09537850SAkhilesh Sanikop     // block's width because of scaling.
1191*09537850SAkhilesh Sanikop     auto block_extended_width = Align<ptrdiff_t>(
1192*09537850SAkhilesh Sanikop         (2 * width + kConvolveBorderLeftTop + border_right) * pixel_size,
1193*09537850SAkhilesh Sanikop         kMaxAlignment);
1194*09537850SAkhilesh Sanikop     convolve_buffer_stride = block.scratch_buffer->convolve_block_buffer_stride;
1195*09537850SAkhilesh Sanikop #if LIBGAV1_MAX_BITDEPTH >= 10
1196*09537850SAkhilesh Sanikop     if (bitdepth > 8) {
1197*09537850SAkhilesh Sanikop       BuildConvolveBlock<uint16_t>(
1198*09537850SAkhilesh Sanikop           plane, reference_frame_index, is_scaled, height, ref_start_x,
1199*09537850SAkhilesh Sanikop           ref_last_x, ref_start_y, ref_last_y, step_y, ref_block_start_x,
1200*09537850SAkhilesh Sanikop           ref_block_end_x, ref_block_start_y,
1201*09537850SAkhilesh Sanikop           block.scratch_buffer->convolve_block_buffer.get(),
1202*09537850SAkhilesh Sanikop           convolve_buffer_stride, block_extended_width);
1203*09537850SAkhilesh Sanikop     } else {
1204*09537850SAkhilesh Sanikop #endif
1205*09537850SAkhilesh Sanikop       BuildConvolveBlock<uint8_t>(
1206*09537850SAkhilesh Sanikop           plane, reference_frame_index, is_scaled, height, ref_start_x,
1207*09537850SAkhilesh Sanikop           ref_last_x, ref_start_y, ref_last_y, step_y, ref_block_start_x,
1208*09537850SAkhilesh Sanikop           ref_block_end_x, ref_block_start_y,
1209*09537850SAkhilesh Sanikop           block.scratch_buffer->convolve_block_buffer.get(),
1210*09537850SAkhilesh Sanikop           convolve_buffer_stride, block_extended_width);
1211*09537850SAkhilesh Sanikop #if LIBGAV1_MAX_BITDEPTH >= 10
1212*09537850SAkhilesh Sanikop     }
1213*09537850SAkhilesh Sanikop #endif
1214*09537850SAkhilesh Sanikop     block_start = block.scratch_buffer->convolve_block_buffer.get() +
1215*09537850SAkhilesh Sanikop                   (is_scaled ? 0
1216*09537850SAkhilesh Sanikop                              : kConvolveBorderLeftTop * convolve_buffer_stride +
1217*09537850SAkhilesh Sanikop                                    kConvolveBorderLeftTop * pixel_size);
1218*09537850SAkhilesh Sanikop   }
1219*09537850SAkhilesh Sanikop 
1220*09537850SAkhilesh Sanikop   void* const output =
1221*09537850SAkhilesh Sanikop       (is_compound || is_inter_intra) ? prediction : static_cast<void*>(dest);
1222*09537850SAkhilesh Sanikop   ptrdiff_t output_stride = (is_compound || is_inter_intra)
1223*09537850SAkhilesh Sanikop                                 ? /*prediction_stride=*/width
1224*09537850SAkhilesh Sanikop                                 : dest_stride;
1225*09537850SAkhilesh Sanikop #if LIBGAV1_MAX_BITDEPTH >= 10
1226*09537850SAkhilesh Sanikop   // |is_inter_intra| calculations are written to the |prediction| buffer.
1227*09537850SAkhilesh Sanikop   // Unlike the |is_compound| calculations the output is Pixel and not uint16_t.
1228*09537850SAkhilesh Sanikop   // convolve_func() expects |output_stride| to be in bytes and not Pixels.
1229*09537850SAkhilesh Sanikop   // |prediction_stride| is in units of uint16_t. Adjust |output_stride| to
1230*09537850SAkhilesh Sanikop   // account for this.
1231*09537850SAkhilesh Sanikop   if (is_inter_intra && sequence_header_.color_config.bitdepth > 8) {
1232*09537850SAkhilesh Sanikop     output_stride *= 2;
1233*09537850SAkhilesh Sanikop   }
1234*09537850SAkhilesh Sanikop #endif
1235*09537850SAkhilesh Sanikop   assert(output != nullptr);
1236*09537850SAkhilesh Sanikop   if (is_scaled) {
1237*09537850SAkhilesh Sanikop     dsp::ConvolveScaleFunc convolve_func = dsp_.convolve_scale[is_compound];
1238*09537850SAkhilesh Sanikop     assert(convolve_func != nullptr);
1239*09537850SAkhilesh Sanikop 
1240*09537850SAkhilesh Sanikop     convolve_func(block_start, convolve_buffer_stride, horizontal_filter_index,
1241*09537850SAkhilesh Sanikop                   vertical_filter_index, start_x, start_y, step_x, step_y,
1242*09537850SAkhilesh Sanikop                   width, height, output, output_stride);
1243*09537850SAkhilesh Sanikop   } else {
1244*09537850SAkhilesh Sanikop     const int horizontal_filter_id = (start_x >> 6) & kSubPixelMask;
1245*09537850SAkhilesh Sanikop     const int vertical_filter_id = (start_y >> 6) & kSubPixelMask;
1246*09537850SAkhilesh Sanikop 
1247*09537850SAkhilesh Sanikop     dsp::ConvolveFunc convolve_func =
1248*09537850SAkhilesh Sanikop         dsp_.convolve[reference_frame_index == -1][is_compound]
1249*09537850SAkhilesh Sanikop                      [vertical_filter_id != 0][horizontal_filter_id != 0];
1250*09537850SAkhilesh Sanikop     assert(convolve_func != nullptr);
1251*09537850SAkhilesh Sanikop 
1252*09537850SAkhilesh Sanikop     convolve_func(block_start, convolve_buffer_stride, horizontal_filter_index,
1253*09537850SAkhilesh Sanikop                   vertical_filter_index, horizontal_filter_id,
1254*09537850SAkhilesh Sanikop                   vertical_filter_id, width, height, output, output_stride);
1255*09537850SAkhilesh Sanikop   }
1256*09537850SAkhilesh Sanikop   return true;
1257*09537850SAkhilesh Sanikop }
1258*09537850SAkhilesh Sanikop 
BlockWarpProcess(const Block & block,const Plane plane,const int index,const int block_start_x,const int block_start_y,const int width,const int height,GlobalMotion * const warp_params,const bool is_compound,const bool is_inter_intra,uint8_t * const dest,const ptrdiff_t dest_stride)1259*09537850SAkhilesh Sanikop bool Tile::BlockWarpProcess(const Block& block, const Plane plane,
1260*09537850SAkhilesh Sanikop                             const int index, const int block_start_x,
1261*09537850SAkhilesh Sanikop                             const int block_start_y, const int width,
1262*09537850SAkhilesh Sanikop                             const int height, GlobalMotion* const warp_params,
1263*09537850SAkhilesh Sanikop                             const bool is_compound, const bool is_inter_intra,
1264*09537850SAkhilesh Sanikop                             uint8_t* const dest, const ptrdiff_t dest_stride) {
1265*09537850SAkhilesh Sanikop   assert(width >= 8 && height >= 8);
1266*09537850SAkhilesh Sanikop   const BlockParameters& bp = *block.bp;
1267*09537850SAkhilesh Sanikop   const int reference_frame_index =
1268*09537850SAkhilesh Sanikop       frame_header_.reference_frame_index[bp.reference_frame[index] -
1269*09537850SAkhilesh Sanikop                                           kReferenceFrameLast];
1270*09537850SAkhilesh Sanikop   const uint8_t* const source =
1271*09537850SAkhilesh Sanikop       reference_frames_[reference_frame_index]->buffer()->data(plane);
1272*09537850SAkhilesh Sanikop   ptrdiff_t source_stride =
1273*09537850SAkhilesh Sanikop       reference_frames_[reference_frame_index]->buffer()->stride(plane);
1274*09537850SAkhilesh Sanikop   const int source_width =
1275*09537850SAkhilesh Sanikop       reference_frames_[reference_frame_index]->buffer()->width(plane);
1276*09537850SAkhilesh Sanikop   const int source_height =
1277*09537850SAkhilesh Sanikop       reference_frames_[reference_frame_index]->buffer()->height(plane);
1278*09537850SAkhilesh Sanikop   uint16_t* const prediction = block.scratch_buffer->prediction_buffer[index];
1279*09537850SAkhilesh Sanikop 
1280*09537850SAkhilesh Sanikop   // In frame parallel mode, ensure that the reference block has been decoded
1281*09537850SAkhilesh Sanikop   // and available for referencing.
1282*09537850SAkhilesh Sanikop   if (frame_parallel_) {
1283*09537850SAkhilesh Sanikop     int reference_y_max = -1;
1284*09537850SAkhilesh Sanikop     // Find out the maximum y-coordinate for warping.
1285*09537850SAkhilesh Sanikop     for (int start_y = block_start_y; start_y < block_start_y + height;
1286*09537850SAkhilesh Sanikop          start_y += 8) {
1287*09537850SAkhilesh Sanikop       for (int start_x = block_start_x; start_x < block_start_x + width;
1288*09537850SAkhilesh Sanikop            start_x += 8) {
1289*09537850SAkhilesh Sanikop         const int src_x = (start_x + 4) << subsampling_x_[plane];
1290*09537850SAkhilesh Sanikop         const int src_y = (start_y + 4) << subsampling_y_[plane];
1291*09537850SAkhilesh Sanikop         const int64_t dst_y =
1292*09537850SAkhilesh Sanikop             src_x * warp_params->params[4] +
1293*09537850SAkhilesh Sanikop             static_cast<int64_t>(src_y) * warp_params->params[5] +
1294*09537850SAkhilesh Sanikop             warp_params->params[1];
1295*09537850SAkhilesh Sanikop         const int64_t y4 = dst_y >> subsampling_y_[plane];
1296*09537850SAkhilesh Sanikop         const int iy4 = static_cast<int>(y4 >> kWarpedModelPrecisionBits);
1297*09537850SAkhilesh Sanikop         reference_y_max = std::max(iy4 + 8, reference_y_max);
1298*09537850SAkhilesh Sanikop       }
1299*09537850SAkhilesh Sanikop     }
1300*09537850SAkhilesh Sanikop     // For U and V planes with subsampling, we need to multiply reference_y_max
1301*09537850SAkhilesh Sanikop     // by 2 since we only track the progress of Y planes.
1302*09537850SAkhilesh Sanikop     reference_y_max = LeftShift(reference_y_max, subsampling_y_[plane]);
1303*09537850SAkhilesh Sanikop     if (reference_frame_progress_cache_[reference_frame_index] <
1304*09537850SAkhilesh Sanikop             reference_y_max &&
1305*09537850SAkhilesh Sanikop         !reference_frames_[reference_frame_index]->WaitUntil(
1306*09537850SAkhilesh Sanikop             reference_y_max,
1307*09537850SAkhilesh Sanikop             &reference_frame_progress_cache_[reference_frame_index])) {
1308*09537850SAkhilesh Sanikop       return false;
1309*09537850SAkhilesh Sanikop     }
1310*09537850SAkhilesh Sanikop   }
1311*09537850SAkhilesh Sanikop   if (is_compound) {
1312*09537850SAkhilesh Sanikop     dsp_.warp_compound(source, source_stride, source_width, source_height,
1313*09537850SAkhilesh Sanikop                        warp_params->params, subsampling_x_[plane],
1314*09537850SAkhilesh Sanikop                        subsampling_y_[plane], block_start_x, block_start_y,
1315*09537850SAkhilesh Sanikop                        width, height, warp_params->alpha, warp_params->beta,
1316*09537850SAkhilesh Sanikop                        warp_params->gamma, warp_params->delta, prediction,
1317*09537850SAkhilesh Sanikop                        /*prediction_stride=*/width);
1318*09537850SAkhilesh Sanikop   } else {
1319*09537850SAkhilesh Sanikop     void* const output = is_inter_intra ? static_cast<void*>(prediction) : dest;
1320*09537850SAkhilesh Sanikop     ptrdiff_t output_stride =
1321*09537850SAkhilesh Sanikop         is_inter_intra ? /*prediction_stride=*/width : dest_stride;
1322*09537850SAkhilesh Sanikop #if LIBGAV1_MAX_BITDEPTH >= 10
1323*09537850SAkhilesh Sanikop     // |is_inter_intra| calculations are written to the |prediction| buffer.
1324*09537850SAkhilesh Sanikop     // Unlike the |is_compound| calculations the output is Pixel and not
1325*09537850SAkhilesh Sanikop     // uint16_t. warp_clip() expects |output_stride| to be in bytes and not
1326*09537850SAkhilesh Sanikop     // Pixels. |prediction_stride| is in units of uint16_t. Adjust
1327*09537850SAkhilesh Sanikop     // |output_stride| to account for this.
1328*09537850SAkhilesh Sanikop     if (is_inter_intra && sequence_header_.color_config.bitdepth > 8) {
1329*09537850SAkhilesh Sanikop       output_stride *= 2;
1330*09537850SAkhilesh Sanikop     }
1331*09537850SAkhilesh Sanikop #endif
1332*09537850SAkhilesh Sanikop     dsp_.warp(source, source_stride, source_width, source_height,
1333*09537850SAkhilesh Sanikop               warp_params->params, subsampling_x_[plane], subsampling_y_[plane],
1334*09537850SAkhilesh Sanikop               block_start_x, block_start_y, width, height, warp_params->alpha,
1335*09537850SAkhilesh Sanikop               warp_params->beta, warp_params->gamma, warp_params->delta, output,
1336*09537850SAkhilesh Sanikop               output_stride);
1337*09537850SAkhilesh Sanikop   }
1338*09537850SAkhilesh Sanikop   return true;
1339*09537850SAkhilesh Sanikop }
1340*09537850SAkhilesh Sanikop 
1341*09537850SAkhilesh Sanikop }  // namespace libgav1
1342