xref: /aosp_15_r20/external/libgav1/src/tile/tile.cc (revision 095378508e87ed692bf8dfeb34008b65b3735891)
1*09537850SAkhilesh Sanikop // Copyright 2019 The libgav1 Authors
2*09537850SAkhilesh Sanikop //
3*09537850SAkhilesh Sanikop // Licensed under the Apache License, Version 2.0 (the "License");
4*09537850SAkhilesh Sanikop // you may not use this file except in compliance with the License.
5*09537850SAkhilesh Sanikop // You may obtain a copy of the License at
6*09537850SAkhilesh Sanikop //
7*09537850SAkhilesh Sanikop //      http://www.apache.org/licenses/LICENSE-2.0
8*09537850SAkhilesh Sanikop //
9*09537850SAkhilesh Sanikop // Unless required by applicable law or agreed to in writing, software
10*09537850SAkhilesh Sanikop // distributed under the License is distributed on an "AS IS" BASIS,
11*09537850SAkhilesh Sanikop // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12*09537850SAkhilesh Sanikop // See the License for the specific language governing permissions and
13*09537850SAkhilesh Sanikop // limitations under the License.
14*09537850SAkhilesh Sanikop 
15*09537850SAkhilesh Sanikop #include "src/tile.h"
16*09537850SAkhilesh Sanikop 
17*09537850SAkhilesh Sanikop #include <algorithm>
18*09537850SAkhilesh Sanikop #include <array>
19*09537850SAkhilesh Sanikop #include <cassert>
20*09537850SAkhilesh Sanikop #include <climits>
21*09537850SAkhilesh Sanikop #include <cstdlib>
22*09537850SAkhilesh Sanikop #include <cstring>
23*09537850SAkhilesh Sanikop #include <memory>
24*09537850SAkhilesh Sanikop #include <new>
25*09537850SAkhilesh Sanikop #include <numeric>
26*09537850SAkhilesh Sanikop #include <type_traits>
27*09537850SAkhilesh Sanikop #include <utility>
28*09537850SAkhilesh Sanikop 
29*09537850SAkhilesh Sanikop #include "src/frame_scratch_buffer.h"
30*09537850SAkhilesh Sanikop #include "src/motion_vector.h"
31*09537850SAkhilesh Sanikop #include "src/reconstruction.h"
32*09537850SAkhilesh Sanikop #include "src/utils/bit_mask_set.h"
33*09537850SAkhilesh Sanikop #include "src/utils/common.h"
34*09537850SAkhilesh Sanikop #include "src/utils/constants.h"
35*09537850SAkhilesh Sanikop #include "src/utils/logging.h"
36*09537850SAkhilesh Sanikop #include "src/utils/segmentation.h"
37*09537850SAkhilesh Sanikop #include "src/utils/stack.h"
38*09537850SAkhilesh Sanikop 
39*09537850SAkhilesh Sanikop namespace libgav1 {
40*09537850SAkhilesh Sanikop namespace {
41*09537850SAkhilesh Sanikop 
42*09537850SAkhilesh Sanikop // Import all the constants in the anonymous namespace.
43*09537850SAkhilesh Sanikop #include "src/scan_tables.inc"
44*09537850SAkhilesh Sanikop 
45*09537850SAkhilesh Sanikop // Range above kNumQuantizerBaseLevels which the exponential golomb coding
46*09537850SAkhilesh Sanikop // process is activated.
47*09537850SAkhilesh Sanikop constexpr int kQuantizerCoefficientBaseRange = 12;
48*09537850SAkhilesh Sanikop constexpr int kNumQuantizerBaseLevels = 2;
49*09537850SAkhilesh Sanikop constexpr int kCoeffBaseRangeMaxIterations =
50*09537850SAkhilesh Sanikop     kQuantizerCoefficientBaseRange / (kCoeffBaseRangeSymbolCount - 1);
51*09537850SAkhilesh Sanikop constexpr int kEntropyContextLeft = 0;
52*09537850SAkhilesh Sanikop constexpr int kEntropyContextTop = 1;
53*09537850SAkhilesh Sanikop 
// 5x5 lookup table of all-zero contexts. The table is symmetric, so the order
// of the two indices does not matter.
// NOTE(review): judging by the name, the two indices are the (clamped) top and
// left context values; confirm against the lookup site.
constexpr uint8_t kAllZeroContextsByTopLeft[5][5] = {{1, 2, 2, 2, 3},
                                                     {2, 4, 4, 4, 5},
                                                     {2, 4, 4, 4, 5},
                                                     {2, 4, 4, 4, 5},
                                                     {3, 5, 5, 5, 6}};
59*09537850SAkhilesh Sanikop 
// The space complexity of DFS is O(branching_factor * max_depth). For the
// parameter tree, branching_factor = 4 (there could be up to 4 children for
// every node) and max_depth (excluding the root) = 5 (to go from a 128x128
// block all the way to a 4x4 block). The worst-case stack size is 16, by
// counting the number of 'o' nodes in the diagram:
//
//   |                    128x128  The highest level (corresponding to the
//   |                             root of the tree) has no node in the stack.
//   |-----------------+
//   |     |     |     |
//   |     o     o     o  64x64
//   |
//   |-----------------+
//   |     |     |     |
//   |     o     o     o  32x32    Higher levels have three nodes in the stack,
//   |                             because we pop one node off the stack before
//   |-----------------+           pushing its four children onto the stack.
//   |     |     |     |
//   |     o     o     o  16x16
//   |
//   |-----------------+
//   |     |     |     |
//   |     o     o     o  8x8
//   |
//   |-----------------+
//   |     |     |     |
//   o     o     o     o  4x4      Only the lowest level has four nodes in the
//                                 stack.
constexpr int kDfsStackSize = 16;
89*09537850SAkhilesh Sanikop 
90*09537850SAkhilesh Sanikop // Mask indicating whether the transform sets contain a particular transform
91*09537850SAkhilesh Sanikop // type. If |tx_type| is present in |tx_set|, then the |tx_type|th LSB is set.
92*09537850SAkhilesh Sanikop constexpr BitMaskSet kTransformTypeInSetMask[kNumTransformSets] = {
93*09537850SAkhilesh Sanikop     BitMaskSet(0x1),    BitMaskSet(0xE0F), BitMaskSet(0x20F),
94*09537850SAkhilesh Sanikop     BitMaskSet(0xFFFF), BitMaskSet(0xFFF), BitMaskSet(0x201)};
95*09537850SAkhilesh Sanikop 
96*09537850SAkhilesh Sanikop constexpr PredictionMode
97*09537850SAkhilesh Sanikop     kFilterIntraModeToIntraPredictor[kNumFilterIntraPredictors] = {
98*09537850SAkhilesh Sanikop         kPredictionModeDc, kPredictionModeVertical, kPredictionModeHorizontal,
99*09537850SAkhilesh Sanikop         kPredictionModeD157, kPredictionModeDc};
100*09537850SAkhilesh Sanikop 
101*09537850SAkhilesh Sanikop // Mask used to determine the index for mode_deltas lookup.
102*09537850SAkhilesh Sanikop constexpr BitMaskSet kPredictionModeDeltasMask(
103*09537850SAkhilesh Sanikop     kPredictionModeNearestMv, kPredictionModeNearMv, kPredictionModeNewMv,
104*09537850SAkhilesh Sanikop     kPredictionModeNearestNearestMv, kPredictionModeNearNearMv,
105*09537850SAkhilesh Sanikop     kPredictionModeNearestNewMv, kPredictionModeNewNearestMv,
106*09537850SAkhilesh Sanikop     kPredictionModeNearNewMv, kPredictionModeNewNearMv,
107*09537850SAkhilesh Sanikop     kPredictionModeNewNewMv);
108*09537850SAkhilesh Sanikop 
109*09537850SAkhilesh Sanikop // This is computed as:
110*09537850SAkhilesh Sanikop // min(transform_width_log2, 5) + min(transform_height_log2, 5) - 4.
111*09537850SAkhilesh Sanikop constexpr uint8_t kEobMultiSizeLookup[kNumTransformSizes] = {
112*09537850SAkhilesh Sanikop     0, 1, 2, 1, 2, 3, 4, 2, 3, 4, 5, 5, 4, 5, 6, 6, 5, 6, 6};
113*09537850SAkhilesh Sanikop 
114*09537850SAkhilesh Sanikop /* clang-format off */
115*09537850SAkhilesh Sanikop constexpr uint8_t kCoeffBaseContextOffset[kNumTransformSizes][5][5] = {
116*09537850SAkhilesh Sanikop     {{0, 1, 6, 6, 0}, {1, 6, 6, 21, 0}, {6, 6, 21, 21, 0}, {6, 21, 21, 21, 0},
117*09537850SAkhilesh Sanikop      {0, 0, 0, 0, 0}},
118*09537850SAkhilesh Sanikop     {{0, 11, 11, 11, 0}, {11, 11, 11, 11, 0}, {6, 6, 21, 21, 0},
119*09537850SAkhilesh Sanikop      {6, 21, 21, 21, 0}, {21, 21, 21, 21, 0}},
120*09537850SAkhilesh Sanikop     {{0, 11, 11, 11, 0}, {11, 11, 11, 11, 0}, {6, 6, 21, 21, 0},
121*09537850SAkhilesh Sanikop      {6, 21, 21, 21, 0}, {21, 21, 21, 21, 0}},
122*09537850SAkhilesh Sanikop     {{0, 16, 6, 6, 21}, {16, 16, 6, 21, 21}, {16, 16, 21, 21, 21},
123*09537850SAkhilesh Sanikop      {16, 16, 21, 21, 21}, {0, 0, 0, 0, 0}},
124*09537850SAkhilesh Sanikop     {{0, 1, 6, 6, 21}, {1, 6, 6, 21, 21}, {6, 6, 21, 21, 21},
125*09537850SAkhilesh Sanikop      {6, 21, 21, 21, 21}, {21, 21, 21, 21, 21}},
126*09537850SAkhilesh Sanikop     {{0, 11, 11, 11, 11}, {11, 11, 11, 11, 11}, {6, 6, 21, 21, 21},
127*09537850SAkhilesh Sanikop      {6, 21, 21, 21, 21}, {21, 21, 21, 21, 21}},
128*09537850SAkhilesh Sanikop     {{0, 11, 11, 11, 11}, {11, 11, 11, 11, 11}, {6, 6, 21, 21, 21},
129*09537850SAkhilesh Sanikop      {6, 21, 21, 21, 21}, {21, 21, 21, 21, 21}},
130*09537850SAkhilesh Sanikop     {{0, 16, 6, 6, 21}, {16, 16, 6, 21, 21}, {16, 16, 21, 21, 21},
131*09537850SAkhilesh Sanikop      {16, 16, 21, 21, 21}, {0, 0, 0, 0, 0}},
132*09537850SAkhilesh Sanikop     {{0, 16, 6, 6, 21}, {16, 16, 6, 21, 21}, {16, 16, 21, 21, 21},
133*09537850SAkhilesh Sanikop      {16, 16, 21, 21, 21}, {16, 16, 21, 21, 21}},
134*09537850SAkhilesh Sanikop     {{0, 1, 6, 6, 21}, {1, 6, 6, 21, 21}, {6, 6, 21, 21, 21},
135*09537850SAkhilesh Sanikop      {6, 21, 21, 21, 21}, {21, 21, 21, 21, 21}},
136*09537850SAkhilesh Sanikop     {{0, 11, 11, 11, 11}, {11, 11, 11, 11, 11}, {6, 6, 21, 21, 21},
137*09537850SAkhilesh Sanikop      {6, 21, 21, 21, 21}, {21, 21, 21, 21, 21}},
138*09537850SAkhilesh Sanikop     {{0, 11, 11, 11, 11}, {11, 11, 11, 11, 11}, {6, 6, 21, 21, 21},
139*09537850SAkhilesh Sanikop      {6, 21, 21, 21, 21}, {21, 21, 21, 21, 21}},
140*09537850SAkhilesh Sanikop     {{0, 16, 6, 6, 21}, {16, 16, 6, 21, 21}, {16, 16, 21, 21, 21},
141*09537850SAkhilesh Sanikop      {16, 16, 21, 21, 21}, {16, 16, 21, 21, 21}},
142*09537850SAkhilesh Sanikop     {{0, 16, 6, 6, 21}, {16, 16, 6, 21, 21}, {16, 16, 21, 21, 21},
143*09537850SAkhilesh Sanikop      {16, 16, 21, 21, 21}, {16, 16, 21, 21, 21}},
144*09537850SAkhilesh Sanikop     {{0, 1, 6, 6, 21}, {1, 6, 6, 21, 21}, {6, 6, 21, 21, 21},
145*09537850SAkhilesh Sanikop      {6, 21, 21, 21, 21}, {21, 21, 21, 21, 21}},
146*09537850SAkhilesh Sanikop     {{0, 11, 11, 11, 11}, {11, 11, 11, 11, 11}, {6, 6, 21, 21, 21},
147*09537850SAkhilesh Sanikop      {6, 21, 21, 21, 21}, {21, 21, 21, 21, 21}},
148*09537850SAkhilesh Sanikop     {{0, 16, 6, 6, 21}, {16, 16, 6, 21, 21}, {16, 16, 21, 21, 21},
149*09537850SAkhilesh Sanikop      {16, 16, 21, 21, 21}, {16, 16, 21, 21, 21}},
150*09537850SAkhilesh Sanikop     {{0, 16, 6, 6, 21}, {16, 16, 6, 21, 21}, {16, 16, 21, 21, 21},
151*09537850SAkhilesh Sanikop      {16, 16, 21, 21, 21}, {16, 16, 21, 21, 21}},
152*09537850SAkhilesh Sanikop     {{0, 1, 6, 6, 21}, {1, 6, 6, 21, 21}, {6, 6, 21, 21, 21},
153*09537850SAkhilesh Sanikop      {6, 21, 21, 21, 21}, {21, 21, 21, 21, 21}}};
154*09537850SAkhilesh Sanikop /* clang-format on */
155*09537850SAkhilesh Sanikop 
// The table size is extended from 3 to 16 by repeating the last element, so
// that row or column indices can be used directly without clipping them.
constexpr uint8_t kCoeffBasePositionContextOffset[16] = {
    26, 31, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36};
160*09537850SAkhilesh Sanikop 
161*09537850SAkhilesh Sanikop constexpr PredictionMode kInterIntraToIntraMode[kNumInterIntraModes] = {
162*09537850SAkhilesh Sanikop     kPredictionModeDc, kPredictionModeVertical, kPredictionModeHorizontal,
163*09537850SAkhilesh Sanikop     kPredictionModeSmooth};
164*09537850SAkhilesh Sanikop 
// Number of horizontal luma samples before intra block copy can be used.
constexpr int kIntraBlockCopyDelayPixels = 256;
// Number of 64 by 64 blocks before intra block copy can be used.
constexpr int kIntraBlockCopyDelay64x64Blocks = kIntraBlockCopyDelayPixels / 64;
169*09537850SAkhilesh Sanikop 
170*09537850SAkhilesh Sanikop // Index [i][j] corresponds to the transform size of width 1 << (i + 2) and
171*09537850SAkhilesh Sanikop // height 1 << (j + 2).
172*09537850SAkhilesh Sanikop constexpr TransformSize k4x4SizeToTransformSize[5][5] = {
173*09537850SAkhilesh Sanikop     {kTransformSize4x4, kTransformSize4x8, kTransformSize4x16,
174*09537850SAkhilesh Sanikop      kNumTransformSizes, kNumTransformSizes},
175*09537850SAkhilesh Sanikop     {kTransformSize8x4, kTransformSize8x8, kTransformSize8x16,
176*09537850SAkhilesh Sanikop      kTransformSize8x32, kNumTransformSizes},
177*09537850SAkhilesh Sanikop     {kTransformSize16x4, kTransformSize16x8, kTransformSize16x16,
178*09537850SAkhilesh Sanikop      kTransformSize16x32, kTransformSize16x64},
179*09537850SAkhilesh Sanikop     {kNumTransformSizes, kTransformSize32x8, kTransformSize32x16,
180*09537850SAkhilesh Sanikop      kTransformSize32x32, kTransformSize32x64},
181*09537850SAkhilesh Sanikop     {kNumTransformSizes, kNumTransformSizes, kTransformSize64x16,
182*09537850SAkhilesh Sanikop      kTransformSize64x32, kTransformSize64x64}};
183*09537850SAkhilesh Sanikop 
184*09537850SAkhilesh Sanikop // Defined in section 9.3 of the spec.
185*09537850SAkhilesh Sanikop constexpr TransformType kModeToTransformType[kIntraPredictionModesUV] = {
186*09537850SAkhilesh Sanikop     kTransformTypeDctDct,   kTransformTypeDctAdst,  kTransformTypeAdstDct,
187*09537850SAkhilesh Sanikop     kTransformTypeDctDct,   kTransformTypeAdstAdst, kTransformTypeDctAdst,
188*09537850SAkhilesh Sanikop     kTransformTypeAdstDct,  kTransformTypeAdstDct,  kTransformTypeDctAdst,
189*09537850SAkhilesh Sanikop     kTransformTypeAdstAdst, kTransformTypeDctAdst,  kTransformTypeAdstDct,
190*09537850SAkhilesh Sanikop     kTransformTypeAdstAdst, kTransformTypeDctDct};
191*09537850SAkhilesh Sanikop 
192*09537850SAkhilesh Sanikop // Defined in section 5.11.47 of the spec. This array does not contain an entry
193*09537850SAkhilesh Sanikop // for kTransformSetDctOnly, so the first dimension needs to be
194*09537850SAkhilesh Sanikop // |kNumTransformSets| - 1.
195*09537850SAkhilesh Sanikop constexpr TransformType kInverseTransformTypeBySet[kNumTransformSets - 1][16] =
196*09537850SAkhilesh Sanikop     {{kTransformTypeIdentityIdentity, kTransformTypeDctDct,
197*09537850SAkhilesh Sanikop       kTransformTypeIdentityDct, kTransformTypeDctIdentity,
198*09537850SAkhilesh Sanikop       kTransformTypeAdstAdst, kTransformTypeDctAdst, kTransformTypeAdstDct},
199*09537850SAkhilesh Sanikop      {kTransformTypeIdentityIdentity, kTransformTypeDctDct,
200*09537850SAkhilesh Sanikop       kTransformTypeAdstAdst, kTransformTypeDctAdst, kTransformTypeAdstDct},
201*09537850SAkhilesh Sanikop      {kTransformTypeIdentityIdentity, kTransformTypeIdentityDct,
202*09537850SAkhilesh Sanikop       kTransformTypeDctIdentity, kTransformTypeIdentityAdst,
203*09537850SAkhilesh Sanikop       kTransformTypeAdstIdentity, kTransformTypeIdentityFlipadst,
204*09537850SAkhilesh Sanikop       kTransformTypeFlipadstIdentity, kTransformTypeDctDct,
205*09537850SAkhilesh Sanikop       kTransformTypeDctAdst, kTransformTypeAdstDct, kTransformTypeDctFlipadst,
206*09537850SAkhilesh Sanikop       kTransformTypeFlipadstDct, kTransformTypeAdstAdst,
207*09537850SAkhilesh Sanikop       kTransformTypeFlipadstFlipadst, kTransformTypeFlipadstAdst,
208*09537850SAkhilesh Sanikop       kTransformTypeAdstFlipadst},
209*09537850SAkhilesh Sanikop      {kTransformTypeIdentityIdentity, kTransformTypeIdentityDct,
210*09537850SAkhilesh Sanikop       kTransformTypeDctIdentity, kTransformTypeDctDct, kTransformTypeDctAdst,
211*09537850SAkhilesh Sanikop       kTransformTypeAdstDct, kTransformTypeDctFlipadst,
212*09537850SAkhilesh Sanikop       kTransformTypeFlipadstDct, kTransformTypeAdstAdst,
213*09537850SAkhilesh Sanikop       kTransformTypeFlipadstFlipadst, kTransformTypeFlipadstAdst,
214*09537850SAkhilesh Sanikop       kTransformTypeAdstFlipadst},
215*09537850SAkhilesh Sanikop      {kTransformTypeIdentityIdentity, kTransformTypeDctDct}};
216*09537850SAkhilesh Sanikop 
217*09537850SAkhilesh Sanikop // Replaces all occurrences of 64x* and *x64 with 32x* and *x32 respectively.
218*09537850SAkhilesh Sanikop constexpr TransformSize kAdjustedTransformSize[kNumTransformSizes] = {
219*09537850SAkhilesh Sanikop     kTransformSize4x4,   kTransformSize4x8,   kTransformSize4x16,
220*09537850SAkhilesh Sanikop     kTransformSize8x4,   kTransformSize8x8,   kTransformSize8x16,
221*09537850SAkhilesh Sanikop     kTransformSize8x32,  kTransformSize16x4,  kTransformSize16x8,
222*09537850SAkhilesh Sanikop     kTransformSize16x16, kTransformSize16x32, kTransformSize16x32,
223*09537850SAkhilesh Sanikop     kTransformSize32x8,  kTransformSize32x16, kTransformSize32x32,
224*09537850SAkhilesh Sanikop     kTransformSize32x32, kTransformSize32x16, kTransformSize32x32,
225*09537850SAkhilesh Sanikop     kTransformSize32x32};
226*09537850SAkhilesh Sanikop 
227*09537850SAkhilesh Sanikop // This is the same as Max_Tx_Size_Rect array in the spec but with *x64 and 64*x
228*09537850SAkhilesh Sanikop // transforms replaced with *x32 and 32x* respectively.
229*09537850SAkhilesh Sanikop constexpr TransformSize kUVTransformSize[kMaxBlockSizes] = {
230*09537850SAkhilesh Sanikop     kTransformSize4x4,   kTransformSize4x8,   kTransformSize4x16,
231*09537850SAkhilesh Sanikop     kTransformSize8x4,   kTransformSize8x8,   kTransformSize8x16,
232*09537850SAkhilesh Sanikop     kTransformSize8x32,  kTransformSize16x4,  kTransformSize16x8,
233*09537850SAkhilesh Sanikop     kTransformSize16x16, kTransformSize16x32, kTransformSize16x32,
234*09537850SAkhilesh Sanikop     kTransformSize32x8,  kTransformSize32x16, kTransformSize32x32,
235*09537850SAkhilesh Sanikop     kTransformSize32x32, kTransformSize32x16, kTransformSize32x32,
236*09537850SAkhilesh Sanikop     kTransformSize32x32, kTransformSize32x32, kTransformSize32x32,
237*09537850SAkhilesh Sanikop     kTransformSize32x32};
238*09537850SAkhilesh Sanikop 
239*09537850SAkhilesh Sanikop // ith entry of this array is computed as:
240*09537850SAkhilesh Sanikop // DivideBy2(TransformSizeToSquareTransformIndex(kTransformSizeSquareMin[i]) +
241*09537850SAkhilesh Sanikop //           TransformSizeToSquareTransformIndex(kTransformSizeSquareMax[i]) +
242*09537850SAkhilesh Sanikop //           1)
243*09537850SAkhilesh Sanikop constexpr uint8_t kTransformSizeContext[kNumTransformSizes] = {
244*09537850SAkhilesh Sanikop     0, 1, 1, 1, 1, 2, 2, 1, 2, 2, 3, 3, 2, 3, 3, 4, 3, 4, 4};
245*09537850SAkhilesh Sanikop 
// Default values of the two self-guided (SgrProj) multipliers.
constexpr int8_t kSgrProjDefaultMultiplier[2] = {-32, 31};
247*09537850SAkhilesh Sanikop 
248*09537850SAkhilesh Sanikop constexpr int8_t kWienerDefaultFilter[kNumWienerCoefficients] = {3, -7, 15};
249*09537850SAkhilesh Sanikop 
250*09537850SAkhilesh Sanikop // Maps compound prediction modes into single modes. For e.g.
251*09537850SAkhilesh Sanikop // kPredictionModeNearestNewMv will map to kPredictionModeNearestMv for index 0
252*09537850SAkhilesh Sanikop // and kPredictionModeNewMv for index 1. It is used to simplify the logic in
253*09537850SAkhilesh Sanikop // AssignMv (and avoid duplicate code). This is section 5.11.30. in the spec.
254*09537850SAkhilesh Sanikop constexpr PredictionMode
255*09537850SAkhilesh Sanikop     kCompoundToSinglePredictionMode[kNumCompoundInterPredictionModes][2] = {
256*09537850SAkhilesh Sanikop         {kPredictionModeNearestMv, kPredictionModeNearestMv},
257*09537850SAkhilesh Sanikop         {kPredictionModeNearMv, kPredictionModeNearMv},
258*09537850SAkhilesh Sanikop         {kPredictionModeNearestMv, kPredictionModeNewMv},
259*09537850SAkhilesh Sanikop         {kPredictionModeNewMv, kPredictionModeNearestMv},
260*09537850SAkhilesh Sanikop         {kPredictionModeNearMv, kPredictionModeNewMv},
261*09537850SAkhilesh Sanikop         {kPredictionModeNewMv, kPredictionModeNearMv},
262*09537850SAkhilesh Sanikop         {kPredictionModeGlobalMv, kPredictionModeGlobalMv},
263*09537850SAkhilesh Sanikop         {kPredictionModeNewMv, kPredictionModeNewMv},
264*09537850SAkhilesh Sanikop };
GetSinglePredictionMode(int index,PredictionMode y_mode)265*09537850SAkhilesh Sanikop PredictionMode GetSinglePredictionMode(int index, PredictionMode y_mode) {
266*09537850SAkhilesh Sanikop   if (y_mode < kPredictionModeNearestNearestMv) {
267*09537850SAkhilesh Sanikop     return y_mode;
268*09537850SAkhilesh Sanikop   }
269*09537850SAkhilesh Sanikop   const int lookup_index = y_mode - kPredictionModeNearestNearestMv;
270*09537850SAkhilesh Sanikop   assert(lookup_index >= 0);
271*09537850SAkhilesh Sanikop   return kCompoundToSinglePredictionMode[lookup_index][index];
272*09537850SAkhilesh Sanikop }
273*09537850SAkhilesh Sanikop 
274*09537850SAkhilesh Sanikop // log2(dqDenom) in section 7.12.3 of the spec. We use the log2 value because
275*09537850SAkhilesh Sanikop // dqDenom is always a power of two and hence right shift can be used instead of
276*09537850SAkhilesh Sanikop // division.
277*09537850SAkhilesh Sanikop constexpr uint8_t kQuantizationShift[kNumTransformSizes] = {
278*09537850SAkhilesh Sanikop     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 2, 1, 2, 2};
279*09537850SAkhilesh Sanikop 
// Returns the minimum of |length| and |max| - |start|. This is used to clamp
// the number of elements touched when accessing arrays whose bound is equal to
// |max|, starting at index |start|. The result is not clamped at zero: it is
// zero or negative when |start| >= |max|.
int GetNumElements(int length, int start, int max) {
  return std::min(length, max - start);
}
285*09537850SAkhilesh Sanikop 
286*09537850SAkhilesh Sanikop template <typename T>
SetBlockValues(int rows,int columns,T value,T * dst,ptrdiff_t stride)287*09537850SAkhilesh Sanikop void SetBlockValues(int rows, int columns, T value, T* dst, ptrdiff_t stride) {
288*09537850SAkhilesh Sanikop   // Specialize all columns cases (values in kTransformWidth4x4[]) for better
289*09537850SAkhilesh Sanikop   // performance.
290*09537850SAkhilesh Sanikop   switch (columns) {
291*09537850SAkhilesh Sanikop     case 1:
292*09537850SAkhilesh Sanikop       MemSetBlock<T>(rows, 1, value, dst, stride);
293*09537850SAkhilesh Sanikop       break;
294*09537850SAkhilesh Sanikop     case 2:
295*09537850SAkhilesh Sanikop       MemSetBlock<T>(rows, 2, value, dst, stride);
296*09537850SAkhilesh Sanikop       break;
297*09537850SAkhilesh Sanikop     case 4:
298*09537850SAkhilesh Sanikop       MemSetBlock<T>(rows, 4, value, dst, stride);
299*09537850SAkhilesh Sanikop       break;
300*09537850SAkhilesh Sanikop     case 8:
301*09537850SAkhilesh Sanikop       MemSetBlock<T>(rows, 8, value, dst, stride);
302*09537850SAkhilesh Sanikop       break;
303*09537850SAkhilesh Sanikop     default:
304*09537850SAkhilesh Sanikop       assert(columns == 16);
305*09537850SAkhilesh Sanikop       MemSetBlock<T>(rows, 16, value, dst, stride);
306*09537850SAkhilesh Sanikop       break;
307*09537850SAkhilesh Sanikop   }
308*09537850SAkhilesh Sanikop }
309*09537850SAkhilesh Sanikop 
SetTransformType(const Tile::Block & block,int x4,int y4,int w4,int h4,TransformType tx_type,TransformType transform_types[32][32])310*09537850SAkhilesh Sanikop void SetTransformType(const Tile::Block& block, int x4, int y4, int w4, int h4,
311*09537850SAkhilesh Sanikop                       TransformType tx_type,
312*09537850SAkhilesh Sanikop                       TransformType transform_types[32][32]) {
313*09537850SAkhilesh Sanikop   const int y_offset = y4 - block.row4x4;
314*09537850SAkhilesh Sanikop   const int x_offset = x4 - block.column4x4;
315*09537850SAkhilesh Sanikop   TransformType* const dst = &transform_types[y_offset][x_offset];
316*09537850SAkhilesh Sanikop   SetBlockValues<TransformType>(h4, w4, tx_type, dst, 32);
317*09537850SAkhilesh Sanikop }
318*09537850SAkhilesh Sanikop 
StoreMotionFieldMvs(ReferenceFrameType reference_frame_to_store,const MotionVector & mv_to_store,ptrdiff_t stride,int rows,int columns,ReferenceFrameType * reference_frame_row_start,MotionVector * mv)319*09537850SAkhilesh Sanikop void StoreMotionFieldMvs(ReferenceFrameType reference_frame_to_store,
320*09537850SAkhilesh Sanikop                          const MotionVector& mv_to_store, ptrdiff_t stride,
321*09537850SAkhilesh Sanikop                          int rows, int columns,
322*09537850SAkhilesh Sanikop                          ReferenceFrameType* reference_frame_row_start,
323*09537850SAkhilesh Sanikop                          MotionVector* mv) {
324*09537850SAkhilesh Sanikop   static_assert(sizeof(*reference_frame_row_start) == sizeof(int8_t), "");
325*09537850SAkhilesh Sanikop   do {
326*09537850SAkhilesh Sanikop     // Don't switch the following two memory setting functions.
327*09537850SAkhilesh Sanikop     // Some ARM CPUs are quite sensitive to the order.
328*09537850SAkhilesh Sanikop     memset(reference_frame_row_start, reference_frame_to_store, columns);
329*09537850SAkhilesh Sanikop     std::fill(mv, mv + columns, mv_to_store);
330*09537850SAkhilesh Sanikop     reference_frame_row_start += stride;
331*09537850SAkhilesh Sanikop     mv += stride;
332*09537850SAkhilesh Sanikop   } while (--rows != 0);
333*09537850SAkhilesh Sanikop }
334*09537850SAkhilesh Sanikop 
// Inverse transform process assumes that the quantized coefficients are stored
// as a virtual 2d array of size |tx_width| x tx_height. If transform width is
// 64, then this assumption is broken because the scan order used for populating
// the coefficients for such transforms is the same as the one used for
// corresponding transform with width 32 (e.g. the scan order used for 64x16 is
// the same as the one used for 32x16). So we must restore the coefficients to
// their correct positions and clean the positions they occupied.
//
// On entry |residual| holds |clamped_tx_height| rows of 32 coefficients packed
// back to back. On return row r starts at residual + 64 * r, and the slots
// that the packed rows used to occupy in columns 32..63 are zeroed. Nothing is
// done unless |tx_width| is 64. |clamped_tx_height| must be even and at least
// 4.
template <typename ResidualType>
void MoveCoefficientsForTxWidth64(int clamped_tx_height, int tx_width,
                                  ResidualType* residual) {
  if (tx_width != 64) return;
  // The loop moves two rows per iteration and stops when its counter reaches
  // zero, so a smaller or odd height would never terminate correctly.
  assert(clamped_tx_height >= 4);
  assert((clamped_tx_height & 1) == 0);
  const int rows = clamped_tx_height - 2;
  // |src| walks the packed (stride 32) layout, |residual| the final (stride
  // 64) layout. Both start at the second to last row.
  auto* src = residual + 32 * rows;
  residual += 64 * rows;
  // Process 2 rows in each loop in reverse order to avoid overwrite.
  int x = rows >> 1;
  do {
    // The 2 rows can be processed in order.
    memcpy(residual, src, 32 * sizeof(src[0]));
    memcpy(residual + 64, src + 32, 32 * sizeof(src[0]));
    // The odd packed row sits in columns 32..63 of an earlier output row.
    memset(src + 32, 0, 32 * sizeof(src[0]));
    src -= 64;
    residual -= 128;
  } while (--x);
  // Process the second row. The first row is already correct.
  memcpy(residual + 64, src + 32, 32 * sizeof(src[0]));
  memset(src + 32, 0, 32 * sizeof(src[0]));
}
363*09537850SAkhilesh Sanikop 
GetClampParameters(const Tile::Block & block,int min[2],int max[2])364*09537850SAkhilesh Sanikop void GetClampParameters(const Tile::Block& block, int min[2], int max[2]) {
365*09537850SAkhilesh Sanikop   // 7.10.2.14 (part 1). (also contains implementations of 5.11.53
366*09537850SAkhilesh Sanikop   // and 5.11.54).
367*09537850SAkhilesh Sanikop   constexpr int kMvBorder4x4 = 4;
368*09537850SAkhilesh Sanikop   const int row_border = kMvBorder4x4 + block.height4x4;
369*09537850SAkhilesh Sanikop   const int column_border = kMvBorder4x4 + block.width4x4;
370*09537850SAkhilesh Sanikop   const int macroblocks_to_top_edge = -block.row4x4;
371*09537850SAkhilesh Sanikop   const int macroblocks_to_bottom_edge =
372*09537850SAkhilesh Sanikop       block.tile.frame_header().rows4x4 - block.height4x4 - block.row4x4;
373*09537850SAkhilesh Sanikop   const int macroblocks_to_left_edge = -block.column4x4;
374*09537850SAkhilesh Sanikop   const int macroblocks_to_right_edge =
375*09537850SAkhilesh Sanikop       block.tile.frame_header().columns4x4 - block.width4x4 - block.column4x4;
376*09537850SAkhilesh Sanikop   min[0] = MultiplyBy32(macroblocks_to_top_edge - row_border);
377*09537850SAkhilesh Sanikop   min[1] = MultiplyBy32(macroblocks_to_left_edge - column_border);
378*09537850SAkhilesh Sanikop   max[0] = MultiplyBy32(macroblocks_to_bottom_edge + row_border);
379*09537850SAkhilesh Sanikop   max[1] = MultiplyBy32(macroblocks_to_right_edge + column_border);
380*09537850SAkhilesh Sanikop }
381*09537850SAkhilesh Sanikop 
382*09537850SAkhilesh Sanikop // Section 8.3.2 in the spec, under coeff_base_eob.
GetCoeffBaseContextEob(TransformSize tx_size,int index)383*09537850SAkhilesh Sanikop int GetCoeffBaseContextEob(TransformSize tx_size, int index) {
384*09537850SAkhilesh Sanikop   if (index == 0) return 0;
385*09537850SAkhilesh Sanikop   const TransformSize adjusted_tx_size = kAdjustedTransformSize[tx_size];
386*09537850SAkhilesh Sanikop   const int tx_width_log2 = kTransformWidthLog2[adjusted_tx_size];
387*09537850SAkhilesh Sanikop   const int tx_height = kTransformHeight[adjusted_tx_size];
388*09537850SAkhilesh Sanikop   if (index <= DivideBy8(tx_height << tx_width_log2)) return 1;
389*09537850SAkhilesh Sanikop   if (index <= DivideBy4(tx_height << tx_width_log2)) return 2;
390*09537850SAkhilesh Sanikop   return 3;
391*09537850SAkhilesh Sanikop }
392*09537850SAkhilesh Sanikop 
393*09537850SAkhilesh Sanikop // Section 8.3.2 in the spec, under coeff_br. Optimized for end of block based
394*09537850SAkhilesh Sanikop // on the fact that {0, 1}, {1, 0}, {1, 1}, {0, 2} and {2, 0} will all be 0 in
395*09537850SAkhilesh Sanikop // the end of block case.
GetCoeffBaseRangeContextEob(int adjusted_tx_width_log2,int pos,TransformClass tx_class)396*09537850SAkhilesh Sanikop int GetCoeffBaseRangeContextEob(int adjusted_tx_width_log2, int pos,
397*09537850SAkhilesh Sanikop                                 TransformClass tx_class) {
398*09537850SAkhilesh Sanikop   if (pos == 0) return 0;
399*09537850SAkhilesh Sanikop   const int tx_width = 1 << adjusted_tx_width_log2;
400*09537850SAkhilesh Sanikop   const int row = pos >> adjusted_tx_width_log2;
401*09537850SAkhilesh Sanikop   const int column = pos & (tx_width - 1);
402*09537850SAkhilesh Sanikop   // This return statement is equivalent to:
403*09537850SAkhilesh Sanikop   // return ((tx_class == kTransformClass2D && (row | column) < 2) ||
404*09537850SAkhilesh Sanikop   //         (tx_class == kTransformClassHorizontal && column == 0) ||
405*09537850SAkhilesh Sanikop   //         (tx_class == kTransformClassVertical && row == 0))
406*09537850SAkhilesh Sanikop   //            ? 7
407*09537850SAkhilesh Sanikop   //            : 14;
408*09537850SAkhilesh Sanikop   return 14 >> ((static_cast<int>(tx_class == kTransformClass2D) &
409*09537850SAkhilesh Sanikop                  static_cast<int>((row | column) < 2)) |
410*09537850SAkhilesh Sanikop                 (tx_class & static_cast<int>(column == 0)) |
411*09537850SAkhilesh Sanikop                 ((tx_class >> 1) & static_cast<int>(row == 0)));
412*09537850SAkhilesh Sanikop }
413*09537850SAkhilesh Sanikop 
414*09537850SAkhilesh Sanikop }  // namespace
415*09537850SAkhilesh Sanikop 
Tile(int tile_number,const uint8_t * const data,size_t size,const ObuSequenceHeader & sequence_header,const ObuFrameHeader & frame_header,RefCountedBuffer * const current_frame,const DecoderState & state,FrameScratchBuffer * const frame_scratch_buffer,const WedgeMaskArray & wedge_masks,const QuantizerMatrix & quantizer_matrix,SymbolDecoderContext * const saved_symbol_decoder_context,const SegmentationMap * prev_segment_ids,PostFilter * const post_filter,const dsp::Dsp * const dsp,ThreadPool * const thread_pool,BlockingCounterWithStatus * const pending_tiles,bool frame_parallel,bool use_intra_prediction_buffer)416*09537850SAkhilesh Sanikop Tile::Tile(int tile_number, const uint8_t* const data, size_t size,
417*09537850SAkhilesh Sanikop            const ObuSequenceHeader& sequence_header,
418*09537850SAkhilesh Sanikop            const ObuFrameHeader& frame_header,
419*09537850SAkhilesh Sanikop            RefCountedBuffer* const current_frame, const DecoderState& state,
420*09537850SAkhilesh Sanikop            FrameScratchBuffer* const frame_scratch_buffer,
421*09537850SAkhilesh Sanikop            const WedgeMaskArray& wedge_masks,
422*09537850SAkhilesh Sanikop            const QuantizerMatrix& quantizer_matrix,
423*09537850SAkhilesh Sanikop            SymbolDecoderContext* const saved_symbol_decoder_context,
424*09537850SAkhilesh Sanikop            const SegmentationMap* prev_segment_ids,
425*09537850SAkhilesh Sanikop            PostFilter* const post_filter, const dsp::Dsp* const dsp,
426*09537850SAkhilesh Sanikop            ThreadPool* const thread_pool,
427*09537850SAkhilesh Sanikop            BlockingCounterWithStatus* const pending_tiles, bool frame_parallel,
428*09537850SAkhilesh Sanikop            bool use_intra_prediction_buffer)
429*09537850SAkhilesh Sanikop     : number_(tile_number),
430*09537850SAkhilesh Sanikop       row_(number_ / frame_header.tile_info.tile_columns),
431*09537850SAkhilesh Sanikop       column_(number_ % frame_header.tile_info.tile_columns),
432*09537850SAkhilesh Sanikop       data_(data),
433*09537850SAkhilesh Sanikop       size_(size),
434*09537850SAkhilesh Sanikop       read_deltas_(false),
435*09537850SAkhilesh Sanikop       subsampling_x_{0, sequence_header.color_config.subsampling_x,
436*09537850SAkhilesh Sanikop                      sequence_header.color_config.subsampling_x},
437*09537850SAkhilesh Sanikop       subsampling_y_{0, sequence_header.color_config.subsampling_y,
438*09537850SAkhilesh Sanikop                      sequence_header.color_config.subsampling_y},
439*09537850SAkhilesh Sanikop       current_quantizer_index_(frame_header.quantizer.base_index),
440*09537850SAkhilesh Sanikop       sequence_header_(sequence_header),
441*09537850SAkhilesh Sanikop       frame_header_(frame_header),
442*09537850SAkhilesh Sanikop       reference_frame_sign_bias_(state.reference_frame_sign_bias),
443*09537850SAkhilesh Sanikop       reference_frames_(state.reference_frame),
444*09537850SAkhilesh Sanikop       motion_field_(frame_scratch_buffer->motion_field),
445*09537850SAkhilesh Sanikop       reference_order_hint_(state.reference_order_hint),
446*09537850SAkhilesh Sanikop       wedge_masks_(wedge_masks),
447*09537850SAkhilesh Sanikop       quantizer_matrix_(quantizer_matrix),
448*09537850SAkhilesh Sanikop       reader_(data_, size_, frame_header_.enable_cdf_update),
449*09537850SAkhilesh Sanikop       symbol_decoder_context_(frame_scratch_buffer->symbol_decoder_context),
450*09537850SAkhilesh Sanikop       saved_symbol_decoder_context_(saved_symbol_decoder_context),
451*09537850SAkhilesh Sanikop       prev_segment_ids_(prev_segment_ids),
452*09537850SAkhilesh Sanikop       dsp_(*dsp),
453*09537850SAkhilesh Sanikop       post_filter_(*post_filter),
454*09537850SAkhilesh Sanikop       block_parameters_holder_(frame_scratch_buffer->block_parameters_holder),
455*09537850SAkhilesh Sanikop       quantizer_(sequence_header_.color_config.bitdepth,
456*09537850SAkhilesh Sanikop                  &frame_header_.quantizer),
457*09537850SAkhilesh Sanikop       residual_size_((sequence_header_.color_config.bitdepth == 8)
458*09537850SAkhilesh Sanikop                          ? sizeof(int16_t)
459*09537850SAkhilesh Sanikop                          : sizeof(int32_t)),
460*09537850SAkhilesh Sanikop       intra_block_copy_lag_(
461*09537850SAkhilesh Sanikop           frame_header_.allow_intrabc
462*09537850SAkhilesh Sanikop               ? (sequence_header_.use_128x128_superblock ? 3 : 5)
463*09537850SAkhilesh Sanikop               : 1),
464*09537850SAkhilesh Sanikop       current_frame_(*current_frame),
465*09537850SAkhilesh Sanikop       cdef_index_(frame_scratch_buffer->cdef_index),
466*09537850SAkhilesh Sanikop       cdef_skip_(frame_scratch_buffer->cdef_skip),
467*09537850SAkhilesh Sanikop       inter_transform_sizes_(frame_scratch_buffer->inter_transform_sizes),
468*09537850SAkhilesh Sanikop       thread_pool_(thread_pool),
469*09537850SAkhilesh Sanikop       residual_buffer_pool_(frame_scratch_buffer->residual_buffer_pool.get()),
470*09537850SAkhilesh Sanikop       tile_scratch_buffer_pool_(
471*09537850SAkhilesh Sanikop           &frame_scratch_buffer->tile_scratch_buffer_pool),
472*09537850SAkhilesh Sanikop       pending_tiles_(pending_tiles),
473*09537850SAkhilesh Sanikop       frame_parallel_(frame_parallel),
474*09537850SAkhilesh Sanikop       use_intra_prediction_buffer_(use_intra_prediction_buffer),
475*09537850SAkhilesh Sanikop       intra_prediction_buffer_(
476*09537850SAkhilesh Sanikop           use_intra_prediction_buffer_
477*09537850SAkhilesh Sanikop               ? &frame_scratch_buffer->intra_prediction_buffers.get()[row_]
478*09537850SAkhilesh Sanikop               : nullptr) {
479*09537850SAkhilesh Sanikop   row4x4_start_ = frame_header.tile_info.tile_row_start[row_];
480*09537850SAkhilesh Sanikop   row4x4_end_ = frame_header.tile_info.tile_row_start[row_ + 1];
481*09537850SAkhilesh Sanikop   column4x4_start_ = frame_header.tile_info.tile_column_start[column_];
482*09537850SAkhilesh Sanikop   column4x4_end_ = frame_header.tile_info.tile_column_start[column_ + 1];
483*09537850SAkhilesh Sanikop   const int block_width4x4 = kNum4x4BlocksWide[SuperBlockSize()];
484*09537850SAkhilesh Sanikop   const int block_width4x4_log2 = k4x4HeightLog2[SuperBlockSize()];
485*09537850SAkhilesh Sanikop   superblock_rows_ =
486*09537850SAkhilesh Sanikop       (row4x4_end_ - row4x4_start_ + block_width4x4 - 1) >> block_width4x4_log2;
487*09537850SAkhilesh Sanikop   superblock_columns_ =
488*09537850SAkhilesh Sanikop       (column4x4_end_ - column4x4_start_ + block_width4x4 - 1) >>
489*09537850SAkhilesh Sanikop       block_width4x4_log2;
490*09537850SAkhilesh Sanikop   // If |split_parse_and_decode_| is true, we do the necessary setup for
491*09537850SAkhilesh Sanikop   // splitting the parsing and the decoding steps. This is done in the following
492*09537850SAkhilesh Sanikop   // two cases:
493*09537850SAkhilesh Sanikop   //  1) If there is multi-threading within a tile (this is done if
494*09537850SAkhilesh Sanikop   //     |thread_pool_| is not nullptr and if there are at least as many
495*09537850SAkhilesh Sanikop   //     superblock columns as |intra_block_copy_lag_|).
496*09537850SAkhilesh Sanikop   //  2) If |frame_parallel| is true.
497*09537850SAkhilesh Sanikop   split_parse_and_decode_ = (thread_pool_ != nullptr &&
498*09537850SAkhilesh Sanikop                              superblock_columns_ > intra_block_copy_lag_) ||
499*09537850SAkhilesh Sanikop                             frame_parallel;
500*09537850SAkhilesh Sanikop   if (frame_parallel_) {
501*09537850SAkhilesh Sanikop     reference_frame_progress_cache_.fill(INT_MIN);
502*09537850SAkhilesh Sanikop   }
503*09537850SAkhilesh Sanikop   memset(delta_lf_, 0, sizeof(delta_lf_));
504*09537850SAkhilesh Sanikop   delta_lf_all_zero_ = true;
505*09537850SAkhilesh Sanikop   const YuvBuffer& buffer = post_filter_.frame_buffer();
506*09537850SAkhilesh Sanikop   for (int plane = kPlaneY; plane < PlaneCount(); ++plane) {
507*09537850SAkhilesh Sanikop     // Verify that the borders are big enough for Reconstruct(). max_tx_length
508*09537850SAkhilesh Sanikop     // is the maximum value of tx_width and tx_height for the plane.
509*09537850SAkhilesh Sanikop     const int max_tx_length = (plane == kPlaneY) ? 64 : 32;
510*09537850SAkhilesh Sanikop     // Reconstruct() may overwrite on the right. Since the right border of a
511*09537850SAkhilesh Sanikop     // row is followed in memory by the left border of the next row, the
512*09537850SAkhilesh Sanikop     // number of extra pixels to the right of a row is at least the sum of the
513*09537850SAkhilesh Sanikop     // left and right borders.
514*09537850SAkhilesh Sanikop     //
515*09537850SAkhilesh Sanikop     // Note: This assertion actually checks the sum of the left and right
516*09537850SAkhilesh Sanikop     // borders of post_filter_.GetUnfilteredBuffer(), which is a horizontally
517*09537850SAkhilesh Sanikop     // and vertically shifted version of |buffer|. Since the sum of the left and
518*09537850SAkhilesh Sanikop     // right borders is not changed by the shift, we can just check the sum of
519*09537850SAkhilesh Sanikop     // the left and right borders of |buffer|.
520*09537850SAkhilesh Sanikop     assert(buffer.left_border(plane) + buffer.right_border(plane) >=
521*09537850SAkhilesh Sanikop            max_tx_length - 1);
522*09537850SAkhilesh Sanikop     // Reconstruct() may overwrite on the bottom. We need an extra border row
523*09537850SAkhilesh Sanikop     // on the bottom because we need the left border of that row.
524*09537850SAkhilesh Sanikop     //
525*09537850SAkhilesh Sanikop     // Note: This assertion checks the bottom border of
526*09537850SAkhilesh Sanikop     // post_filter_.GetUnfilteredBuffer(). So we need to calculate the vertical
527*09537850SAkhilesh Sanikop     // shift that the PostFilter constructor applied to |buffer| and reduce the
528*09537850SAkhilesh Sanikop     // bottom border by that amount.
529*09537850SAkhilesh Sanikop #ifndef NDEBUG
530*09537850SAkhilesh Sanikop     const int vertical_shift = static_cast<int>(
531*09537850SAkhilesh Sanikop         (post_filter_.GetUnfilteredBuffer(plane) - buffer.data(plane)) /
532*09537850SAkhilesh Sanikop         buffer.stride(plane));
533*09537850SAkhilesh Sanikop     const int bottom_border = buffer.bottom_border(plane) - vertical_shift;
534*09537850SAkhilesh Sanikop     assert(bottom_border >= max_tx_length);
535*09537850SAkhilesh Sanikop #endif
536*09537850SAkhilesh Sanikop     // In AV1, a transform block of height H starts at a y coordinate that is
537*09537850SAkhilesh Sanikop     // a multiple of H. If a transform block at the bottom of the frame has
538*09537850SAkhilesh Sanikop     // height H, then Reconstruct() will write up to the row with index
539*09537850SAkhilesh Sanikop     // Align(buffer.height(plane), H) - 1. Therefore the maximum number of
540*09537850SAkhilesh Sanikop     // rows Reconstruct() may write to is
541*09537850SAkhilesh Sanikop     // Align(buffer.height(plane), max_tx_length).
542*09537850SAkhilesh Sanikop     buffer_[plane].Reset(Align(buffer.height(plane), max_tx_length),
543*09537850SAkhilesh Sanikop                          buffer.stride(plane),
544*09537850SAkhilesh Sanikop                          post_filter_.GetUnfilteredBuffer(plane));
545*09537850SAkhilesh Sanikop   }
546*09537850SAkhilesh Sanikop }
547*09537850SAkhilesh Sanikop 
Init()548*09537850SAkhilesh Sanikop bool Tile::Init() {
549*09537850SAkhilesh Sanikop   assert(coefficient_levels_.size() == dc_categories_.size());
550*09537850SAkhilesh Sanikop   for (size_t i = 0; i < coefficient_levels_.size(); ++i) {
551*09537850SAkhilesh Sanikop     const int contexts_per_plane = (i == kEntropyContextLeft)
552*09537850SAkhilesh Sanikop                                        ? frame_header_.rows4x4
553*09537850SAkhilesh Sanikop                                        : frame_header_.columns4x4;
554*09537850SAkhilesh Sanikop     if (!coefficient_levels_[i].Reset(PlaneCount(), contexts_per_plane)) {
555*09537850SAkhilesh Sanikop       LIBGAV1_DLOG(ERROR, "coefficient_levels_[%zu].Reset() failed.", i);
556*09537850SAkhilesh Sanikop       return false;
557*09537850SAkhilesh Sanikop     }
558*09537850SAkhilesh Sanikop     if (!dc_categories_[i].Reset(PlaneCount(), contexts_per_plane)) {
559*09537850SAkhilesh Sanikop       LIBGAV1_DLOG(ERROR, "dc_categories_[%zu].Reset() failed.", i);
560*09537850SAkhilesh Sanikop       return false;
561*09537850SAkhilesh Sanikop     }
562*09537850SAkhilesh Sanikop   }
563*09537850SAkhilesh Sanikop   if (split_parse_and_decode_) {
564*09537850SAkhilesh Sanikop     assert(residual_buffer_pool_ != nullptr);
565*09537850SAkhilesh Sanikop     if (!residual_buffer_threaded_.Reset(superblock_rows_, superblock_columns_,
566*09537850SAkhilesh Sanikop                                          /*zero_initialize=*/false)) {
567*09537850SAkhilesh Sanikop       LIBGAV1_DLOG(ERROR, "residual_buffer_threaded_.Reset() failed.");
568*09537850SAkhilesh Sanikop       return false;
569*09537850SAkhilesh Sanikop     }
570*09537850SAkhilesh Sanikop   } else {
571*09537850SAkhilesh Sanikop     // Add 32 * |kResidualPaddingVertical| padding to avoid bottom boundary
572*09537850SAkhilesh Sanikop     // checks when parsing quantized coefficients.
573*09537850SAkhilesh Sanikop     residual_buffer_ = MakeAlignedUniquePtr<uint8_t>(
574*09537850SAkhilesh Sanikop         32, (4096 + 32 * kResidualPaddingVertical) * residual_size_);
575*09537850SAkhilesh Sanikop     if (residual_buffer_ == nullptr) {
576*09537850SAkhilesh Sanikop       LIBGAV1_DLOG(ERROR, "Allocation of residual_buffer_ failed.");
577*09537850SAkhilesh Sanikop       return false;
578*09537850SAkhilesh Sanikop     }
579*09537850SAkhilesh Sanikop     prediction_parameters_.reset(new (std::nothrow) PredictionParameters());
580*09537850SAkhilesh Sanikop     if (prediction_parameters_ == nullptr) {
581*09537850SAkhilesh Sanikop       LIBGAV1_DLOG(ERROR, "Allocation of prediction_parameters_ failed.");
582*09537850SAkhilesh Sanikop       return false;
583*09537850SAkhilesh Sanikop     }
584*09537850SAkhilesh Sanikop   }
585*09537850SAkhilesh Sanikop   if (frame_header_.use_ref_frame_mvs) {
586*09537850SAkhilesh Sanikop     assert(sequence_header_.enable_order_hint);
587*09537850SAkhilesh Sanikop     SetupMotionField(frame_header_, current_frame_, reference_frames_,
588*09537850SAkhilesh Sanikop                      row4x4_start_, row4x4_end_, column4x4_start_,
589*09537850SAkhilesh Sanikop                      column4x4_end_, &motion_field_);
590*09537850SAkhilesh Sanikop   }
591*09537850SAkhilesh Sanikop   ResetLoopRestorationParams();
592*09537850SAkhilesh Sanikop   if (!top_context_.Resize(superblock_columns_)) {
593*09537850SAkhilesh Sanikop     LIBGAV1_DLOG(ERROR, "Allocation of top_context_ failed.");
594*09537850SAkhilesh Sanikop     return false;
595*09537850SAkhilesh Sanikop   }
596*09537850SAkhilesh Sanikop   return true;
597*09537850SAkhilesh Sanikop }
598*09537850SAkhilesh Sanikop 
599*09537850SAkhilesh Sanikop template <ProcessingMode processing_mode, bool save_symbol_decoder_context>
ProcessSuperBlockRow(int row4x4,TileScratchBuffer * const scratch_buffer)600*09537850SAkhilesh Sanikop bool Tile::ProcessSuperBlockRow(int row4x4,
601*09537850SAkhilesh Sanikop                                 TileScratchBuffer* const scratch_buffer) {
602*09537850SAkhilesh Sanikop   if (row4x4 < row4x4_start_ || row4x4 >= row4x4_end_) return true;
603*09537850SAkhilesh Sanikop   assert(scratch_buffer != nullptr);
604*09537850SAkhilesh Sanikop   const int block_width4x4 = kNum4x4BlocksWide[SuperBlockSize()];
605*09537850SAkhilesh Sanikop   for (int column4x4 = column4x4_start_; column4x4 < column4x4_end_;
606*09537850SAkhilesh Sanikop        column4x4 += block_width4x4) {
607*09537850SAkhilesh Sanikop     if (!ProcessSuperBlock(row4x4, column4x4, scratch_buffer,
608*09537850SAkhilesh Sanikop                            processing_mode)) {
609*09537850SAkhilesh Sanikop       LIBGAV1_DLOG(ERROR, "Error decoding super block row: %d column: %d",
610*09537850SAkhilesh Sanikop                    row4x4, column4x4);
611*09537850SAkhilesh Sanikop       return false;
612*09537850SAkhilesh Sanikop     }
613*09537850SAkhilesh Sanikop   }
614*09537850SAkhilesh Sanikop   if (save_symbol_decoder_context && row4x4 + block_width4x4 >= row4x4_end_) {
615*09537850SAkhilesh Sanikop     SaveSymbolDecoderContext();
616*09537850SAkhilesh Sanikop   }
617*09537850SAkhilesh Sanikop   if (processing_mode == kProcessingModeDecodeOnly ||
618*09537850SAkhilesh Sanikop       processing_mode == kProcessingModeParseAndDecode) {
619*09537850SAkhilesh Sanikop     PopulateIntraPredictionBuffer(row4x4);
620*09537850SAkhilesh Sanikop   }
621*09537850SAkhilesh Sanikop   return true;
622*09537850SAkhilesh Sanikop }
623*09537850SAkhilesh Sanikop 
624*09537850SAkhilesh Sanikop // Used in frame parallel mode. The symbol decoder context need not be saved in
625*09537850SAkhilesh Sanikop // this case since it was done when parsing was complete.
626*09537850SAkhilesh Sanikop template bool Tile::ProcessSuperBlockRow<kProcessingModeDecodeOnly, false>(
627*09537850SAkhilesh Sanikop     int row4x4, TileScratchBuffer* scratch_buffer);
628*09537850SAkhilesh Sanikop // Used in non frame parallel mode.
629*09537850SAkhilesh Sanikop template bool Tile::ProcessSuperBlockRow<kProcessingModeParseAndDecode, true>(
630*09537850SAkhilesh Sanikop     int row4x4, TileScratchBuffer* scratch_buffer);
631*09537850SAkhilesh Sanikop 
SaveSymbolDecoderContext()632*09537850SAkhilesh Sanikop void Tile::SaveSymbolDecoderContext() {
633*09537850SAkhilesh Sanikop   if (frame_header_.enable_frame_end_update_cdf &&
634*09537850SAkhilesh Sanikop       number_ == frame_header_.tile_info.context_update_id) {
635*09537850SAkhilesh Sanikop     *saved_symbol_decoder_context_ = symbol_decoder_context_;
636*09537850SAkhilesh Sanikop   }
637*09537850SAkhilesh Sanikop }
638*09537850SAkhilesh Sanikop 
ParseAndDecode()639*09537850SAkhilesh Sanikop bool Tile::ParseAndDecode() {
640*09537850SAkhilesh Sanikop   if (split_parse_and_decode_) {
641*09537850SAkhilesh Sanikop     if (!ThreadedParseAndDecode()) return false;
642*09537850SAkhilesh Sanikop     SaveSymbolDecoderContext();
643*09537850SAkhilesh Sanikop     return true;
644*09537850SAkhilesh Sanikop   }
645*09537850SAkhilesh Sanikop   std::unique_ptr<TileScratchBuffer> scratch_buffer =
646*09537850SAkhilesh Sanikop       tile_scratch_buffer_pool_->Get();
647*09537850SAkhilesh Sanikop   if (scratch_buffer == nullptr) {
648*09537850SAkhilesh Sanikop     pending_tiles_->Decrement(false);
649*09537850SAkhilesh Sanikop     LIBGAV1_DLOG(ERROR, "Failed to get scratch buffer.");
650*09537850SAkhilesh Sanikop     return false;
651*09537850SAkhilesh Sanikop   }
652*09537850SAkhilesh Sanikop   const int block_width4x4 = kNum4x4BlocksWide[SuperBlockSize()];
653*09537850SAkhilesh Sanikop   for (int row4x4 = row4x4_start_; row4x4 < row4x4_end_;
654*09537850SAkhilesh Sanikop        row4x4 += block_width4x4) {
655*09537850SAkhilesh Sanikop     if (!ProcessSuperBlockRow<kProcessingModeParseAndDecode, true>(
656*09537850SAkhilesh Sanikop             row4x4, scratch_buffer.get())) {
657*09537850SAkhilesh Sanikop       pending_tiles_->Decrement(false);
658*09537850SAkhilesh Sanikop       return false;
659*09537850SAkhilesh Sanikop     }
660*09537850SAkhilesh Sanikop   }
661*09537850SAkhilesh Sanikop   tile_scratch_buffer_pool_->Release(std::move(scratch_buffer));
662*09537850SAkhilesh Sanikop   pending_tiles_->Decrement(true);
663*09537850SAkhilesh Sanikop   return true;
664*09537850SAkhilesh Sanikop }
665*09537850SAkhilesh Sanikop 
Parse()666*09537850SAkhilesh Sanikop bool Tile::Parse() {
667*09537850SAkhilesh Sanikop   const int block_width4x4 = kNum4x4BlocksWide[SuperBlockSize()];
668*09537850SAkhilesh Sanikop   std::unique_ptr<TileScratchBuffer> scratch_buffer =
669*09537850SAkhilesh Sanikop       tile_scratch_buffer_pool_->Get();
670*09537850SAkhilesh Sanikop   if (scratch_buffer == nullptr) {
671*09537850SAkhilesh Sanikop     LIBGAV1_DLOG(ERROR, "Failed to get scratch buffer.");
672*09537850SAkhilesh Sanikop     return false;
673*09537850SAkhilesh Sanikop   }
674*09537850SAkhilesh Sanikop   for (int row4x4 = row4x4_start_; row4x4 < row4x4_end_;
675*09537850SAkhilesh Sanikop        row4x4 += block_width4x4) {
676*09537850SAkhilesh Sanikop     if (!ProcessSuperBlockRow<kProcessingModeParseOnly, false>(
677*09537850SAkhilesh Sanikop             row4x4, scratch_buffer.get())) {
678*09537850SAkhilesh Sanikop       return false;
679*09537850SAkhilesh Sanikop     }
680*09537850SAkhilesh Sanikop   }
681*09537850SAkhilesh Sanikop   tile_scratch_buffer_pool_->Release(std::move(scratch_buffer));
682*09537850SAkhilesh Sanikop   SaveSymbolDecoderContext();
683*09537850SAkhilesh Sanikop   return true;
684*09537850SAkhilesh Sanikop }
685*09537850SAkhilesh Sanikop 
Decode(std::mutex * const mutex,int * const superblock_row_progress,std::condition_variable * const superblock_row_progress_condvar)686*09537850SAkhilesh Sanikop bool Tile::Decode(
687*09537850SAkhilesh Sanikop     std::mutex* const mutex, int* const superblock_row_progress,
688*09537850SAkhilesh Sanikop     std::condition_variable* const superblock_row_progress_condvar) {
689*09537850SAkhilesh Sanikop   const int block_width4x4 = sequence_header_.use_128x128_superblock ? 32 : 16;
690*09537850SAkhilesh Sanikop   const int block_width4x4_log2 =
691*09537850SAkhilesh Sanikop       sequence_header_.use_128x128_superblock ? 5 : 4;
692*09537850SAkhilesh Sanikop   std::unique_ptr<TileScratchBuffer> scratch_buffer =
693*09537850SAkhilesh Sanikop       tile_scratch_buffer_pool_->Get();
694*09537850SAkhilesh Sanikop   if (scratch_buffer == nullptr) {
695*09537850SAkhilesh Sanikop     LIBGAV1_DLOG(ERROR, "Failed to get scratch buffer.");
696*09537850SAkhilesh Sanikop     return false;
697*09537850SAkhilesh Sanikop   }
698*09537850SAkhilesh Sanikop   for (int row4x4 = row4x4_start_, index = row4x4_start_ >> block_width4x4_log2;
699*09537850SAkhilesh Sanikop        row4x4 < row4x4_end_; row4x4 += block_width4x4, ++index) {
700*09537850SAkhilesh Sanikop     if (!ProcessSuperBlockRow<kProcessingModeDecodeOnly, false>(
701*09537850SAkhilesh Sanikop             row4x4, scratch_buffer.get())) {
702*09537850SAkhilesh Sanikop       return false;
703*09537850SAkhilesh Sanikop     }
704*09537850SAkhilesh Sanikop     if (post_filter_.DoDeblock()) {
705*09537850SAkhilesh Sanikop       // Apply vertical deblock filtering for all the columns in this tile
706*09537850SAkhilesh Sanikop       // except for the first 64 columns.
707*09537850SAkhilesh Sanikop       post_filter_.ApplyDeblockFilter(
708*09537850SAkhilesh Sanikop           kLoopFilterTypeVertical, row4x4,
709*09537850SAkhilesh Sanikop           column4x4_start_ + kNum4x4InLoopFilterUnit, column4x4_end_,
710*09537850SAkhilesh Sanikop           block_width4x4);
711*09537850SAkhilesh Sanikop       // If this is the first superblock row of the tile, then we cannot apply
712*09537850SAkhilesh Sanikop       // horizontal deblocking here since we don't know if the top row is
713*09537850SAkhilesh Sanikop       // available. So it will be done by the calling thread in that case.
714*09537850SAkhilesh Sanikop       if (row4x4 != row4x4_start_) {
715*09537850SAkhilesh Sanikop         // Apply horizontal deblock filtering for all the columns in this tile
716*09537850SAkhilesh Sanikop         // except for the first and the last 64 columns.
717*09537850SAkhilesh Sanikop         // Note about the last tile of each row: For the last tile,
718*09537850SAkhilesh Sanikop         // column4x4_end may not be a multiple of 16. In that case it is still
719*09537850SAkhilesh Sanikop         // okay to simply subtract 16 since ApplyDeblockFilter() will only do
720*09537850SAkhilesh Sanikop         // the filters in increments of 64 columns (or 32 columns for chroma
721*09537850SAkhilesh Sanikop         // with subsampling).
722*09537850SAkhilesh Sanikop         post_filter_.ApplyDeblockFilter(
723*09537850SAkhilesh Sanikop             kLoopFilterTypeHorizontal, row4x4,
724*09537850SAkhilesh Sanikop             column4x4_start_ + kNum4x4InLoopFilterUnit,
725*09537850SAkhilesh Sanikop             column4x4_end_ - kNum4x4InLoopFilterUnit, block_width4x4);
726*09537850SAkhilesh Sanikop       }
727*09537850SAkhilesh Sanikop     }
728*09537850SAkhilesh Sanikop     bool notify;
729*09537850SAkhilesh Sanikop     {
730*09537850SAkhilesh Sanikop       std::unique_lock<std::mutex> lock(*mutex);
731*09537850SAkhilesh Sanikop       notify = ++superblock_row_progress[index] ==
732*09537850SAkhilesh Sanikop                frame_header_.tile_info.tile_columns;
733*09537850SAkhilesh Sanikop     }
734*09537850SAkhilesh Sanikop     if (notify) {
735*09537850SAkhilesh Sanikop       // We are done decoding this superblock row. Notify the post filtering
736*09537850SAkhilesh Sanikop       // thread.
737*09537850SAkhilesh Sanikop       superblock_row_progress_condvar[index].notify_one();
738*09537850SAkhilesh Sanikop     }
739*09537850SAkhilesh Sanikop   }
740*09537850SAkhilesh Sanikop   tile_scratch_buffer_pool_->Release(std::move(scratch_buffer));
741*09537850SAkhilesh Sanikop   return true;
742*09537850SAkhilesh Sanikop }
743*09537850SAkhilesh Sanikop 
ThreadedParseAndDecode()744*09537850SAkhilesh Sanikop bool Tile::ThreadedParseAndDecode() {
745*09537850SAkhilesh Sanikop   {
746*09537850SAkhilesh Sanikop     std::lock_guard<std::mutex> lock(threading_.mutex);
747*09537850SAkhilesh Sanikop     if (!threading_.sb_state.Reset(superblock_rows_, superblock_columns_)) {
748*09537850SAkhilesh Sanikop       pending_tiles_->Decrement(false);
749*09537850SAkhilesh Sanikop       LIBGAV1_DLOG(ERROR, "threading.sb_state.Reset() failed.");
750*09537850SAkhilesh Sanikop       return false;
751*09537850SAkhilesh Sanikop     }
752*09537850SAkhilesh Sanikop     // Account for the parsing job.
753*09537850SAkhilesh Sanikop     ++threading_.pending_jobs;
754*09537850SAkhilesh Sanikop   }
755*09537850SAkhilesh Sanikop 
756*09537850SAkhilesh Sanikop   const int block_width4x4 = kNum4x4BlocksWide[SuperBlockSize()];
757*09537850SAkhilesh Sanikop 
758*09537850SAkhilesh Sanikop   // Begin parsing.
759*09537850SAkhilesh Sanikop   std::unique_ptr<TileScratchBuffer> scratch_buffer =
760*09537850SAkhilesh Sanikop       tile_scratch_buffer_pool_->Get();
761*09537850SAkhilesh Sanikop   if (scratch_buffer == nullptr) {
762*09537850SAkhilesh Sanikop     pending_tiles_->Decrement(false);
763*09537850SAkhilesh Sanikop     LIBGAV1_DLOG(ERROR, "Failed to get scratch buffer.");
764*09537850SAkhilesh Sanikop     return false;
765*09537850SAkhilesh Sanikop   }
766*09537850SAkhilesh Sanikop   for (int row4x4 = row4x4_start_, row_index = 0; row4x4 < row4x4_end_;
767*09537850SAkhilesh Sanikop        row4x4 += block_width4x4, ++row_index) {
768*09537850SAkhilesh Sanikop     for (int column4x4 = column4x4_start_, column_index = 0;
769*09537850SAkhilesh Sanikop          column4x4 < column4x4_end_;
770*09537850SAkhilesh Sanikop          column4x4 += block_width4x4, ++column_index) {
771*09537850SAkhilesh Sanikop       if (!ProcessSuperBlock(row4x4, column4x4, scratch_buffer.get(),
772*09537850SAkhilesh Sanikop                              kProcessingModeParseOnly)) {
773*09537850SAkhilesh Sanikop         std::lock_guard<std::mutex> lock(threading_.mutex);
774*09537850SAkhilesh Sanikop         threading_.abort = true;
775*09537850SAkhilesh Sanikop         break;
776*09537850SAkhilesh Sanikop       }
777*09537850SAkhilesh Sanikop       std::unique_lock<std::mutex> lock(threading_.mutex);
778*09537850SAkhilesh Sanikop       if (threading_.abort) break;
779*09537850SAkhilesh Sanikop       threading_.sb_state[row_index][column_index] = kSuperBlockStateParsed;
780*09537850SAkhilesh Sanikop       // Schedule the decoding of this superblock if it is allowed.
781*09537850SAkhilesh Sanikop       if (CanDecode(row_index, column_index)) {
782*09537850SAkhilesh Sanikop         ++threading_.pending_jobs;
783*09537850SAkhilesh Sanikop         threading_.sb_state[row_index][column_index] =
784*09537850SAkhilesh Sanikop             kSuperBlockStateScheduled;
785*09537850SAkhilesh Sanikop         lock.unlock();
786*09537850SAkhilesh Sanikop         thread_pool_->Schedule(
787*09537850SAkhilesh Sanikop             [this, row_index, column_index, block_width4x4]() {
788*09537850SAkhilesh Sanikop               DecodeSuperBlock(row_index, column_index, block_width4x4);
789*09537850SAkhilesh Sanikop             });
790*09537850SAkhilesh Sanikop       }
791*09537850SAkhilesh Sanikop     }
792*09537850SAkhilesh Sanikop     std::lock_guard<std::mutex> lock(threading_.mutex);
793*09537850SAkhilesh Sanikop     if (threading_.abort) break;
794*09537850SAkhilesh Sanikop   }
795*09537850SAkhilesh Sanikop   tile_scratch_buffer_pool_->Release(std::move(scratch_buffer));
796*09537850SAkhilesh Sanikop 
797*09537850SAkhilesh Sanikop   // We are done parsing. We can return here since the calling thread will make
798*09537850SAkhilesh Sanikop   // sure that it waits for all the superblocks to be decoded.
799*09537850SAkhilesh Sanikop   //
800*09537850SAkhilesh Sanikop   // Finish using |threading_| before |pending_tiles_->Decrement()| because the
801*09537850SAkhilesh Sanikop   // Tile object could go out of scope as soon as |pending_tiles_->Decrement()|
802*09537850SAkhilesh Sanikop   // is called.
803*09537850SAkhilesh Sanikop   threading_.mutex.lock();
804*09537850SAkhilesh Sanikop   const bool no_pending_jobs = (--threading_.pending_jobs == 0);
805*09537850SAkhilesh Sanikop   const bool job_succeeded = !threading_.abort;
806*09537850SAkhilesh Sanikop   threading_.mutex.unlock();
807*09537850SAkhilesh Sanikop   if (no_pending_jobs) {
808*09537850SAkhilesh Sanikop     // We are done parsing and decoding this tile.
809*09537850SAkhilesh Sanikop     pending_tiles_->Decrement(job_succeeded);
810*09537850SAkhilesh Sanikop   }
811*09537850SAkhilesh Sanikop   return job_succeeded;
812*09537850SAkhilesh Sanikop }
813*09537850SAkhilesh Sanikop 
CanDecode(int row_index,int column_index) const814*09537850SAkhilesh Sanikop bool Tile::CanDecode(int row_index, int column_index) const {
815*09537850SAkhilesh Sanikop   assert(row_index >= 0);
816*09537850SAkhilesh Sanikop   assert(column_index >= 0);
817*09537850SAkhilesh Sanikop   // If |threading_.sb_state[row_index][column_index]| is not equal to
818*09537850SAkhilesh Sanikop   // kSuperBlockStateParsed, then return false. This is ok because if
819*09537850SAkhilesh Sanikop   // |threading_.sb_state[row_index][column_index]| is equal to:
820*09537850SAkhilesh Sanikop   //   kSuperBlockStateNone - then the superblock is not yet parsed.
821*09537850SAkhilesh Sanikop   //   kSuperBlockStateScheduled - then the superblock is already scheduled for
822*09537850SAkhilesh Sanikop   //                               decode.
823*09537850SAkhilesh Sanikop   //   kSuperBlockStateDecoded - then the superblock has already been decoded.
824*09537850SAkhilesh Sanikop   if (row_index >= superblock_rows_ || column_index >= superblock_columns_ ||
825*09537850SAkhilesh Sanikop       threading_.sb_state[row_index][column_index] != kSuperBlockStateParsed) {
826*09537850SAkhilesh Sanikop     return false;
827*09537850SAkhilesh Sanikop   }
828*09537850SAkhilesh Sanikop   // First superblock has no dependencies.
829*09537850SAkhilesh Sanikop   if (row_index == 0 && column_index == 0) {
830*09537850SAkhilesh Sanikop     return true;
831*09537850SAkhilesh Sanikop   }
832*09537850SAkhilesh Sanikop   // Superblocks in the first row only depend on the superblock to the left of
833*09537850SAkhilesh Sanikop   // it.
834*09537850SAkhilesh Sanikop   if (row_index == 0) {
835*09537850SAkhilesh Sanikop     return threading_.sb_state[0][column_index - 1] == kSuperBlockStateDecoded;
836*09537850SAkhilesh Sanikop   }
837*09537850SAkhilesh Sanikop   // All other superblocks depend on superblock to the left of it (if one
838*09537850SAkhilesh Sanikop   // exists) and superblock to the top right with a lag of
839*09537850SAkhilesh Sanikop   // |intra_block_copy_lag_| (if one exists).
840*09537850SAkhilesh Sanikop   const int top_right_column_index =
841*09537850SAkhilesh Sanikop       std::min(column_index + intra_block_copy_lag_, superblock_columns_ - 1);
842*09537850SAkhilesh Sanikop   return threading_.sb_state[row_index - 1][top_right_column_index] ==
843*09537850SAkhilesh Sanikop              kSuperBlockStateDecoded &&
844*09537850SAkhilesh Sanikop          (column_index == 0 ||
845*09537850SAkhilesh Sanikop           threading_.sb_state[row_index][column_index - 1] ==
846*09537850SAkhilesh Sanikop               kSuperBlockStateDecoded);
847*09537850SAkhilesh Sanikop }
848*09537850SAkhilesh Sanikop 
DecodeSuperBlock(int row_index,int column_index,int block_width4x4)849*09537850SAkhilesh Sanikop void Tile::DecodeSuperBlock(int row_index, int column_index,
850*09537850SAkhilesh Sanikop                             int block_width4x4) {
851*09537850SAkhilesh Sanikop   const int row4x4 = row4x4_start_ + (row_index * block_width4x4);
852*09537850SAkhilesh Sanikop   const int column4x4 = column4x4_start_ + (column_index * block_width4x4);
853*09537850SAkhilesh Sanikop   std::unique_ptr<TileScratchBuffer> scratch_buffer =
854*09537850SAkhilesh Sanikop       tile_scratch_buffer_pool_->Get();
855*09537850SAkhilesh Sanikop   bool ok = scratch_buffer != nullptr;
856*09537850SAkhilesh Sanikop   if (ok) {
857*09537850SAkhilesh Sanikop     ok = ProcessSuperBlock(row4x4, column4x4, scratch_buffer.get(),
858*09537850SAkhilesh Sanikop                            kProcessingModeDecodeOnly);
859*09537850SAkhilesh Sanikop     tile_scratch_buffer_pool_->Release(std::move(scratch_buffer));
860*09537850SAkhilesh Sanikop   }
861*09537850SAkhilesh Sanikop   std::unique_lock<std::mutex> lock(threading_.mutex);
862*09537850SAkhilesh Sanikop   if (ok) {
863*09537850SAkhilesh Sanikop     threading_.sb_state[row_index][column_index] = kSuperBlockStateDecoded;
864*09537850SAkhilesh Sanikop     // Candidate rows and columns that we could potentially begin the decoding
865*09537850SAkhilesh Sanikop     // (if it is allowed to do so). The candidates are:
866*09537850SAkhilesh Sanikop     //   1) The superblock to the bottom-left of the current superblock with a
867*09537850SAkhilesh Sanikop     //   lag of |intra_block_copy_lag_| (or the beginning of the next superblock
868*09537850SAkhilesh Sanikop     //   row in case there are less than |intra_block_copy_lag_| superblock
869*09537850SAkhilesh Sanikop     //   columns in the Tile).
870*09537850SAkhilesh Sanikop     //   2) The superblock to the right of the current superblock.
871*09537850SAkhilesh Sanikop     const int candidate_row_indices[] = {row_index + 1, row_index};
872*09537850SAkhilesh Sanikop     const int candidate_column_indices[] = {
873*09537850SAkhilesh Sanikop         std::max(0, column_index - intra_block_copy_lag_), column_index + 1};
874*09537850SAkhilesh Sanikop     for (size_t i = 0; i < std::extent<decltype(candidate_row_indices)>::value;
875*09537850SAkhilesh Sanikop          ++i) {
876*09537850SAkhilesh Sanikop       const int candidate_row_index = candidate_row_indices[i];
877*09537850SAkhilesh Sanikop       const int candidate_column_index = candidate_column_indices[i];
878*09537850SAkhilesh Sanikop       if (!CanDecode(candidate_row_index, candidate_column_index)) {
879*09537850SAkhilesh Sanikop         continue;
880*09537850SAkhilesh Sanikop       }
881*09537850SAkhilesh Sanikop       ++threading_.pending_jobs;
882*09537850SAkhilesh Sanikop       threading_.sb_state[candidate_row_index][candidate_column_index] =
883*09537850SAkhilesh Sanikop           kSuperBlockStateScheduled;
884*09537850SAkhilesh Sanikop       lock.unlock();
885*09537850SAkhilesh Sanikop       thread_pool_->Schedule([this, candidate_row_index, candidate_column_index,
886*09537850SAkhilesh Sanikop                               block_width4x4]() {
887*09537850SAkhilesh Sanikop         DecodeSuperBlock(candidate_row_index, candidate_column_index,
888*09537850SAkhilesh Sanikop                          block_width4x4);
889*09537850SAkhilesh Sanikop       });
890*09537850SAkhilesh Sanikop       lock.lock();
891*09537850SAkhilesh Sanikop     }
892*09537850SAkhilesh Sanikop   } else {
893*09537850SAkhilesh Sanikop     threading_.abort = true;
894*09537850SAkhilesh Sanikop   }
895*09537850SAkhilesh Sanikop   // Finish using |threading_| before |pending_tiles_->Decrement()| because the
896*09537850SAkhilesh Sanikop   // Tile object could go out of scope as soon as |pending_tiles_->Decrement()|
897*09537850SAkhilesh Sanikop   // is called.
898*09537850SAkhilesh Sanikop   const bool no_pending_jobs = (--threading_.pending_jobs == 0);
899*09537850SAkhilesh Sanikop   const bool job_succeeded = !threading_.abort;
900*09537850SAkhilesh Sanikop   lock.unlock();
901*09537850SAkhilesh Sanikop   if (no_pending_jobs) {
902*09537850SAkhilesh Sanikop     // We are done parsing and decoding this tile.
903*09537850SAkhilesh Sanikop     pending_tiles_->Decrement(job_succeeded);
904*09537850SAkhilesh Sanikop   }
905*09537850SAkhilesh Sanikop }
906*09537850SAkhilesh Sanikop 
PopulateIntraPredictionBuffer(int row4x4)907*09537850SAkhilesh Sanikop void Tile::PopulateIntraPredictionBuffer(int row4x4) {
908*09537850SAkhilesh Sanikop   const int block_width4x4 = kNum4x4BlocksWide[SuperBlockSize()];
909*09537850SAkhilesh Sanikop   if (!use_intra_prediction_buffer_ || row4x4 + block_width4x4 >= row4x4_end_) {
910*09537850SAkhilesh Sanikop     return;
911*09537850SAkhilesh Sanikop   }
912*09537850SAkhilesh Sanikop   const size_t pixel_size =
913*09537850SAkhilesh Sanikop       (sequence_header_.color_config.bitdepth == 8 ? sizeof(uint8_t)
914*09537850SAkhilesh Sanikop                                                    : sizeof(uint16_t));
915*09537850SAkhilesh Sanikop   for (int plane = kPlaneY; plane < PlaneCount(); ++plane) {
916*09537850SAkhilesh Sanikop     const int row_to_copy =
917*09537850SAkhilesh Sanikop         (MultiplyBy4(row4x4 + block_width4x4) >> subsampling_y_[plane]) - 1;
918*09537850SAkhilesh Sanikop     const size_t pixels_to_copy =
919*09537850SAkhilesh Sanikop         (MultiplyBy4(column4x4_end_ - column4x4_start_) >>
920*09537850SAkhilesh Sanikop          subsampling_x_[plane]) *
921*09537850SAkhilesh Sanikop         pixel_size;
922*09537850SAkhilesh Sanikop     const size_t column_start =
923*09537850SAkhilesh Sanikop         MultiplyBy4(column4x4_start_) >> subsampling_x_[plane];
924*09537850SAkhilesh Sanikop     void* start;
925*09537850SAkhilesh Sanikop #if LIBGAV1_MAX_BITDEPTH >= 10
926*09537850SAkhilesh Sanikop     if (sequence_header_.color_config.bitdepth > 8) {
927*09537850SAkhilesh Sanikop       Array2DView<uint16_t> buffer(
928*09537850SAkhilesh Sanikop           buffer_[plane].rows(), buffer_[plane].columns() / sizeof(uint16_t),
929*09537850SAkhilesh Sanikop           reinterpret_cast<uint16_t*>(&buffer_[plane][0][0]));
930*09537850SAkhilesh Sanikop       start = &buffer[row_to_copy][column_start];
931*09537850SAkhilesh Sanikop     } else  // NOLINT
932*09537850SAkhilesh Sanikop #endif
933*09537850SAkhilesh Sanikop     {
934*09537850SAkhilesh Sanikop       start = &buffer_[plane][row_to_copy][column_start];
935*09537850SAkhilesh Sanikop     }
936*09537850SAkhilesh Sanikop     memcpy((*intra_prediction_buffer_)[plane].get() + column_start * pixel_size,
937*09537850SAkhilesh Sanikop            start, pixels_to_copy);
938*09537850SAkhilesh Sanikop   }
939*09537850SAkhilesh Sanikop }
940*09537850SAkhilesh Sanikop 
GetTransformAllZeroContext(const Block & block,Plane plane,TransformSize tx_size,int x4,int y4,int w4,int h4)941*09537850SAkhilesh Sanikop int Tile::GetTransformAllZeroContext(const Block& block, Plane plane,
942*09537850SAkhilesh Sanikop                                      TransformSize tx_size, int x4, int y4,
943*09537850SAkhilesh Sanikop                                      int w4, int h4) {
944*09537850SAkhilesh Sanikop   const int max_x4x4 = frame_header_.columns4x4 >> subsampling_x_[plane];
945*09537850SAkhilesh Sanikop   const int max_y4x4 = frame_header_.rows4x4 >> subsampling_y_[plane];
946*09537850SAkhilesh Sanikop 
947*09537850SAkhilesh Sanikop   const int tx_width = kTransformWidth[tx_size];
948*09537850SAkhilesh Sanikop   const int tx_height = kTransformHeight[tx_size];
949*09537850SAkhilesh Sanikop   const BlockSize plane_size = block.residual_size[plane];
950*09537850SAkhilesh Sanikop   const int block_width = kBlockWidthPixels[plane_size];
951*09537850SAkhilesh Sanikop   const int block_height = kBlockHeightPixels[plane_size];
952*09537850SAkhilesh Sanikop 
953*09537850SAkhilesh Sanikop   int top = 0;
954*09537850SAkhilesh Sanikop   int left = 0;
955*09537850SAkhilesh Sanikop   const int num_top_elements = GetNumElements(w4, x4, max_x4x4);
956*09537850SAkhilesh Sanikop   const int num_left_elements = GetNumElements(h4, y4, max_y4x4);
957*09537850SAkhilesh Sanikop   if (plane == kPlaneY) {
958*09537850SAkhilesh Sanikop     if (block_width == tx_width && block_height == tx_height) return 0;
959*09537850SAkhilesh Sanikop     const uint8_t* coefficient_levels =
960*09537850SAkhilesh Sanikop         &coefficient_levels_[kEntropyContextTop][plane][x4];
961*09537850SAkhilesh Sanikop     for (int i = 0; i < num_top_elements; ++i) {
962*09537850SAkhilesh Sanikop       top = std::max(top, static_cast<int>(coefficient_levels[i]));
963*09537850SAkhilesh Sanikop     }
964*09537850SAkhilesh Sanikop     coefficient_levels = &coefficient_levels_[kEntropyContextLeft][plane][y4];
965*09537850SAkhilesh Sanikop     for (int i = 0; i < num_left_elements; ++i) {
966*09537850SAkhilesh Sanikop       left = std::max(left, static_cast<int>(coefficient_levels[i]));
967*09537850SAkhilesh Sanikop     }
968*09537850SAkhilesh Sanikop     assert(top <= 4);
969*09537850SAkhilesh Sanikop     assert(left <= 4);
970*09537850SAkhilesh Sanikop     // kAllZeroContextsByTopLeft is pre-computed based on the logic in the spec
971*09537850SAkhilesh Sanikop     // for top and left.
972*09537850SAkhilesh Sanikop     return kAllZeroContextsByTopLeft[top][left];
973*09537850SAkhilesh Sanikop   }
974*09537850SAkhilesh Sanikop   const uint8_t* coefficient_levels =
975*09537850SAkhilesh Sanikop       &coefficient_levels_[kEntropyContextTop][plane][x4];
976*09537850SAkhilesh Sanikop   const int8_t* dc_categories = &dc_categories_[kEntropyContextTop][plane][x4];
977*09537850SAkhilesh Sanikop   for (int i = 0; i < num_top_elements; ++i) {
978*09537850SAkhilesh Sanikop     top |= coefficient_levels[i];
979*09537850SAkhilesh Sanikop     top |= dc_categories[i];
980*09537850SAkhilesh Sanikop   }
981*09537850SAkhilesh Sanikop   coefficient_levels = &coefficient_levels_[kEntropyContextLeft][plane][y4];
982*09537850SAkhilesh Sanikop   dc_categories = &dc_categories_[kEntropyContextLeft][plane][y4];
983*09537850SAkhilesh Sanikop   for (int i = 0; i < num_left_elements; ++i) {
984*09537850SAkhilesh Sanikop     left |= coefficient_levels[i];
985*09537850SAkhilesh Sanikop     left |= dc_categories[i];
986*09537850SAkhilesh Sanikop   }
987*09537850SAkhilesh Sanikop   return static_cast<int>(top != 0) + static_cast<int>(left != 0) + 7 +
988*09537850SAkhilesh Sanikop          3 * static_cast<int>(block_width * block_height >
989*09537850SAkhilesh Sanikop                               tx_width * tx_height);
990*09537850SAkhilesh Sanikop }
991*09537850SAkhilesh Sanikop 
GetTransformSet(TransformSize tx_size,bool is_inter) const992*09537850SAkhilesh Sanikop TransformSet Tile::GetTransformSet(TransformSize tx_size, bool is_inter) const {
993*09537850SAkhilesh Sanikop   const TransformSize tx_size_square_min = kTransformSizeSquareMin[tx_size];
994*09537850SAkhilesh Sanikop   const TransformSize tx_size_square_max = kTransformSizeSquareMax[tx_size];
995*09537850SAkhilesh Sanikop   if (tx_size_square_max == kTransformSize64x64) return kTransformSetDctOnly;
996*09537850SAkhilesh Sanikop   if (is_inter) {
997*09537850SAkhilesh Sanikop     if (frame_header_.reduced_tx_set ||
998*09537850SAkhilesh Sanikop         tx_size_square_max == kTransformSize32x32) {
999*09537850SAkhilesh Sanikop       return kTransformSetInter3;
1000*09537850SAkhilesh Sanikop     }
1001*09537850SAkhilesh Sanikop     if (tx_size_square_min == kTransformSize16x16) return kTransformSetInter2;
1002*09537850SAkhilesh Sanikop     return kTransformSetInter1;
1003*09537850SAkhilesh Sanikop   }
1004*09537850SAkhilesh Sanikop   if (tx_size_square_max == kTransformSize32x32) return kTransformSetDctOnly;
1005*09537850SAkhilesh Sanikop   if (frame_header_.reduced_tx_set ||
1006*09537850SAkhilesh Sanikop       tx_size_square_min == kTransformSize16x16) {
1007*09537850SAkhilesh Sanikop     return kTransformSetIntra2;
1008*09537850SAkhilesh Sanikop   }
1009*09537850SAkhilesh Sanikop   return kTransformSetIntra1;
1010*09537850SAkhilesh Sanikop }
1011*09537850SAkhilesh Sanikop 
ComputeTransformType(const Block & block,Plane plane,TransformSize tx_size,int block_x,int block_y)1012*09537850SAkhilesh Sanikop TransformType Tile::ComputeTransformType(const Block& block, Plane plane,
1013*09537850SAkhilesh Sanikop                                          TransformSize tx_size, int block_x,
1014*09537850SAkhilesh Sanikop                                          int block_y) {
1015*09537850SAkhilesh Sanikop   const BlockParameters& bp = *block.bp;
1016*09537850SAkhilesh Sanikop   const TransformSize tx_size_square_max = kTransformSizeSquareMax[tx_size];
1017*09537850SAkhilesh Sanikop   if (frame_header_.segmentation
1018*09537850SAkhilesh Sanikop           .lossless[bp.prediction_parameters->segment_id] ||
1019*09537850SAkhilesh Sanikop       tx_size_square_max == kTransformSize64x64) {
1020*09537850SAkhilesh Sanikop     return kTransformTypeDctDct;
1021*09537850SAkhilesh Sanikop   }
1022*09537850SAkhilesh Sanikop   if (plane == kPlaneY) {
1023*09537850SAkhilesh Sanikop     return transform_types_[block_y - block.row4x4][block_x - block.column4x4];
1024*09537850SAkhilesh Sanikop   }
1025*09537850SAkhilesh Sanikop   const TransformSet tx_set = GetTransformSet(tx_size, bp.is_inter);
1026*09537850SAkhilesh Sanikop   TransformType tx_type;
1027*09537850SAkhilesh Sanikop   if (bp.is_inter) {
1028*09537850SAkhilesh Sanikop     const int x4 =
1029*09537850SAkhilesh Sanikop         std::max(block.column4x4, block_x << subsampling_x_[kPlaneU]);
1030*09537850SAkhilesh Sanikop     const int y4 = std::max(block.row4x4, block_y << subsampling_y_[kPlaneU]);
1031*09537850SAkhilesh Sanikop     tx_type = transform_types_[y4 - block.row4x4][x4 - block.column4x4];
1032*09537850SAkhilesh Sanikop   } else {
1033*09537850SAkhilesh Sanikop     tx_type = kModeToTransformType[bp.prediction_parameters->uv_mode];
1034*09537850SAkhilesh Sanikop   }
1035*09537850SAkhilesh Sanikop   return kTransformTypeInSetMask[tx_set].Contains(tx_type)
1036*09537850SAkhilesh Sanikop              ? tx_type
1037*09537850SAkhilesh Sanikop              : kTransformTypeDctDct;
1038*09537850SAkhilesh Sanikop }
1039*09537850SAkhilesh Sanikop 
ReadTransformType(const Block & block,int x4,int y4,TransformSize tx_size)1040*09537850SAkhilesh Sanikop void Tile::ReadTransformType(const Block& block, int x4, int y4,
1041*09537850SAkhilesh Sanikop                              TransformSize tx_size) {
1042*09537850SAkhilesh Sanikop   BlockParameters& bp = *block.bp;
1043*09537850SAkhilesh Sanikop   const TransformSet tx_set = GetTransformSet(tx_size, bp.is_inter);
1044*09537850SAkhilesh Sanikop 
1045*09537850SAkhilesh Sanikop   TransformType tx_type = kTransformTypeDctDct;
1046*09537850SAkhilesh Sanikop   if (tx_set != kTransformSetDctOnly &&
1047*09537850SAkhilesh Sanikop       frame_header_.segmentation.qindex[bp.prediction_parameters->segment_id] >
1048*09537850SAkhilesh Sanikop           0) {
1049*09537850SAkhilesh Sanikop     const int cdf_index = SymbolDecoderContext::TxTypeIndex(tx_set);
1050*09537850SAkhilesh Sanikop     const int cdf_tx_size_index =
1051*09537850SAkhilesh Sanikop         TransformSizeToSquareTransformIndex(kTransformSizeSquareMin[tx_size]);
1052*09537850SAkhilesh Sanikop     uint16_t* cdf;
1053*09537850SAkhilesh Sanikop     if (bp.is_inter) {
1054*09537850SAkhilesh Sanikop       cdf = symbol_decoder_context_
1055*09537850SAkhilesh Sanikop                 .inter_tx_type_cdf[cdf_index][cdf_tx_size_index];
1056*09537850SAkhilesh Sanikop       switch (tx_set) {
1057*09537850SAkhilesh Sanikop         case kTransformSetInter1:
1058*09537850SAkhilesh Sanikop           tx_type = static_cast<TransformType>(reader_.ReadSymbol<16>(cdf));
1059*09537850SAkhilesh Sanikop           break;
1060*09537850SAkhilesh Sanikop         case kTransformSetInter2:
1061*09537850SAkhilesh Sanikop           tx_type = static_cast<TransformType>(reader_.ReadSymbol<12>(cdf));
1062*09537850SAkhilesh Sanikop           break;
1063*09537850SAkhilesh Sanikop         default:
1064*09537850SAkhilesh Sanikop           assert(tx_set == kTransformSetInter3);
1065*09537850SAkhilesh Sanikop           tx_type = static_cast<TransformType>(reader_.ReadSymbol(cdf));
1066*09537850SAkhilesh Sanikop           break;
1067*09537850SAkhilesh Sanikop       }
1068*09537850SAkhilesh Sanikop     } else {
1069*09537850SAkhilesh Sanikop       const PredictionMode intra_direction =
1070*09537850SAkhilesh Sanikop           block.bp->prediction_parameters->use_filter_intra
1071*09537850SAkhilesh Sanikop               ? kFilterIntraModeToIntraPredictor[block.bp->prediction_parameters
1072*09537850SAkhilesh Sanikop                                                      ->filter_intra_mode]
1073*09537850SAkhilesh Sanikop               : bp.y_mode;
1074*09537850SAkhilesh Sanikop       cdf =
1075*09537850SAkhilesh Sanikop           symbol_decoder_context_
1076*09537850SAkhilesh Sanikop               .intra_tx_type_cdf[cdf_index][cdf_tx_size_index][intra_direction];
1077*09537850SAkhilesh Sanikop       assert(tx_set == kTransformSetIntra1 || tx_set == kTransformSetIntra2);
1078*09537850SAkhilesh Sanikop       tx_type = static_cast<TransformType>((tx_set == kTransformSetIntra1)
1079*09537850SAkhilesh Sanikop                                                ? reader_.ReadSymbol<7>(cdf)
1080*09537850SAkhilesh Sanikop                                                : reader_.ReadSymbol<5>(cdf));
1081*09537850SAkhilesh Sanikop     }
1082*09537850SAkhilesh Sanikop 
1083*09537850SAkhilesh Sanikop     // This array does not contain an entry for kTransformSetDctOnly, so the
1084*09537850SAkhilesh Sanikop     // first dimension needs to be offset by 1.
1085*09537850SAkhilesh Sanikop     tx_type = kInverseTransformTypeBySet[tx_set - 1][tx_type];
1086*09537850SAkhilesh Sanikop   }
1087*09537850SAkhilesh Sanikop   SetTransformType(block, x4, y4, kTransformWidth4x4[tx_size],
1088*09537850SAkhilesh Sanikop                    kTransformHeight4x4[tx_size], tx_type, transform_types_);
1089*09537850SAkhilesh Sanikop }
1090*09537850SAkhilesh Sanikop 
1091*09537850SAkhilesh Sanikop // Section 8.3.2 in the spec, under coeff_base and coeff_br.
1092*09537850SAkhilesh Sanikop // Bottom boundary checks are avoided by the padded rows.
1093*09537850SAkhilesh Sanikop // For a coefficient near the right boundary, the two right neighbors and the
1094*09537850SAkhilesh Sanikop // one bottom-right neighbor may be out of boundary. We don't check the right
1095*09537850SAkhilesh Sanikop // boundary for them, because the out of boundary neighbors project to positions
1096*09537850SAkhilesh Sanikop // above the diagonal line which goes through the current coefficient and these
1097*09537850SAkhilesh Sanikop // positions are still all 0s according to the diagonal scan order.
1098*09537850SAkhilesh Sanikop template <typename ResidualType>
ReadCoeffBase2D(const uint16_t * scan,TransformSize tx_size,int adjusted_tx_width_log2,int eob,uint16_t coeff_base_cdf[kCoeffBaseContexts][kCoeffBaseSymbolCount+1],uint16_t coeff_base_range_cdf[kCoeffBaseRangeContexts][kCoeffBaseRangeSymbolCount+1],ResidualType * const quantized_buffer,uint8_t * const level_buffer)1099*09537850SAkhilesh Sanikop void Tile::ReadCoeffBase2D(
1100*09537850SAkhilesh Sanikop     const uint16_t* scan, TransformSize tx_size, int adjusted_tx_width_log2,
1101*09537850SAkhilesh Sanikop     int eob,
1102*09537850SAkhilesh Sanikop     uint16_t coeff_base_cdf[kCoeffBaseContexts][kCoeffBaseSymbolCount + 1],
1103*09537850SAkhilesh Sanikop     uint16_t coeff_base_range_cdf[kCoeffBaseRangeContexts]
1104*09537850SAkhilesh Sanikop                                  [kCoeffBaseRangeSymbolCount + 1],
1105*09537850SAkhilesh Sanikop     ResidualType* const quantized_buffer, uint8_t* const level_buffer) {
1106*09537850SAkhilesh Sanikop   const int tx_width = 1 << adjusted_tx_width_log2;
1107*09537850SAkhilesh Sanikop   for (int i = eob - 2; i >= 1; --i) {
1108*09537850SAkhilesh Sanikop     const uint16_t pos = scan[i];
1109*09537850SAkhilesh Sanikop     const int row = pos >> adjusted_tx_width_log2;
1110*09537850SAkhilesh Sanikop     const int column = pos & (tx_width - 1);
1111*09537850SAkhilesh Sanikop     auto* const quantized = &quantized_buffer[pos];
1112*09537850SAkhilesh Sanikop     auto* const levels = &level_buffer[pos];
1113*09537850SAkhilesh Sanikop     const int neighbor_sum = 1 + levels[1] + levels[tx_width] +
1114*09537850SAkhilesh Sanikop                              levels[tx_width + 1] + levels[2] +
1115*09537850SAkhilesh Sanikop                              levels[MultiplyBy2(tx_width)];
1116*09537850SAkhilesh Sanikop     const int context =
1117*09537850SAkhilesh Sanikop         ((neighbor_sum > 7) ? 4 : DivideBy2(neighbor_sum)) +
1118*09537850SAkhilesh Sanikop         kCoeffBaseContextOffset[tx_size][std::min(row, 4)][std::min(column, 4)];
1119*09537850SAkhilesh Sanikop     int level =
1120*09537850SAkhilesh Sanikop         reader_.ReadSymbol<kCoeffBaseSymbolCount>(coeff_base_cdf[context]);
1121*09537850SAkhilesh Sanikop     levels[0] = level;
1122*09537850SAkhilesh Sanikop     if (level > kNumQuantizerBaseLevels) {
1123*09537850SAkhilesh Sanikop       // No need to clip quantized values to COEFF_BASE_RANGE + NUM_BASE_LEVELS
1124*09537850SAkhilesh Sanikop       // + 1, because we clip the overall output to 6 and the unclipped
1125*09537850SAkhilesh Sanikop       // quantized values will always result in an output of greater than 6.
1126*09537850SAkhilesh Sanikop       int context = std::min(6, DivideBy2(1 + quantized[1] +          // {0, 1}
1127*09537850SAkhilesh Sanikop                                           quantized[tx_width] +       // {1, 0}
1128*09537850SAkhilesh Sanikop                                           quantized[tx_width + 1]));  // {1, 1}
1129*09537850SAkhilesh Sanikop       context += 14 >> static_cast<int>((row | column) < 2);
1130*09537850SAkhilesh Sanikop       level += ReadCoeffBaseRange(coeff_base_range_cdf[context]);
1131*09537850SAkhilesh Sanikop     }
1132*09537850SAkhilesh Sanikop     quantized[0] = level;
1133*09537850SAkhilesh Sanikop   }
1134*09537850SAkhilesh Sanikop   // Read position 0.
1135*09537850SAkhilesh Sanikop   {
1136*09537850SAkhilesh Sanikop     auto* const quantized = &quantized_buffer[0];
1137*09537850SAkhilesh Sanikop     int level = reader_.ReadSymbol<kCoeffBaseSymbolCount>(coeff_base_cdf[0]);
1138*09537850SAkhilesh Sanikop     level_buffer[0] = level;
1139*09537850SAkhilesh Sanikop     if (level > kNumQuantizerBaseLevels) {
1140*09537850SAkhilesh Sanikop       // No need to clip quantized values to COEFF_BASE_RANGE + NUM_BASE_LEVELS
1141*09537850SAkhilesh Sanikop       // + 1, because we clip the overall output to 6 and the unclipped
1142*09537850SAkhilesh Sanikop       // quantized values will always result in an output of greater than 6.
1143*09537850SAkhilesh Sanikop       const int context =
1144*09537850SAkhilesh Sanikop           std::min(6, DivideBy2(1 + quantized[1] +          // {0, 1}
1145*09537850SAkhilesh Sanikop                                 quantized[tx_width] +       // {1, 0}
1146*09537850SAkhilesh Sanikop                                 quantized[tx_width + 1]));  // {1, 1}
1147*09537850SAkhilesh Sanikop       level += ReadCoeffBaseRange(coeff_base_range_cdf[context]);
1148*09537850SAkhilesh Sanikop     }
1149*09537850SAkhilesh Sanikop     quantized[0] = level;
1150*09537850SAkhilesh Sanikop   }
1151*09537850SAkhilesh Sanikop }
1152*09537850SAkhilesh Sanikop 
1153*09537850SAkhilesh Sanikop // Section 8.3.2 in the spec, under coeff_base and coeff_br.
1154*09537850SAkhilesh Sanikop // Bottom boundary checks are avoided by the padded rows.
1155*09537850SAkhilesh Sanikop // For a coefficient near the right boundary, the four right neighbors may be
1156*09537850SAkhilesh Sanikop // out of boundary. We don't do the boundary check for the first three right
1157*09537850SAkhilesh Sanikop // neighbors, because even for the transform blocks with smallest width 4, the
1158*09537850SAkhilesh Sanikop // first three out of boundary neighbors project to positions left of the
1159*09537850SAkhilesh Sanikop // current coefficient and these positions are still all 0s according to the
1160*09537850SAkhilesh Sanikop // column scan order. However, when transform block width is 4 and the current
1161*09537850SAkhilesh Sanikop // coefficient is on the right boundary, its fourth right neighbor projects to
1162*09537850SAkhilesh Sanikop // the under position on the same column, which could be nonzero. Therefore, we
1163*09537850SAkhilesh Sanikop // must skip the fourth right neighbor. To make it simple, for any coefficient,
1164*09537850SAkhilesh Sanikop // we always do the boundary check for its fourth right neighbor.
1165*09537850SAkhilesh Sanikop template <typename ResidualType>
ReadCoeffBaseHorizontal(const uint16_t * scan,TransformSize,int adjusted_tx_width_log2,int eob,uint16_t coeff_base_cdf[kCoeffBaseContexts][kCoeffBaseSymbolCount+1],uint16_t coeff_base_range_cdf[kCoeffBaseRangeContexts][kCoeffBaseRangeSymbolCount+1],ResidualType * const quantized_buffer,uint8_t * const level_buffer)1166*09537850SAkhilesh Sanikop void Tile::ReadCoeffBaseHorizontal(
1167*09537850SAkhilesh Sanikop     const uint16_t* scan, TransformSize /*tx_size*/, int adjusted_tx_width_log2,
1168*09537850SAkhilesh Sanikop     int eob,
1169*09537850SAkhilesh Sanikop     uint16_t coeff_base_cdf[kCoeffBaseContexts][kCoeffBaseSymbolCount + 1],
1170*09537850SAkhilesh Sanikop     uint16_t coeff_base_range_cdf[kCoeffBaseRangeContexts]
1171*09537850SAkhilesh Sanikop                                  [kCoeffBaseRangeSymbolCount + 1],
1172*09537850SAkhilesh Sanikop     ResidualType* const quantized_buffer, uint8_t* const level_buffer) {
1173*09537850SAkhilesh Sanikop   const int tx_width = 1 << adjusted_tx_width_log2;
1174*09537850SAkhilesh Sanikop   int i = eob - 2;
1175*09537850SAkhilesh Sanikop   do {
1176*09537850SAkhilesh Sanikop     const uint16_t pos = scan[i];
1177*09537850SAkhilesh Sanikop     const int column = pos & (tx_width - 1);
1178*09537850SAkhilesh Sanikop     auto* const quantized = &quantized_buffer[pos];
1179*09537850SAkhilesh Sanikop     auto* const levels = &level_buffer[pos];
1180*09537850SAkhilesh Sanikop     const int neighbor_sum =
1181*09537850SAkhilesh Sanikop         1 + (levels[1] +                                  // {0, 1}
1182*09537850SAkhilesh Sanikop              levels[tx_width] +                           // {1, 0}
1183*09537850SAkhilesh Sanikop              levels[2] +                                  // {0, 2}
1184*09537850SAkhilesh Sanikop              levels[3] +                                  // {0, 3}
1185*09537850SAkhilesh Sanikop              ((column + 4 < tx_width) ? levels[4] : 0));  // {0, 4}
1186*09537850SAkhilesh Sanikop     const int context = ((neighbor_sum > 7) ? 4 : DivideBy2(neighbor_sum)) +
1187*09537850SAkhilesh Sanikop                         kCoeffBasePositionContextOffset[column];
1188*09537850SAkhilesh Sanikop     int level =
1189*09537850SAkhilesh Sanikop         reader_.ReadSymbol<kCoeffBaseSymbolCount>(coeff_base_cdf[context]);
1190*09537850SAkhilesh Sanikop     levels[0] = level;
1191*09537850SAkhilesh Sanikop     if (level > kNumQuantizerBaseLevels) {
1192*09537850SAkhilesh Sanikop       // No need to clip quantized values to COEFF_BASE_RANGE + NUM_BASE_LEVELS
1193*09537850SAkhilesh Sanikop       // + 1, because we clip the overall output to 6 and the unclipped
1194*09537850SAkhilesh Sanikop       // quantized values will always result in an output of greater than 6.
1195*09537850SAkhilesh Sanikop       int context = std::min(6, DivideBy2(1 + quantized[1] +     // {0, 1}
1196*09537850SAkhilesh Sanikop                                           quantized[tx_width] +  // {1, 0}
1197*09537850SAkhilesh Sanikop                                           quantized[2]));        // {0, 2}
1198*09537850SAkhilesh Sanikop       if (pos != 0) {
1199*09537850SAkhilesh Sanikop         context += 14 >> static_cast<int>(column == 0);
1200*09537850SAkhilesh Sanikop       }
1201*09537850SAkhilesh Sanikop       level += ReadCoeffBaseRange(coeff_base_range_cdf[context]);
1202*09537850SAkhilesh Sanikop     }
1203*09537850SAkhilesh Sanikop     quantized[0] = level;
1204*09537850SAkhilesh Sanikop   } while (--i >= 0);
1205*09537850SAkhilesh Sanikop }
1206*09537850SAkhilesh Sanikop 
1207*09537850SAkhilesh Sanikop // Section 8.3.2 in the spec, under coeff_base and coeff_br.
1208*09537850SAkhilesh Sanikop // Bottom boundary checks are avoided by the padded rows.
1209*09537850SAkhilesh Sanikop // Right boundary check is performed explicitly.
1210*09537850SAkhilesh Sanikop template <typename ResidualType>
ReadCoeffBaseVertical(const uint16_t * scan,TransformSize,int adjusted_tx_width_log2,int eob,uint16_t coeff_base_cdf[kCoeffBaseContexts][kCoeffBaseSymbolCount+1],uint16_t coeff_base_range_cdf[kCoeffBaseRangeContexts][kCoeffBaseRangeSymbolCount+1],ResidualType * const quantized_buffer,uint8_t * const level_buffer)1211*09537850SAkhilesh Sanikop void Tile::ReadCoeffBaseVertical(
1212*09537850SAkhilesh Sanikop     const uint16_t* scan, TransformSize /*tx_size*/, int adjusted_tx_width_log2,
1213*09537850SAkhilesh Sanikop     int eob,
1214*09537850SAkhilesh Sanikop     uint16_t coeff_base_cdf[kCoeffBaseContexts][kCoeffBaseSymbolCount + 1],
1215*09537850SAkhilesh Sanikop     uint16_t coeff_base_range_cdf[kCoeffBaseRangeContexts]
1216*09537850SAkhilesh Sanikop                                  [kCoeffBaseRangeSymbolCount + 1],
1217*09537850SAkhilesh Sanikop     ResidualType* const quantized_buffer, uint8_t* const level_buffer) {
1218*09537850SAkhilesh Sanikop   const int tx_width = 1 << adjusted_tx_width_log2;
1219*09537850SAkhilesh Sanikop   int i = eob - 2;
1220*09537850SAkhilesh Sanikop   do {
1221*09537850SAkhilesh Sanikop     const uint16_t pos = scan[i];
1222*09537850SAkhilesh Sanikop     const int row = pos >> adjusted_tx_width_log2;
1223*09537850SAkhilesh Sanikop     const int column = pos & (tx_width - 1);
1224*09537850SAkhilesh Sanikop     auto* const quantized = &quantized_buffer[pos];
1225*09537850SAkhilesh Sanikop     auto* const levels = &level_buffer[pos];
1226*09537850SAkhilesh Sanikop     const int neighbor_sum =
1227*09537850SAkhilesh Sanikop         1 + (((column + 1 < tx_width) ? levels[1] : 0) +  // {0, 1}
1228*09537850SAkhilesh Sanikop              levels[tx_width] +                           // {1, 0}
1229*09537850SAkhilesh Sanikop              levels[MultiplyBy2(tx_width)] +              // {2, 0}
1230*09537850SAkhilesh Sanikop              levels[tx_width * 3] +                       // {3, 0}
1231*09537850SAkhilesh Sanikop              levels[MultiplyBy4(tx_width)]);              // {4, 0}
1232*09537850SAkhilesh Sanikop     const int context = ((neighbor_sum > 7) ? 4 : DivideBy2(neighbor_sum)) +
1233*09537850SAkhilesh Sanikop                         kCoeffBasePositionContextOffset[row];
1234*09537850SAkhilesh Sanikop     int level =
1235*09537850SAkhilesh Sanikop         reader_.ReadSymbol<kCoeffBaseSymbolCount>(coeff_base_cdf[context]);
1236*09537850SAkhilesh Sanikop     levels[0] = level;
1237*09537850SAkhilesh Sanikop     if (level > kNumQuantizerBaseLevels) {
1238*09537850SAkhilesh Sanikop       // No need to clip quantized values to COEFF_BASE_RANGE + NUM_BASE_LEVELS
1239*09537850SAkhilesh Sanikop       // + 1, because we clip the overall output to 6 and the unclipped
1240*09537850SAkhilesh Sanikop       // quantized values will always result in an output of greater than 6.
1241*09537850SAkhilesh Sanikop       const int quantized_column1 = (column + 1 < tx_width) ? quantized[1] : 0;
1242*09537850SAkhilesh Sanikop       int context =
1243*09537850SAkhilesh Sanikop           std::min(6, DivideBy2(1 + quantized_column1 +              // {0, 1}
1244*09537850SAkhilesh Sanikop                                 quantized[tx_width] +                // {1, 0}
1245*09537850SAkhilesh Sanikop                                 quantized[MultiplyBy2(tx_width)]));  // {2, 0}
1246*09537850SAkhilesh Sanikop       if (pos != 0) {
1247*09537850SAkhilesh Sanikop         context += 14 >> static_cast<int>(row == 0);
1248*09537850SAkhilesh Sanikop       }
1249*09537850SAkhilesh Sanikop       level += ReadCoeffBaseRange(coeff_base_range_cdf[context]);
1250*09537850SAkhilesh Sanikop     }
1251*09537850SAkhilesh Sanikop     quantized[0] = level;
1252*09537850SAkhilesh Sanikop   } while (--i >= 0);
1253*09537850SAkhilesh Sanikop }
1254*09537850SAkhilesh Sanikop 
GetDcSignContext(int x4,int y4,int w4,int h4,Plane plane)1255*09537850SAkhilesh Sanikop int Tile::GetDcSignContext(int x4, int y4, int w4, int h4, Plane plane) {
1256*09537850SAkhilesh Sanikop   const int max_x4x4 = frame_header_.columns4x4 >> subsampling_x_[plane];
1257*09537850SAkhilesh Sanikop   const int8_t* dc_categories = &dc_categories_[kEntropyContextTop][plane][x4];
1258*09537850SAkhilesh Sanikop   // Set dc_sign to 8-bit long so that std::accumulate() saves sign extension.
1259*09537850SAkhilesh Sanikop   int8_t dc_sign = std::accumulate(
1260*09537850SAkhilesh Sanikop       dc_categories, dc_categories + GetNumElements(w4, x4, max_x4x4), 0);
1261*09537850SAkhilesh Sanikop   const int max_y4x4 = frame_header_.rows4x4 >> subsampling_y_[plane];
1262*09537850SAkhilesh Sanikop   dc_categories = &dc_categories_[kEntropyContextLeft][plane][y4];
1263*09537850SAkhilesh Sanikop   dc_sign = std::accumulate(
1264*09537850SAkhilesh Sanikop       dc_categories, dc_categories + GetNumElements(h4, y4, max_y4x4), dc_sign);
1265*09537850SAkhilesh Sanikop   // This return statement is equivalent to:
1266*09537850SAkhilesh Sanikop   //   if (dc_sign < 0) return 1;
1267*09537850SAkhilesh Sanikop   //   if (dc_sign > 0) return 2;
1268*09537850SAkhilesh Sanikop   //   return 0;
1269*09537850SAkhilesh Sanikop   // And it is better than:
1270*09537850SAkhilesh Sanikop   //   return static_cast<int>(dc_sign != 0) + static_cast<int>(dc_sign > 0);
1271*09537850SAkhilesh Sanikop   return static_cast<int>(dc_sign < 0) +
1272*09537850SAkhilesh Sanikop          MultiplyBy2(static_cast<int>(dc_sign > 0));
1273*09537850SAkhilesh Sanikop }
1274*09537850SAkhilesh Sanikop 
SetEntropyContexts(int x4,int y4,int w4,int h4,Plane plane,uint8_t coefficient_level,int8_t dc_category)1275*09537850SAkhilesh Sanikop void Tile::SetEntropyContexts(int x4, int y4, int w4, int h4, Plane plane,
1276*09537850SAkhilesh Sanikop                               uint8_t coefficient_level, int8_t dc_category) {
1277*09537850SAkhilesh Sanikop   const int max_x4x4 = frame_header_.columns4x4 >> subsampling_x_[plane];
1278*09537850SAkhilesh Sanikop   const int num_top_elements = GetNumElements(w4, x4, max_x4x4);
1279*09537850SAkhilesh Sanikop   memset(&coefficient_levels_[kEntropyContextTop][plane][x4], coefficient_level,
1280*09537850SAkhilesh Sanikop          num_top_elements);
1281*09537850SAkhilesh Sanikop   memset(&dc_categories_[kEntropyContextTop][plane][x4], dc_category,
1282*09537850SAkhilesh Sanikop          num_top_elements);
1283*09537850SAkhilesh Sanikop   const int max_y4x4 = frame_header_.rows4x4 >> subsampling_y_[plane];
1284*09537850SAkhilesh Sanikop   const int num_left_elements = GetNumElements(h4, y4, max_y4x4);
1285*09537850SAkhilesh Sanikop   memset(&coefficient_levels_[kEntropyContextLeft][plane][y4],
1286*09537850SAkhilesh Sanikop          coefficient_level, num_left_elements);
1287*09537850SAkhilesh Sanikop   memset(&dc_categories_[kEntropyContextLeft][plane][y4], dc_category,
1288*09537850SAkhilesh Sanikop          num_left_elements);
1289*09537850SAkhilesh Sanikop }
1290*09537850SAkhilesh Sanikop 
1291*09537850SAkhilesh Sanikop template <typename ResidualType, bool is_dc_coefficient>
ReadSignAndApplyDequantization(const uint16_t * const scan,int i,int q_value,const uint8_t * const quantizer_matrix,int shift,int max_value,uint16_t * const dc_sign_cdf,int8_t * const dc_category,int * const coefficient_level,ResidualType * residual_buffer)1292*09537850SAkhilesh Sanikop bool Tile::ReadSignAndApplyDequantization(
1293*09537850SAkhilesh Sanikop     const uint16_t* const scan, int i, int q_value,
1294*09537850SAkhilesh Sanikop     const uint8_t* const quantizer_matrix, int shift, int max_value,
1295*09537850SAkhilesh Sanikop     uint16_t* const dc_sign_cdf, int8_t* const dc_category,
1296*09537850SAkhilesh Sanikop     int* const coefficient_level, ResidualType* residual_buffer) {
1297*09537850SAkhilesh Sanikop   const int pos = is_dc_coefficient ? 0 : scan[i];
1298*09537850SAkhilesh Sanikop   // If residual_buffer[pos] is zero, then the rest of the function has no
1299*09537850SAkhilesh Sanikop   // effect.
1300*09537850SAkhilesh Sanikop   int level = residual_buffer[pos];
1301*09537850SAkhilesh Sanikop   if (level == 0) return true;
1302*09537850SAkhilesh Sanikop   const int sign = is_dc_coefficient
1303*09537850SAkhilesh Sanikop                        ? static_cast<int>(reader_.ReadSymbol(dc_sign_cdf))
1304*09537850SAkhilesh Sanikop                        : reader_.ReadBit();
1305*09537850SAkhilesh Sanikop   if (level > kNumQuantizerBaseLevels + kQuantizerCoefficientBaseRange) {
1306*09537850SAkhilesh Sanikop     int length = 0;
1307*09537850SAkhilesh Sanikop     bool golomb_length_bit = false;
1308*09537850SAkhilesh Sanikop     do {
1309*09537850SAkhilesh Sanikop       golomb_length_bit = reader_.ReadBit() != 0;
1310*09537850SAkhilesh Sanikop       ++length;
1311*09537850SAkhilesh Sanikop       if (length > 20) {
1312*09537850SAkhilesh Sanikop         LIBGAV1_DLOG(ERROR, "Invalid golomb_length %d", length);
1313*09537850SAkhilesh Sanikop         return false;
1314*09537850SAkhilesh Sanikop       }
1315*09537850SAkhilesh Sanikop     } while (!golomb_length_bit);
1316*09537850SAkhilesh Sanikop     int x = 1;
1317*09537850SAkhilesh Sanikop     for (int i = length - 2; i >= 0; --i) {
1318*09537850SAkhilesh Sanikop       x = (x << 1) | reader_.ReadBit();
1319*09537850SAkhilesh Sanikop     }
1320*09537850SAkhilesh Sanikop     level += x - 1;
1321*09537850SAkhilesh Sanikop   }
1322*09537850SAkhilesh Sanikop   if (is_dc_coefficient) {
1323*09537850SAkhilesh Sanikop     *dc_category = (sign != 0) ? -1 : 1;
1324*09537850SAkhilesh Sanikop   }
1325*09537850SAkhilesh Sanikop   level &= 0xfffff;
1326*09537850SAkhilesh Sanikop   *coefficient_level += level;
1327*09537850SAkhilesh Sanikop   // Apply dequantization. Step 1 of section 7.12.3 in the spec.
1328*09537850SAkhilesh Sanikop   int q = q_value;
1329*09537850SAkhilesh Sanikop   if (quantizer_matrix != nullptr) {
1330*09537850SAkhilesh Sanikop     q = RightShiftWithRounding(q * quantizer_matrix[pos], 5);
1331*09537850SAkhilesh Sanikop   }
1332*09537850SAkhilesh Sanikop   // The intermediate multiplication can exceed 32 bits, so it has to be
1333*09537850SAkhilesh Sanikop   // performed by promoting one of the values to int64_t.
1334*09537850SAkhilesh Sanikop   int32_t dequantized_value = (static_cast<int64_t>(q) * level) & 0xffffff;
1335*09537850SAkhilesh Sanikop   dequantized_value >>= shift;
1336*09537850SAkhilesh Sanikop   // At this point:
1337*09537850SAkhilesh Sanikop   //   * |dequantized_value| is always non-negative.
1338*09537850SAkhilesh Sanikop   //   * |sign| can be either 0 or 1.
1339*09537850SAkhilesh Sanikop   //   * min_value = -(max_value + 1).
1340*09537850SAkhilesh Sanikop   // We need to apply the following:
1341*09537850SAkhilesh Sanikop   // dequantized_value = sign ? -dequantized_value : dequantized_value;
1342*09537850SAkhilesh Sanikop   // dequantized_value = Clip3(dequantized_value, min_value, max_value);
1343*09537850SAkhilesh Sanikop   //
1344*09537850SAkhilesh Sanikop   // Note that -x == ~(x - 1).
1345*09537850SAkhilesh Sanikop   //
1346*09537850SAkhilesh Sanikop   // Now, The above two lines can be done with a std::min and xor as follows:
1347*09537850SAkhilesh Sanikop   dequantized_value = std::min(dequantized_value - sign, max_value) ^ -sign;
1348*09537850SAkhilesh Sanikop   residual_buffer[pos] = dequantized_value;
1349*09537850SAkhilesh Sanikop   return true;
1350*09537850SAkhilesh Sanikop }
1351*09537850SAkhilesh Sanikop 
ReadCoeffBaseRange(uint16_t * cdf)1352*09537850SAkhilesh Sanikop int Tile::ReadCoeffBaseRange(uint16_t* cdf) {
1353*09537850SAkhilesh Sanikop   int level = 0;
1354*09537850SAkhilesh Sanikop   for (int j = 0; j < kCoeffBaseRangeMaxIterations; ++j) {
1355*09537850SAkhilesh Sanikop     const int coeff_base_range =
1356*09537850SAkhilesh Sanikop         reader_.ReadSymbol<kCoeffBaseRangeSymbolCount>(cdf);
1357*09537850SAkhilesh Sanikop     level += coeff_base_range;
1358*09537850SAkhilesh Sanikop     if (coeff_base_range < (kCoeffBaseRangeSymbolCount - 1)) break;
1359*09537850SAkhilesh Sanikop   }
1360*09537850SAkhilesh Sanikop   return level;
1361*09537850SAkhilesh Sanikop }
1362*09537850SAkhilesh Sanikop 
1363*09537850SAkhilesh Sanikop template <typename ResidualType>
ReadTransformCoefficients(const Block & block,Plane plane,int start_x,int start_y,TransformSize tx_size,TransformType * const tx_type)1364*09537850SAkhilesh Sanikop int Tile::ReadTransformCoefficients(const Block& block, Plane plane,
1365*09537850SAkhilesh Sanikop                                     int start_x, int start_y,
1366*09537850SAkhilesh Sanikop                                     TransformSize tx_size,
1367*09537850SAkhilesh Sanikop                                     TransformType* const tx_type) {
1368*09537850SAkhilesh Sanikop   const int x4 = DivideBy4(start_x);
1369*09537850SAkhilesh Sanikop   const int y4 = DivideBy4(start_y);
1370*09537850SAkhilesh Sanikop   const int w4 = kTransformWidth4x4[tx_size];
1371*09537850SAkhilesh Sanikop   const int h4 = kTransformHeight4x4[tx_size];
1372*09537850SAkhilesh Sanikop   const int tx_size_context = kTransformSizeContext[tx_size];
1373*09537850SAkhilesh Sanikop   int context =
1374*09537850SAkhilesh Sanikop       GetTransformAllZeroContext(block, plane, tx_size, x4, y4, w4, h4);
1375*09537850SAkhilesh Sanikop   const bool all_zero = reader_.ReadSymbol(
1376*09537850SAkhilesh Sanikop       symbol_decoder_context_.all_zero_cdf[tx_size_context][context]);
1377*09537850SAkhilesh Sanikop   if (all_zero) {
1378*09537850SAkhilesh Sanikop     if (plane == kPlaneY) {
1379*09537850SAkhilesh Sanikop       SetTransformType(block, x4, y4, w4, h4, kTransformTypeDctDct,
1380*09537850SAkhilesh Sanikop                        transform_types_);
1381*09537850SAkhilesh Sanikop     }
1382*09537850SAkhilesh Sanikop     SetEntropyContexts(x4, y4, w4, h4, plane, 0, 0);
1383*09537850SAkhilesh Sanikop     // This is not used in this case, so it can be set to any value.
1384*09537850SAkhilesh Sanikop     *tx_type = kNumTransformTypes;
1385*09537850SAkhilesh Sanikop     return 0;
1386*09537850SAkhilesh Sanikop   }
1387*09537850SAkhilesh Sanikop   const int tx_width = kTransformWidth[tx_size];
1388*09537850SAkhilesh Sanikop   const int tx_height = kTransformHeight[tx_size];
1389*09537850SAkhilesh Sanikop   const TransformSize adjusted_tx_size = kAdjustedTransformSize[tx_size];
1390*09537850SAkhilesh Sanikop   const int adjusted_tx_width_log2 = kTransformWidthLog2[adjusted_tx_size];
1391*09537850SAkhilesh Sanikop   const int tx_padding =
1392*09537850SAkhilesh Sanikop       (1 << adjusted_tx_width_log2) * kResidualPaddingVertical;
1393*09537850SAkhilesh Sanikop   auto* residual = reinterpret_cast<ResidualType*>(*block.residual);
1394*09537850SAkhilesh Sanikop   // Clear padding to avoid bottom boundary checks when parsing quantized
1395*09537850SAkhilesh Sanikop   // coefficients.
1396*09537850SAkhilesh Sanikop   memset(residual, 0, (tx_width * tx_height + tx_padding) * residual_size_);
1397*09537850SAkhilesh Sanikop   uint8_t level_buffer[(32 + kResidualPaddingVertical) * 32];
1398*09537850SAkhilesh Sanikop   memset(
1399*09537850SAkhilesh Sanikop       level_buffer, 0,
1400*09537850SAkhilesh Sanikop       kTransformWidth[adjusted_tx_size] * kTransformHeight[adjusted_tx_size] +
1401*09537850SAkhilesh Sanikop           tx_padding);
1402*09537850SAkhilesh Sanikop   const int clamped_tx_height = std::min(tx_height, 32);
1403*09537850SAkhilesh Sanikop   if (plane == kPlaneY) {
1404*09537850SAkhilesh Sanikop     ReadTransformType(block, x4, y4, tx_size);
1405*09537850SAkhilesh Sanikop   }
1406*09537850SAkhilesh Sanikop   BlockParameters& bp = *block.bp;
1407*09537850SAkhilesh Sanikop   *tx_type = ComputeTransformType(block, plane, tx_size, x4, y4);
1408*09537850SAkhilesh Sanikop   const int eob_multi_size = kEobMultiSizeLookup[tx_size];
1409*09537850SAkhilesh Sanikop   const PlaneType plane_type = GetPlaneType(plane);
1410*09537850SAkhilesh Sanikop   const TransformClass tx_class = GetTransformClass(*tx_type);
1411*09537850SAkhilesh Sanikop   context = static_cast<int>(tx_class != kTransformClass2D);
1412*09537850SAkhilesh Sanikop   int eob_pt = 1;
1413*09537850SAkhilesh Sanikop   switch (eob_multi_size) {
1414*09537850SAkhilesh Sanikop     case 0:
1415*09537850SAkhilesh Sanikop       eob_pt += reader_.ReadSymbol<kEobPt16SymbolCount>(
1416*09537850SAkhilesh Sanikop           symbol_decoder_context_.eob_pt_16_cdf[plane_type][context]);
1417*09537850SAkhilesh Sanikop       break;
1418*09537850SAkhilesh Sanikop     case 1:
1419*09537850SAkhilesh Sanikop       eob_pt += reader_.ReadSymbol<kEobPt32SymbolCount>(
1420*09537850SAkhilesh Sanikop           symbol_decoder_context_.eob_pt_32_cdf[plane_type][context]);
1421*09537850SAkhilesh Sanikop       break;
1422*09537850SAkhilesh Sanikop     case 2:
1423*09537850SAkhilesh Sanikop       eob_pt += reader_.ReadSymbol<kEobPt64SymbolCount>(
1424*09537850SAkhilesh Sanikop           symbol_decoder_context_.eob_pt_64_cdf[plane_type][context]);
1425*09537850SAkhilesh Sanikop       break;
1426*09537850SAkhilesh Sanikop     case 3:
1427*09537850SAkhilesh Sanikop       eob_pt += reader_.ReadSymbol<kEobPt128SymbolCount>(
1428*09537850SAkhilesh Sanikop           symbol_decoder_context_.eob_pt_128_cdf[plane_type][context]);
1429*09537850SAkhilesh Sanikop       break;
1430*09537850SAkhilesh Sanikop     case 4:
1431*09537850SAkhilesh Sanikop       eob_pt += reader_.ReadSymbol<kEobPt256SymbolCount>(
1432*09537850SAkhilesh Sanikop           symbol_decoder_context_.eob_pt_256_cdf[plane_type][context]);
1433*09537850SAkhilesh Sanikop       break;
1434*09537850SAkhilesh Sanikop     case 5:
1435*09537850SAkhilesh Sanikop       eob_pt += reader_.ReadSymbol<kEobPt512SymbolCount>(
1436*09537850SAkhilesh Sanikop           symbol_decoder_context_.eob_pt_512_cdf[plane_type]);
1437*09537850SAkhilesh Sanikop       break;
1438*09537850SAkhilesh Sanikop     case 6:
1439*09537850SAkhilesh Sanikop     default:
1440*09537850SAkhilesh Sanikop       eob_pt += reader_.ReadSymbol<kEobPt1024SymbolCount>(
1441*09537850SAkhilesh Sanikop           symbol_decoder_context_.eob_pt_1024_cdf[plane_type]);
1442*09537850SAkhilesh Sanikop       break;
1443*09537850SAkhilesh Sanikop   }
1444*09537850SAkhilesh Sanikop   int eob = (eob_pt < 2) ? eob_pt : ((1 << (eob_pt - 2)) + 1);
1445*09537850SAkhilesh Sanikop   if (eob_pt >= 3) {
1446*09537850SAkhilesh Sanikop     context = eob_pt - 3;
1447*09537850SAkhilesh Sanikop     const bool eob_extra = reader_.ReadSymbol(
1448*09537850SAkhilesh Sanikop         symbol_decoder_context_
1449*09537850SAkhilesh Sanikop             .eob_extra_cdf[tx_size_context][plane_type][context]);
1450*09537850SAkhilesh Sanikop     if (eob_extra) eob += 1 << (eob_pt - 3);
1451*09537850SAkhilesh Sanikop     for (int i = 1; i < eob_pt - 2; ++i) {
1452*09537850SAkhilesh Sanikop       assert(eob_pt - i >= 3);
1453*09537850SAkhilesh Sanikop       assert(eob_pt <= kEobPt1024SymbolCount);
1454*09537850SAkhilesh Sanikop       if (reader_.ReadBit() != 0) {
1455*09537850SAkhilesh Sanikop         eob += 1 << (eob_pt - i - 3);
1456*09537850SAkhilesh Sanikop       }
1457*09537850SAkhilesh Sanikop     }
1458*09537850SAkhilesh Sanikop   }
1459*09537850SAkhilesh Sanikop   const uint16_t* scan = kScan[tx_class][tx_size];
1460*09537850SAkhilesh Sanikop   const int clamped_tx_size_context = std::min(tx_size_context, 3);
1461*09537850SAkhilesh Sanikop   auto coeff_base_range_cdf =
1462*09537850SAkhilesh Sanikop       symbol_decoder_context_
1463*09537850SAkhilesh Sanikop           .coeff_base_range_cdf[clamped_tx_size_context][plane_type];
1464*09537850SAkhilesh Sanikop   // Read the last coefficient.
1465*09537850SAkhilesh Sanikop   {
1466*09537850SAkhilesh Sanikop     context = GetCoeffBaseContextEob(tx_size, eob - 1);
1467*09537850SAkhilesh Sanikop     const uint16_t pos = scan[eob - 1];
1468*09537850SAkhilesh Sanikop     int level =
1469*09537850SAkhilesh Sanikop         1 + reader_.ReadSymbol<kCoeffBaseEobSymbolCount>(
1470*09537850SAkhilesh Sanikop                 symbol_decoder_context_
1471*09537850SAkhilesh Sanikop                     .coeff_base_eob_cdf[tx_size_context][plane_type][context]);
1472*09537850SAkhilesh Sanikop     level_buffer[pos] = level;
1473*09537850SAkhilesh Sanikop     if (level > kNumQuantizerBaseLevels) {
1474*09537850SAkhilesh Sanikop       level +=
1475*09537850SAkhilesh Sanikop           ReadCoeffBaseRange(coeff_base_range_cdf[GetCoeffBaseRangeContextEob(
1476*09537850SAkhilesh Sanikop               adjusted_tx_width_log2, pos, tx_class)]);
1477*09537850SAkhilesh Sanikop     }
1478*09537850SAkhilesh Sanikop     residual[pos] = level;
1479*09537850SAkhilesh Sanikop   }
1480*09537850SAkhilesh Sanikop   if (eob > 1) {
1481*09537850SAkhilesh Sanikop     // Read all the other coefficients.
1482*09537850SAkhilesh Sanikop     // Lookup used to call the right variant of ReadCoeffBase*() based on the
1483*09537850SAkhilesh Sanikop     // transform class.
1484*09537850SAkhilesh Sanikop     static constexpr void (Tile::*kGetCoeffBaseFunc[])(
1485*09537850SAkhilesh Sanikop         const uint16_t* scan, TransformSize tx_size, int adjusted_tx_width_log2,
1486*09537850SAkhilesh Sanikop         int eob,
1487*09537850SAkhilesh Sanikop         uint16_t coeff_base_cdf[kCoeffBaseContexts][kCoeffBaseSymbolCount + 1],
1488*09537850SAkhilesh Sanikop         uint16_t coeff_base_range_cdf[kCoeffBaseRangeContexts]
1489*09537850SAkhilesh Sanikop                                      [kCoeffBaseRangeSymbolCount + 1],
1490*09537850SAkhilesh Sanikop         ResidualType* quantized_buffer,
1491*09537850SAkhilesh Sanikop         uint8_t* level_buffer) = {&Tile::ReadCoeffBase2D<ResidualType>,
1492*09537850SAkhilesh Sanikop                                   &Tile::ReadCoeffBaseHorizontal<ResidualType>,
1493*09537850SAkhilesh Sanikop                                   &Tile::ReadCoeffBaseVertical<ResidualType>};
1494*09537850SAkhilesh Sanikop     (this->*kGetCoeffBaseFunc[tx_class])(
1495*09537850SAkhilesh Sanikop         scan, tx_size, adjusted_tx_width_log2, eob,
1496*09537850SAkhilesh Sanikop         symbol_decoder_context_.coeff_base_cdf[tx_size_context][plane_type],
1497*09537850SAkhilesh Sanikop         coeff_base_range_cdf, residual, level_buffer);
1498*09537850SAkhilesh Sanikop   }
1499*09537850SAkhilesh Sanikop   const int max_value = (1 << (7 + sequence_header_.color_config.bitdepth)) - 1;
1500*09537850SAkhilesh Sanikop   const int current_quantizer_index =
1501*09537850SAkhilesh Sanikop       GetQIndex(frame_header_.segmentation,
1502*09537850SAkhilesh Sanikop                 bp.prediction_parameters->segment_id, current_quantizer_index_);
1503*09537850SAkhilesh Sanikop   const int dc_q_value = quantizer_.GetDcValue(plane, current_quantizer_index);
1504*09537850SAkhilesh Sanikop   const int ac_q_value = quantizer_.GetAcValue(plane, current_quantizer_index);
1505*09537850SAkhilesh Sanikop   const int shift = kQuantizationShift[tx_size];
1506*09537850SAkhilesh Sanikop   const uint8_t* const quantizer_matrix =
1507*09537850SAkhilesh Sanikop       (frame_header_.quantizer.use_matrix &&
1508*09537850SAkhilesh Sanikop        *tx_type < kTransformTypeIdentityIdentity &&
1509*09537850SAkhilesh Sanikop        !frame_header_.segmentation
1510*09537850SAkhilesh Sanikop             .lossless[bp.prediction_parameters->segment_id] &&
1511*09537850SAkhilesh Sanikop        frame_header_.quantizer.matrix_level[plane] < 15)
1512*09537850SAkhilesh Sanikop           ? quantizer_matrix_[frame_header_.quantizer.matrix_level[plane]]
1513*09537850SAkhilesh Sanikop                              [plane_type][adjusted_tx_size]
1514*09537850SAkhilesh Sanikop                                  .get()
1515*09537850SAkhilesh Sanikop           : nullptr;
1516*09537850SAkhilesh Sanikop   int coefficient_level = 0;
1517*09537850SAkhilesh Sanikop   int8_t dc_category = 0;
1518*09537850SAkhilesh Sanikop   uint16_t* const dc_sign_cdf =
1519*09537850SAkhilesh Sanikop       (residual[0] != 0)
1520*09537850SAkhilesh Sanikop           ? symbol_decoder_context_.dc_sign_cdf[plane_type][GetDcSignContext(
1521*09537850SAkhilesh Sanikop                 x4, y4, w4, h4, plane)]
1522*09537850SAkhilesh Sanikop           : nullptr;
1523*09537850SAkhilesh Sanikop   assert(scan[0] == 0);
1524*09537850SAkhilesh Sanikop   if (!ReadSignAndApplyDequantization<ResidualType, /*is_dc_coefficient=*/true>(
1525*09537850SAkhilesh Sanikop           scan, 0, dc_q_value, quantizer_matrix, shift, max_value, dc_sign_cdf,
1526*09537850SAkhilesh Sanikop           &dc_category, &coefficient_level, residual)) {
1527*09537850SAkhilesh Sanikop     return -1;
1528*09537850SAkhilesh Sanikop   }
1529*09537850SAkhilesh Sanikop   if (eob > 1) {
1530*09537850SAkhilesh Sanikop     int i = 1;
1531*09537850SAkhilesh Sanikop     do {
1532*09537850SAkhilesh Sanikop       if (!ReadSignAndApplyDequantization<ResidualType,
1533*09537850SAkhilesh Sanikop                                           /*is_dc_coefficient=*/false>(
1534*09537850SAkhilesh Sanikop               scan, i, ac_q_value, quantizer_matrix, shift, max_value, nullptr,
1535*09537850SAkhilesh Sanikop               nullptr, &coefficient_level, residual)) {
1536*09537850SAkhilesh Sanikop         return -1;
1537*09537850SAkhilesh Sanikop       }
1538*09537850SAkhilesh Sanikop     } while (++i < eob);
1539*09537850SAkhilesh Sanikop     MoveCoefficientsForTxWidth64(clamped_tx_height, tx_width, residual);
1540*09537850SAkhilesh Sanikop   }
1541*09537850SAkhilesh Sanikop   SetEntropyContexts(x4, y4, w4, h4, plane, std::min(4, coefficient_level),
1542*09537850SAkhilesh Sanikop                      dc_category);
1543*09537850SAkhilesh Sanikop   if (split_parse_and_decode_) {
1544*09537850SAkhilesh Sanikop     *block.residual += tx_width * tx_height * residual_size_;
1545*09537850SAkhilesh Sanikop   }
1546*09537850SAkhilesh Sanikop   return eob;
1547*09537850SAkhilesh Sanikop }
1548*09537850SAkhilesh Sanikop 
// CALL_BITDEPTH_FUNCTION(function, ...) calls the template |function| with the
// variadic arguments. The template argument is chosen from
// |sequence_header_.color_config.bitdepth|: uint8_t for bitdepths up to 8 and,
// when LIBGAV1_MAX_BITDEPTH >= 10, uint16_t for anything higher.
#if LIBGAV1_MAX_BITDEPTH >= 10
#define CALL_BITDEPTH_FUNCTION(function, ...)          \
  do {                                                 \
    if (sequence_header_.color_config.bitdepth <= 8) { \
      function<uint8_t>(__VA_ARGS__);                  \
    } else {                                           \
      function<uint16_t>(__VA_ARGS__);                 \
    }                                                  \
  } while (false)
#else
#define CALL_BITDEPTH_FUNCTION(function, ...) \
  do {                                        \
    function<uint8_t>(__VA_ARGS__);           \
  } while (false)
#endif
1567*09537850SAkhilesh Sanikop 
TransformBlock(const Block & block,Plane plane,int base_x,int base_y,TransformSize tx_size,int x,int y,ProcessingMode mode)1568*09537850SAkhilesh Sanikop bool Tile::TransformBlock(const Block& block, Plane plane, int base_x,
1569*09537850SAkhilesh Sanikop                           int base_y, TransformSize tx_size, int x, int y,
1570*09537850SAkhilesh Sanikop                           ProcessingMode mode) {
1571*09537850SAkhilesh Sanikop   BlockParameters& bp = *block.bp;
1572*09537850SAkhilesh Sanikop   const int subsampling_x = subsampling_x_[plane];
1573*09537850SAkhilesh Sanikop   const int subsampling_y = subsampling_y_[plane];
1574*09537850SAkhilesh Sanikop   const int start_x = base_x + MultiplyBy4(x);
1575*09537850SAkhilesh Sanikop   const int start_y = base_y + MultiplyBy4(y);
1576*09537850SAkhilesh Sanikop   const int max_x = MultiplyBy4(frame_header_.columns4x4) >> subsampling_x;
1577*09537850SAkhilesh Sanikop   const int max_y = MultiplyBy4(frame_header_.rows4x4) >> subsampling_y;
1578*09537850SAkhilesh Sanikop   if (start_x >= max_x || start_y >= max_y) return true;
1579*09537850SAkhilesh Sanikop   const int row = DivideBy4(start_y << subsampling_y);
1580*09537850SAkhilesh Sanikop   const int column = DivideBy4(start_x << subsampling_x);
1581*09537850SAkhilesh Sanikop   const int mask = sequence_header_.use_128x128_superblock ? 31 : 15;
1582*09537850SAkhilesh Sanikop   const int sub_block_row4x4 = row & mask;
1583*09537850SAkhilesh Sanikop   const int sub_block_column4x4 = column & mask;
1584*09537850SAkhilesh Sanikop   const int step_x = kTransformWidth4x4[tx_size];
1585*09537850SAkhilesh Sanikop   const int step_y = kTransformHeight4x4[tx_size];
1586*09537850SAkhilesh Sanikop   const bool do_decode = mode == kProcessingModeDecodeOnly ||
1587*09537850SAkhilesh Sanikop                          mode == kProcessingModeParseAndDecode;
1588*09537850SAkhilesh Sanikop   if (do_decode && !bp.is_inter) {
1589*09537850SAkhilesh Sanikop     if (bp.prediction_parameters->palette_mode_info.size[GetPlaneType(plane)] >
1590*09537850SAkhilesh Sanikop         0) {
1591*09537850SAkhilesh Sanikop       CALL_BITDEPTH_FUNCTION(PalettePrediction, block, plane, start_x, start_y,
1592*09537850SAkhilesh Sanikop                              x, y, tx_size);
1593*09537850SAkhilesh Sanikop     } else {
1594*09537850SAkhilesh Sanikop       const PredictionMode mode =
1595*09537850SAkhilesh Sanikop           (plane == kPlaneY) ? bp.y_mode
1596*09537850SAkhilesh Sanikop                              : (bp.prediction_parameters->uv_mode ==
1597*09537850SAkhilesh Sanikop                                         kPredictionModeChromaFromLuma
1598*09537850SAkhilesh Sanikop                                     ? kPredictionModeDc
1599*09537850SAkhilesh Sanikop                                     : bp.prediction_parameters->uv_mode);
1600*09537850SAkhilesh Sanikop       const int tr_row4x4 = (sub_block_row4x4 >> subsampling_y);
1601*09537850SAkhilesh Sanikop       const int tr_column4x4 =
1602*09537850SAkhilesh Sanikop           (sub_block_column4x4 >> subsampling_x) + step_x + 1;
1603*09537850SAkhilesh Sanikop       const int bl_row4x4 = (sub_block_row4x4 >> subsampling_y) + step_y + 1;
1604*09537850SAkhilesh Sanikop       const int bl_column4x4 = (sub_block_column4x4 >> subsampling_x);
1605*09537850SAkhilesh Sanikop       const bool has_left = x > 0 || block.left_available[plane];
1606*09537850SAkhilesh Sanikop       const bool has_top = y > 0 || block.top_available[plane];
1607*09537850SAkhilesh Sanikop 
1608*09537850SAkhilesh Sanikop       CALL_BITDEPTH_FUNCTION(
1609*09537850SAkhilesh Sanikop           IntraPrediction, block, plane, start_x, start_y, has_left, has_top,
1610*09537850SAkhilesh Sanikop           block.scratch_buffer->block_decoded[plane][tr_row4x4][tr_column4x4],
1611*09537850SAkhilesh Sanikop           block.scratch_buffer->block_decoded[plane][bl_row4x4][bl_column4x4],
1612*09537850SAkhilesh Sanikop           mode, tx_size);
1613*09537850SAkhilesh Sanikop       if (plane != kPlaneY &&
1614*09537850SAkhilesh Sanikop           bp.prediction_parameters->uv_mode == kPredictionModeChromaFromLuma) {
1615*09537850SAkhilesh Sanikop         CALL_BITDEPTH_FUNCTION(ChromaFromLumaPrediction, block, plane, start_x,
1616*09537850SAkhilesh Sanikop                                start_y, tx_size);
1617*09537850SAkhilesh Sanikop       }
1618*09537850SAkhilesh Sanikop     }
1619*09537850SAkhilesh Sanikop     if (plane == kPlaneY) {
1620*09537850SAkhilesh Sanikop       block.bp->prediction_parameters->max_luma_width =
1621*09537850SAkhilesh Sanikop           start_x + MultiplyBy4(step_x);
1622*09537850SAkhilesh Sanikop       block.bp->prediction_parameters->max_luma_height =
1623*09537850SAkhilesh Sanikop           start_y + MultiplyBy4(step_y);
1624*09537850SAkhilesh Sanikop       block.scratch_buffer->cfl_luma_buffer_valid = false;
1625*09537850SAkhilesh Sanikop     }
1626*09537850SAkhilesh Sanikop   }
1627*09537850SAkhilesh Sanikop   if (!bp.skip) {
1628*09537850SAkhilesh Sanikop     const int sb_row_index = SuperBlockRowIndex(block.row4x4);
1629*09537850SAkhilesh Sanikop     const int sb_column_index = SuperBlockColumnIndex(block.column4x4);
1630*09537850SAkhilesh Sanikop     if (mode == kProcessingModeDecodeOnly) {
1631*09537850SAkhilesh Sanikop       Queue<TransformParameters>& tx_params =
1632*09537850SAkhilesh Sanikop           *residual_buffer_threaded_[sb_row_index][sb_column_index]
1633*09537850SAkhilesh Sanikop                ->transform_parameters();
1634*09537850SAkhilesh Sanikop       ReconstructBlock(block, plane, start_x, start_y, tx_size,
1635*09537850SAkhilesh Sanikop                        tx_params.Front().type,
1636*09537850SAkhilesh Sanikop                        tx_params.Front().non_zero_coeff_count);
1637*09537850SAkhilesh Sanikop       tx_params.Pop();
1638*09537850SAkhilesh Sanikop     } else {
1639*09537850SAkhilesh Sanikop       TransformType tx_type;
1640*09537850SAkhilesh Sanikop       int non_zero_coeff_count;
1641*09537850SAkhilesh Sanikop #if LIBGAV1_MAX_BITDEPTH >= 10
1642*09537850SAkhilesh Sanikop       if (sequence_header_.color_config.bitdepth > 8) {
1643*09537850SAkhilesh Sanikop         non_zero_coeff_count = ReadTransformCoefficients<int32_t>(
1644*09537850SAkhilesh Sanikop             block, plane, start_x, start_y, tx_size, &tx_type);
1645*09537850SAkhilesh Sanikop       } else  // NOLINT
1646*09537850SAkhilesh Sanikop #endif
1647*09537850SAkhilesh Sanikop       {
1648*09537850SAkhilesh Sanikop         non_zero_coeff_count = ReadTransformCoefficients<int16_t>(
1649*09537850SAkhilesh Sanikop             block, plane, start_x, start_y, tx_size, &tx_type);
1650*09537850SAkhilesh Sanikop       }
1651*09537850SAkhilesh Sanikop       if (non_zero_coeff_count < 0) return false;
1652*09537850SAkhilesh Sanikop       if (mode == kProcessingModeParseAndDecode) {
1653*09537850SAkhilesh Sanikop         ReconstructBlock(block, plane, start_x, start_y, tx_size, tx_type,
1654*09537850SAkhilesh Sanikop                          non_zero_coeff_count);
1655*09537850SAkhilesh Sanikop       } else {
1656*09537850SAkhilesh Sanikop         assert(mode == kProcessingModeParseOnly);
1657*09537850SAkhilesh Sanikop         residual_buffer_threaded_[sb_row_index][sb_column_index]
1658*09537850SAkhilesh Sanikop             ->transform_parameters()
1659*09537850SAkhilesh Sanikop             ->Push(TransformParameters(tx_type, non_zero_coeff_count));
1660*09537850SAkhilesh Sanikop       }
1661*09537850SAkhilesh Sanikop     }
1662*09537850SAkhilesh Sanikop   }
1663*09537850SAkhilesh Sanikop   if (do_decode) {
1664*09537850SAkhilesh Sanikop     bool* block_decoded =
1665*09537850SAkhilesh Sanikop         &block.scratch_buffer
1666*09537850SAkhilesh Sanikop              ->block_decoded[plane][(sub_block_row4x4 >> subsampling_y) + 1]
1667*09537850SAkhilesh Sanikop                             [(sub_block_column4x4 >> subsampling_x) + 1];
1668*09537850SAkhilesh Sanikop     SetBlockValues<bool>(step_y, step_x, true, block_decoded,
1669*09537850SAkhilesh Sanikop                          TileScratchBuffer::kBlockDecodedStride);
1670*09537850SAkhilesh Sanikop   }
1671*09537850SAkhilesh Sanikop   return true;
1672*09537850SAkhilesh Sanikop }
1673*09537850SAkhilesh Sanikop 
TransformTree(const Block & block,int start_x,int start_y,BlockSize plane_size,ProcessingMode mode)1674*09537850SAkhilesh Sanikop bool Tile::TransformTree(const Block& block, int start_x, int start_y,
1675*09537850SAkhilesh Sanikop                          BlockSize plane_size, ProcessingMode mode) {
1676*09537850SAkhilesh Sanikop   assert(plane_size <= kBlock64x64);
1677*09537850SAkhilesh Sanikop   // Branching factor is 4; Maximum Depth is 4; So the maximum stack size
1678*09537850SAkhilesh Sanikop   // required is (4 - 1) * 4 + 1 = 13.
1679*09537850SAkhilesh Sanikop   Stack<TransformTreeNode, 13> stack;
1680*09537850SAkhilesh Sanikop   // It is okay to cast BlockSize to TransformSize here since the enum are
1681*09537850SAkhilesh Sanikop   // equivalent for all BlockSize values <= kBlock64x64.
1682*09537850SAkhilesh Sanikop   stack.Push(TransformTreeNode(start_x, start_y,
1683*09537850SAkhilesh Sanikop                                static_cast<TransformSize>(plane_size)));
1684*09537850SAkhilesh Sanikop 
1685*09537850SAkhilesh Sanikop   do {
1686*09537850SAkhilesh Sanikop     TransformTreeNode node = stack.Pop();
1687*09537850SAkhilesh Sanikop     const int row = DivideBy4(node.y);
1688*09537850SAkhilesh Sanikop     const int column = DivideBy4(node.x);
1689*09537850SAkhilesh Sanikop     if (row >= frame_header_.rows4x4 || column >= frame_header_.columns4x4) {
1690*09537850SAkhilesh Sanikop       continue;
1691*09537850SAkhilesh Sanikop     }
1692*09537850SAkhilesh Sanikop     const TransformSize inter_tx_size = inter_transform_sizes_[row][column];
1693*09537850SAkhilesh Sanikop     const int width = kTransformWidth[node.tx_size];
1694*09537850SAkhilesh Sanikop     const int height = kTransformHeight[node.tx_size];
1695*09537850SAkhilesh Sanikop     if (width <= kTransformWidth[inter_tx_size] &&
1696*09537850SAkhilesh Sanikop         height <= kTransformHeight[inter_tx_size]) {
1697*09537850SAkhilesh Sanikop       if (!TransformBlock(block, kPlaneY, node.x, node.y, node.tx_size, 0, 0,
1698*09537850SAkhilesh Sanikop                           mode)) {
1699*09537850SAkhilesh Sanikop         return false;
1700*09537850SAkhilesh Sanikop       }
1701*09537850SAkhilesh Sanikop       continue;
1702*09537850SAkhilesh Sanikop     }
1703*09537850SAkhilesh Sanikop     // The split transform size look up gives the right transform size that we
1704*09537850SAkhilesh Sanikop     // should push in the stack.
1705*09537850SAkhilesh Sanikop     //   if (width > height) => transform size whose width is half.
1706*09537850SAkhilesh Sanikop     //   if (width < height) => transform size whose height is half.
1707*09537850SAkhilesh Sanikop     //   if (width == height) => transform size whose width and height are half.
1708*09537850SAkhilesh Sanikop     const TransformSize split_tx_size = kSplitTransformSize[node.tx_size];
1709*09537850SAkhilesh Sanikop     const int half_width = DivideBy2(width);
1710*09537850SAkhilesh Sanikop     if (width > height) {
1711*09537850SAkhilesh Sanikop       stack.Push(TransformTreeNode(node.x + half_width, node.y, split_tx_size));
1712*09537850SAkhilesh Sanikop       stack.Push(TransformTreeNode(node.x, node.y, split_tx_size));
1713*09537850SAkhilesh Sanikop       continue;
1714*09537850SAkhilesh Sanikop     }
1715*09537850SAkhilesh Sanikop     const int half_height = DivideBy2(height);
1716*09537850SAkhilesh Sanikop     if (width < height) {
1717*09537850SAkhilesh Sanikop       stack.Push(
1718*09537850SAkhilesh Sanikop           TransformTreeNode(node.x, node.y + half_height, split_tx_size));
1719*09537850SAkhilesh Sanikop       stack.Push(TransformTreeNode(node.x, node.y, split_tx_size));
1720*09537850SAkhilesh Sanikop       continue;
1721*09537850SAkhilesh Sanikop     }
1722*09537850SAkhilesh Sanikop     stack.Push(TransformTreeNode(node.x + half_width, node.y + half_height,
1723*09537850SAkhilesh Sanikop                                  split_tx_size));
1724*09537850SAkhilesh Sanikop     stack.Push(TransformTreeNode(node.x, node.y + half_height, split_tx_size));
1725*09537850SAkhilesh Sanikop     stack.Push(TransformTreeNode(node.x + half_width, node.y, split_tx_size));
1726*09537850SAkhilesh Sanikop     stack.Push(TransformTreeNode(node.x, node.y, split_tx_size));
1727*09537850SAkhilesh Sanikop   } while (!stack.Empty());
1728*09537850SAkhilesh Sanikop   return true;
1729*09537850SAkhilesh Sanikop }
1730*09537850SAkhilesh Sanikop 
ReconstructBlock(const Block & block,Plane plane,int start_x,int start_y,TransformSize tx_size,TransformType tx_type,int non_zero_coeff_count)1731*09537850SAkhilesh Sanikop void Tile::ReconstructBlock(const Block& block, Plane plane, int start_x,
1732*09537850SAkhilesh Sanikop                             int start_y, TransformSize tx_size,
1733*09537850SAkhilesh Sanikop                             TransformType tx_type, int non_zero_coeff_count) {
1734*09537850SAkhilesh Sanikop   // Reconstruction process. Steps 2 and 3 of Section 7.12.3 in the spec.
1735*09537850SAkhilesh Sanikop   assert(non_zero_coeff_count >= 0);
1736*09537850SAkhilesh Sanikop   if (non_zero_coeff_count == 0) return;
1737*09537850SAkhilesh Sanikop #if LIBGAV1_MAX_BITDEPTH >= 10
1738*09537850SAkhilesh Sanikop   if (sequence_header_.color_config.bitdepth > 8) {
1739*09537850SAkhilesh Sanikop     Array2DView<uint16_t> buffer(
1740*09537850SAkhilesh Sanikop         buffer_[plane].rows(), buffer_[plane].columns() / sizeof(uint16_t),
1741*09537850SAkhilesh Sanikop         reinterpret_cast<uint16_t*>(&buffer_[plane][0][0]));
1742*09537850SAkhilesh Sanikop     Reconstruct(dsp_, tx_type, tx_size,
1743*09537850SAkhilesh Sanikop                 frame_header_.segmentation
1744*09537850SAkhilesh Sanikop                     .lossless[block.bp->prediction_parameters->segment_id],
1745*09537850SAkhilesh Sanikop                 reinterpret_cast<int32_t*>(*block.residual), start_x, start_y,
1746*09537850SAkhilesh Sanikop                 &buffer, non_zero_coeff_count);
1747*09537850SAkhilesh Sanikop   } else  // NOLINT
1748*09537850SAkhilesh Sanikop #endif
1749*09537850SAkhilesh Sanikop   {
1750*09537850SAkhilesh Sanikop     Reconstruct(dsp_, tx_type, tx_size,
1751*09537850SAkhilesh Sanikop                 frame_header_.segmentation
1752*09537850SAkhilesh Sanikop                     .lossless[block.bp->prediction_parameters->segment_id],
1753*09537850SAkhilesh Sanikop                 reinterpret_cast<int16_t*>(*block.residual), start_x, start_y,
1754*09537850SAkhilesh Sanikop                 &buffer_[plane], non_zero_coeff_count);
1755*09537850SAkhilesh Sanikop   }
1756*09537850SAkhilesh Sanikop   if (split_parse_and_decode_) {
1757*09537850SAkhilesh Sanikop     *block.residual +=
1758*09537850SAkhilesh Sanikop         kTransformWidth[tx_size] * kTransformHeight[tx_size] * residual_size_;
1759*09537850SAkhilesh Sanikop   }
1760*09537850SAkhilesh Sanikop }
1761*09537850SAkhilesh Sanikop 
Residual(const Block & block,ProcessingMode mode)1762*09537850SAkhilesh Sanikop bool Tile::Residual(const Block& block, ProcessingMode mode) {
1763*09537850SAkhilesh Sanikop   const int width_chunks = std::max(1, block.width >> 6);
1764*09537850SAkhilesh Sanikop   const int height_chunks = std::max(1, block.height >> 6);
1765*09537850SAkhilesh Sanikop   const BlockSize size_chunk4x4 =
1766*09537850SAkhilesh Sanikop       (width_chunks > 1 || height_chunks > 1) ? kBlock64x64 : block.size;
1767*09537850SAkhilesh Sanikop   const BlockParameters& bp = *block.bp;
1768*09537850SAkhilesh Sanikop   for (int chunk_y = 0; chunk_y < height_chunks; ++chunk_y) {
1769*09537850SAkhilesh Sanikop     for (int chunk_x = 0; chunk_x < width_chunks; ++chunk_x) {
1770*09537850SAkhilesh Sanikop       const int num_planes = block.HasChroma() ? PlaneCount() : 1;
1771*09537850SAkhilesh Sanikop       int plane = kPlaneY;
1772*09537850SAkhilesh Sanikop       do {
1773*09537850SAkhilesh Sanikop         const int subsampling_x = subsampling_x_[plane];
1774*09537850SAkhilesh Sanikop         const int subsampling_y = subsampling_y_[plane];
1775*09537850SAkhilesh Sanikop         // For Y Plane, when lossless is true |bp.transform_size| is always
1776*09537850SAkhilesh Sanikop         // kTransformSize4x4. So we can simply use |bp.transform_size| here as
1777*09537850SAkhilesh Sanikop         // the Y plane's transform size (part of Section 5.11.37 in the spec).
1778*09537850SAkhilesh Sanikop         const TransformSize tx_size =
1779*09537850SAkhilesh Sanikop             (plane == kPlaneY)
1780*09537850SAkhilesh Sanikop                 ? inter_transform_sizes_[block.row4x4][block.column4x4]
1781*09537850SAkhilesh Sanikop                 : bp.uv_transform_size;
1782*09537850SAkhilesh Sanikop         const BlockSize plane_size =
1783*09537850SAkhilesh Sanikop             kPlaneResidualSize[size_chunk4x4][subsampling_x][subsampling_y];
1784*09537850SAkhilesh Sanikop         assert(plane_size != kBlockInvalid);
1785*09537850SAkhilesh Sanikop         if (bp.is_inter &&
1786*09537850SAkhilesh Sanikop             !frame_header_.segmentation
1787*09537850SAkhilesh Sanikop                  .lossless[bp.prediction_parameters->segment_id] &&
1788*09537850SAkhilesh Sanikop             plane == kPlaneY) {
1789*09537850SAkhilesh Sanikop           const int row_chunk4x4 = block.row4x4 + MultiplyBy16(chunk_y);
1790*09537850SAkhilesh Sanikop           const int column_chunk4x4 = block.column4x4 + MultiplyBy16(chunk_x);
1791*09537850SAkhilesh Sanikop           const int base_x = MultiplyBy4(column_chunk4x4 >> subsampling_x);
1792*09537850SAkhilesh Sanikop           const int base_y = MultiplyBy4(row_chunk4x4 >> subsampling_y);
1793*09537850SAkhilesh Sanikop           if (!TransformTree(block, base_x, base_y, plane_size, mode)) {
1794*09537850SAkhilesh Sanikop             return false;
1795*09537850SAkhilesh Sanikop           }
1796*09537850SAkhilesh Sanikop         } else {
1797*09537850SAkhilesh Sanikop           const int base_x = MultiplyBy4(block.column4x4 >> subsampling_x);
1798*09537850SAkhilesh Sanikop           const int base_y = MultiplyBy4(block.row4x4 >> subsampling_y);
1799*09537850SAkhilesh Sanikop           const int step_x = kTransformWidth4x4[tx_size];
1800*09537850SAkhilesh Sanikop           const int step_y = kTransformHeight4x4[tx_size];
1801*09537850SAkhilesh Sanikop           const int num4x4_wide = kNum4x4BlocksWide[plane_size];
1802*09537850SAkhilesh Sanikop           const int num4x4_high = kNum4x4BlocksHigh[plane_size];
1803*09537850SAkhilesh Sanikop           for (int y = 0; y < num4x4_high; y += step_y) {
1804*09537850SAkhilesh Sanikop             for (int x = 0; x < num4x4_wide; x += step_x) {
1805*09537850SAkhilesh Sanikop               if (!TransformBlock(
1806*09537850SAkhilesh Sanikop                       block, static_cast<Plane>(plane), base_x, base_y, tx_size,
1807*09537850SAkhilesh Sanikop                       x + (MultiplyBy16(chunk_x) >> subsampling_x),
1808*09537850SAkhilesh Sanikop                       y + (MultiplyBy16(chunk_y) >> subsampling_y), mode)) {
1809*09537850SAkhilesh Sanikop                 return false;
1810*09537850SAkhilesh Sanikop               }
1811*09537850SAkhilesh Sanikop             }
1812*09537850SAkhilesh Sanikop           }
1813*09537850SAkhilesh Sanikop         }
1814*09537850SAkhilesh Sanikop       } while (++plane < num_planes);
1815*09537850SAkhilesh Sanikop     }
1816*09537850SAkhilesh Sanikop   }
1817*09537850SAkhilesh Sanikop   return true;
1818*09537850SAkhilesh Sanikop }
1819*09537850SAkhilesh Sanikop 
1820*09537850SAkhilesh Sanikop // The purpose of this function is to limit the maximum size of motion vectors
1821*09537850SAkhilesh Sanikop // and also, if use_intra_block_copy is true, to additionally constrain the
1822*09537850SAkhilesh Sanikop // motion vector so that the data is fetched from parts of the tile that have
1823*09537850SAkhilesh Sanikop // already been decoded and are not too close to the current block (in order to
1824*09537850SAkhilesh Sanikop // make a pipelined decoder implementation feasible).
IsMvValid(const Block & block,bool is_compound) const1825*09537850SAkhilesh Sanikop bool Tile::IsMvValid(const Block& block, bool is_compound) const {
1826*09537850SAkhilesh Sanikop   const BlockParameters& bp = *block.bp;
1827*09537850SAkhilesh Sanikop   for (int i = 0; i < 1 + static_cast<int>(is_compound); ++i) {
1828*09537850SAkhilesh Sanikop     for (int mv_component : bp.mv.mv[i].mv) {
1829*09537850SAkhilesh Sanikop       if (std::abs(mv_component) >= (1 << 14)) {
1830*09537850SAkhilesh Sanikop         return false;
1831*09537850SAkhilesh Sanikop       }
1832*09537850SAkhilesh Sanikop     }
1833*09537850SAkhilesh Sanikop   }
1834*09537850SAkhilesh Sanikop   if (!block.bp->prediction_parameters->use_intra_block_copy) {
1835*09537850SAkhilesh Sanikop     return true;
1836*09537850SAkhilesh Sanikop   }
1837*09537850SAkhilesh Sanikop   if ((bp.mv.mv[0].mv32 & 0x00070007) != 0) {
1838*09537850SAkhilesh Sanikop     return false;
1839*09537850SAkhilesh Sanikop   }
1840*09537850SAkhilesh Sanikop   const int delta_row = bp.mv.mv[0].mv[0] >> 3;
1841*09537850SAkhilesh Sanikop   const int delta_column = bp.mv.mv[0].mv[1] >> 3;
1842*09537850SAkhilesh Sanikop   int src_top_edge = MultiplyBy4(block.row4x4) + delta_row;
1843*09537850SAkhilesh Sanikop   int src_left_edge = MultiplyBy4(block.column4x4) + delta_column;
1844*09537850SAkhilesh Sanikop   const int src_bottom_edge = src_top_edge + block.height;
1845*09537850SAkhilesh Sanikop   const int src_right_edge = src_left_edge + block.width;
1846*09537850SAkhilesh Sanikop   if (block.HasChroma()) {
1847*09537850SAkhilesh Sanikop     if (block.width < 8 && subsampling_x_[kPlaneU] != 0) {
1848*09537850SAkhilesh Sanikop       src_left_edge -= 4;
1849*09537850SAkhilesh Sanikop     }
1850*09537850SAkhilesh Sanikop     if (block.height < 8 && subsampling_y_[kPlaneU] != 0) {
1851*09537850SAkhilesh Sanikop       src_top_edge -= 4;
1852*09537850SAkhilesh Sanikop     }
1853*09537850SAkhilesh Sanikop   }
1854*09537850SAkhilesh Sanikop   if (src_top_edge < MultiplyBy4(row4x4_start_) ||
1855*09537850SAkhilesh Sanikop       src_left_edge < MultiplyBy4(column4x4_start_) ||
1856*09537850SAkhilesh Sanikop       src_bottom_edge > MultiplyBy4(row4x4_end_) ||
1857*09537850SAkhilesh Sanikop       src_right_edge > MultiplyBy4(column4x4_end_)) {
1858*09537850SAkhilesh Sanikop     return false;
1859*09537850SAkhilesh Sanikop   }
1860*09537850SAkhilesh Sanikop   // sb_height_log2 = use_128x128_superblock ? log2(128) : log2(64)
1861*09537850SAkhilesh Sanikop   const int sb_height_log2 =
1862*09537850SAkhilesh Sanikop       6 + static_cast<int>(sequence_header_.use_128x128_superblock);
1863*09537850SAkhilesh Sanikop   const int active_sb_row = MultiplyBy4(block.row4x4) >> sb_height_log2;
1864*09537850SAkhilesh Sanikop   const int active_64x64_block_column = MultiplyBy4(block.column4x4) >> 6;
1865*09537850SAkhilesh Sanikop   const int src_sb_row = (src_bottom_edge - 1) >> sb_height_log2;
1866*09537850SAkhilesh Sanikop   const int src_64x64_block_column = (src_right_edge - 1) >> 6;
1867*09537850SAkhilesh Sanikop   const int total_64x64_blocks_per_row =
1868*09537850SAkhilesh Sanikop       ((column4x4_end_ - column4x4_start_ - 1) >> 4) + 1;
1869*09537850SAkhilesh Sanikop   const int active_64x64_block =
1870*09537850SAkhilesh Sanikop       active_sb_row * total_64x64_blocks_per_row + active_64x64_block_column;
1871*09537850SAkhilesh Sanikop   const int src_64x64_block =
1872*09537850SAkhilesh Sanikop       src_sb_row * total_64x64_blocks_per_row + src_64x64_block_column;
1873*09537850SAkhilesh Sanikop   if (src_64x64_block >= active_64x64_block - kIntraBlockCopyDelay64x64Blocks) {
1874*09537850SAkhilesh Sanikop     return false;
1875*09537850SAkhilesh Sanikop   }
1876*09537850SAkhilesh Sanikop 
1877*09537850SAkhilesh Sanikop   // Wavefront constraint: use only top left area of frame for reference.
1878*09537850SAkhilesh Sanikop   if (src_sb_row > active_sb_row) return false;
1879*09537850SAkhilesh Sanikop   const int gradient =
1880*09537850SAkhilesh Sanikop       1 + kIntraBlockCopyDelay64x64Blocks +
1881*09537850SAkhilesh Sanikop       static_cast<int>(sequence_header_.use_128x128_superblock);
1882*09537850SAkhilesh Sanikop   const int wavefront_offset = gradient * (active_sb_row - src_sb_row);
1883*09537850SAkhilesh Sanikop   return src_64x64_block_column < active_64x64_block_column -
1884*09537850SAkhilesh Sanikop                                       kIntraBlockCopyDelay64x64Blocks +
1885*09537850SAkhilesh Sanikop                                       wavefront_offset;
1886*09537850SAkhilesh Sanikop }
1887*09537850SAkhilesh Sanikop 
AssignInterMv(const Block & block,bool is_compound)1888*09537850SAkhilesh Sanikop bool Tile::AssignInterMv(const Block& block, bool is_compound) {
1889*09537850SAkhilesh Sanikop   int min[2];
1890*09537850SAkhilesh Sanikop   int max[2];
1891*09537850SAkhilesh Sanikop   GetClampParameters(block, min, max);
1892*09537850SAkhilesh Sanikop   BlockParameters& bp = *block.bp;
1893*09537850SAkhilesh Sanikop   const PredictionParameters& prediction_parameters = *bp.prediction_parameters;
1894*09537850SAkhilesh Sanikop   bp.mv.mv64 = 0;
1895*09537850SAkhilesh Sanikop   if (is_compound) {
1896*09537850SAkhilesh Sanikop     for (int i = 0; i < 2; ++i) {
1897*09537850SAkhilesh Sanikop       const PredictionMode mode = GetSinglePredictionMode(i, bp.y_mode);
1898*09537850SAkhilesh Sanikop       MotionVector predicted_mv;
1899*09537850SAkhilesh Sanikop       if (mode == kPredictionModeGlobalMv) {
1900*09537850SAkhilesh Sanikop         predicted_mv = prediction_parameters.global_mv[i];
1901*09537850SAkhilesh Sanikop       } else {
1902*09537850SAkhilesh Sanikop         const int ref_mv_index = (mode == kPredictionModeNearestMv ||
1903*09537850SAkhilesh Sanikop                                   (mode == kPredictionModeNewMv &&
1904*09537850SAkhilesh Sanikop                                    prediction_parameters.ref_mv_count <= 1))
1905*09537850SAkhilesh Sanikop                                      ? 0
1906*09537850SAkhilesh Sanikop                                      : prediction_parameters.ref_mv_index;
1907*09537850SAkhilesh Sanikop         predicted_mv = prediction_parameters.reference_mv(ref_mv_index, i);
1908*09537850SAkhilesh Sanikop         if (ref_mv_index < prediction_parameters.ref_mv_count) {
1909*09537850SAkhilesh Sanikop           predicted_mv.mv[0] = Clip3(predicted_mv.mv[0], min[0], max[0]);
1910*09537850SAkhilesh Sanikop           predicted_mv.mv[1] = Clip3(predicted_mv.mv[1], min[1], max[1]);
1911*09537850SAkhilesh Sanikop         }
1912*09537850SAkhilesh Sanikop       }
1913*09537850SAkhilesh Sanikop       if (mode == kPredictionModeNewMv) {
1914*09537850SAkhilesh Sanikop         ReadMotionVector(block, i);
1915*09537850SAkhilesh Sanikop         bp.mv.mv[i].mv[0] += predicted_mv.mv[0];
1916*09537850SAkhilesh Sanikop         bp.mv.mv[i].mv[1] += predicted_mv.mv[1];
1917*09537850SAkhilesh Sanikop       } else {
1918*09537850SAkhilesh Sanikop         bp.mv.mv[i] = predicted_mv;
1919*09537850SAkhilesh Sanikop       }
1920*09537850SAkhilesh Sanikop     }
1921*09537850SAkhilesh Sanikop   } else {
1922*09537850SAkhilesh Sanikop     const PredictionMode mode = GetSinglePredictionMode(0, bp.y_mode);
1923*09537850SAkhilesh Sanikop     MotionVector predicted_mv;
1924*09537850SAkhilesh Sanikop     if (mode == kPredictionModeGlobalMv) {
1925*09537850SAkhilesh Sanikop       predicted_mv = prediction_parameters.global_mv[0];
1926*09537850SAkhilesh Sanikop     } else {
1927*09537850SAkhilesh Sanikop       const int ref_mv_index = (mode == kPredictionModeNearestMv ||
1928*09537850SAkhilesh Sanikop                                 (mode == kPredictionModeNewMv &&
1929*09537850SAkhilesh Sanikop                                  prediction_parameters.ref_mv_count <= 1))
1930*09537850SAkhilesh Sanikop                                    ? 0
1931*09537850SAkhilesh Sanikop                                    : prediction_parameters.ref_mv_index;
1932*09537850SAkhilesh Sanikop       predicted_mv = prediction_parameters.reference_mv(ref_mv_index);
1933*09537850SAkhilesh Sanikop       if (ref_mv_index < prediction_parameters.ref_mv_count) {
1934*09537850SAkhilesh Sanikop         predicted_mv.mv[0] = Clip3(predicted_mv.mv[0], min[0], max[0]);
1935*09537850SAkhilesh Sanikop         predicted_mv.mv[1] = Clip3(predicted_mv.mv[1], min[1], max[1]);
1936*09537850SAkhilesh Sanikop       }
1937*09537850SAkhilesh Sanikop     }
1938*09537850SAkhilesh Sanikop     if (mode == kPredictionModeNewMv) {
1939*09537850SAkhilesh Sanikop       ReadMotionVector(block, 0);
1940*09537850SAkhilesh Sanikop       bp.mv.mv[0].mv[0] += predicted_mv.mv[0];
1941*09537850SAkhilesh Sanikop       bp.mv.mv[0].mv[1] += predicted_mv.mv[1];
1942*09537850SAkhilesh Sanikop     } else {
1943*09537850SAkhilesh Sanikop       bp.mv.mv[0] = predicted_mv;
1944*09537850SAkhilesh Sanikop     }
1945*09537850SAkhilesh Sanikop   }
1946*09537850SAkhilesh Sanikop   return IsMvValid(block, is_compound);
1947*09537850SAkhilesh Sanikop }
1948*09537850SAkhilesh Sanikop 
AssignIntraMv(const Block & block)1949*09537850SAkhilesh Sanikop bool Tile::AssignIntraMv(const Block& block) {
1950*09537850SAkhilesh Sanikop   // TODO(linfengz): Check if the clamping process is necessary.
1951*09537850SAkhilesh Sanikop   int min[2];
1952*09537850SAkhilesh Sanikop   int max[2];
1953*09537850SAkhilesh Sanikop   GetClampParameters(block, min, max);
1954*09537850SAkhilesh Sanikop   BlockParameters& bp = *block.bp;
1955*09537850SAkhilesh Sanikop   const PredictionParameters& prediction_parameters = *bp.prediction_parameters;
1956*09537850SAkhilesh Sanikop   const MotionVector& ref_mv_0 = prediction_parameters.reference_mv(0);
1957*09537850SAkhilesh Sanikop   bp.mv.mv64 = 0;
1958*09537850SAkhilesh Sanikop   ReadMotionVector(block, 0);
1959*09537850SAkhilesh Sanikop   if (ref_mv_0.mv32 == 0) {
1960*09537850SAkhilesh Sanikop     const MotionVector& ref_mv_1 = prediction_parameters.reference_mv(1);
1961*09537850SAkhilesh Sanikop     if (ref_mv_1.mv32 == 0) {
1962*09537850SAkhilesh Sanikop       const int super_block_size4x4 = kNum4x4BlocksHigh[SuperBlockSize()];
1963*09537850SAkhilesh Sanikop       if (block.row4x4 - super_block_size4x4 < row4x4_start_) {
1964*09537850SAkhilesh Sanikop         bp.mv.mv[0].mv[1] -= MultiplyBy32(super_block_size4x4);
1965*09537850SAkhilesh Sanikop         bp.mv.mv[0].mv[1] -= MultiplyBy8(kIntraBlockCopyDelayPixels);
1966*09537850SAkhilesh Sanikop       } else {
1967*09537850SAkhilesh Sanikop         bp.mv.mv[0].mv[0] -= MultiplyBy32(super_block_size4x4);
1968*09537850SAkhilesh Sanikop       }
1969*09537850SAkhilesh Sanikop     } else {
1970*09537850SAkhilesh Sanikop       bp.mv.mv[0].mv[0] += Clip3(ref_mv_1.mv[0], min[0], max[0]);
1971*09537850SAkhilesh Sanikop       bp.mv.mv[0].mv[1] += Clip3(ref_mv_1.mv[1], min[0], max[0]);
1972*09537850SAkhilesh Sanikop     }
1973*09537850SAkhilesh Sanikop   } else {
1974*09537850SAkhilesh Sanikop     bp.mv.mv[0].mv[0] += Clip3(ref_mv_0.mv[0], min[0], max[0]);
1975*09537850SAkhilesh Sanikop     bp.mv.mv[0].mv[1] += Clip3(ref_mv_0.mv[1], min[1], max[1]);
1976*09537850SAkhilesh Sanikop   }
1977*09537850SAkhilesh Sanikop   return IsMvValid(block, /*is_compound=*/false);
1978*09537850SAkhilesh Sanikop }
1979*09537850SAkhilesh Sanikop 
ResetEntropyContext(const Block & block)1980*09537850SAkhilesh Sanikop void Tile::ResetEntropyContext(const Block& block) {
1981*09537850SAkhilesh Sanikop   const int num_planes = block.HasChroma() ? PlaneCount() : 1;
1982*09537850SAkhilesh Sanikop   int plane = kPlaneY;
1983*09537850SAkhilesh Sanikop   do {
1984*09537850SAkhilesh Sanikop     const int subsampling_x = subsampling_x_[plane];
1985*09537850SAkhilesh Sanikop     const int start_x = block.column4x4 >> subsampling_x;
1986*09537850SAkhilesh Sanikop     const int end_x =
1987*09537850SAkhilesh Sanikop         std::min((block.column4x4 + block.width4x4) >> subsampling_x,
1988*09537850SAkhilesh Sanikop                  frame_header_.columns4x4);
1989*09537850SAkhilesh Sanikop     memset(&coefficient_levels_[kEntropyContextTop][plane][start_x], 0,
1990*09537850SAkhilesh Sanikop            end_x - start_x);
1991*09537850SAkhilesh Sanikop     memset(&dc_categories_[kEntropyContextTop][plane][start_x], 0,
1992*09537850SAkhilesh Sanikop            end_x - start_x);
1993*09537850SAkhilesh Sanikop     const int subsampling_y = subsampling_y_[plane];
1994*09537850SAkhilesh Sanikop     const int start_y = block.row4x4 >> subsampling_y;
1995*09537850SAkhilesh Sanikop     const int end_y =
1996*09537850SAkhilesh Sanikop         std::min((block.row4x4 + block.height4x4) >> subsampling_y,
1997*09537850SAkhilesh Sanikop                  frame_header_.rows4x4);
1998*09537850SAkhilesh Sanikop     memset(&coefficient_levels_[kEntropyContextLeft][plane][start_y], 0,
1999*09537850SAkhilesh Sanikop            end_y - start_y);
2000*09537850SAkhilesh Sanikop     memset(&dc_categories_[kEntropyContextLeft][plane][start_y], 0,
2001*09537850SAkhilesh Sanikop            end_y - start_y);
2002*09537850SAkhilesh Sanikop   } while (++plane < num_planes);
2003*09537850SAkhilesh Sanikop }
2004*09537850SAkhilesh Sanikop 
ComputePrediction(const Block & block)2005*09537850SAkhilesh Sanikop bool Tile::ComputePrediction(const Block& block) {
2006*09537850SAkhilesh Sanikop   const BlockParameters& bp = *block.bp;
2007*09537850SAkhilesh Sanikop   if (!bp.is_inter) return true;
2008*09537850SAkhilesh Sanikop   const int mask =
2009*09537850SAkhilesh Sanikop       (1 << (4 + static_cast<int>(sequence_header_.use_128x128_superblock))) -
2010*09537850SAkhilesh Sanikop       1;
2011*09537850SAkhilesh Sanikop   const int sub_block_row4x4 = block.row4x4 & mask;
2012*09537850SAkhilesh Sanikop   const int sub_block_column4x4 = block.column4x4 & mask;
2013*09537850SAkhilesh Sanikop   const int plane_count = block.HasChroma() ? PlaneCount() : 1;
2014*09537850SAkhilesh Sanikop   // Returns true if this block applies local warping. The state is determined
2015*09537850SAkhilesh Sanikop   // in the Y plane and carried for use in the U/V planes.
2016*09537850SAkhilesh Sanikop   // But the U/V planes will not apply warping when the block size is smaller
2017*09537850SAkhilesh Sanikop   // than 8x8, even if this variable is true.
2018*09537850SAkhilesh Sanikop   bool is_local_valid = false;
2019*09537850SAkhilesh Sanikop   // Local warping parameters, similar usage as is_local_valid.
2020*09537850SAkhilesh Sanikop   GlobalMotion local_warp_params;
2021*09537850SAkhilesh Sanikop   int plane = kPlaneY;
2022*09537850SAkhilesh Sanikop   do {
2023*09537850SAkhilesh Sanikop     const int8_t subsampling_x = subsampling_x_[plane];
2024*09537850SAkhilesh Sanikop     const int8_t subsampling_y = subsampling_y_[plane];
2025*09537850SAkhilesh Sanikop     const BlockSize plane_size = block.residual_size[plane];
2026*09537850SAkhilesh Sanikop     const int block_width4x4 = kNum4x4BlocksWide[plane_size];
2027*09537850SAkhilesh Sanikop     const int block_height4x4 = kNum4x4BlocksHigh[plane_size];
2028*09537850SAkhilesh Sanikop     const int block_width = MultiplyBy4(block_width4x4);
2029*09537850SAkhilesh Sanikop     const int block_height = MultiplyBy4(block_height4x4);
2030*09537850SAkhilesh Sanikop     const int base_x = MultiplyBy4(block.column4x4 >> subsampling_x);
2031*09537850SAkhilesh Sanikop     const int base_y = MultiplyBy4(block.row4x4 >> subsampling_y);
2032*09537850SAkhilesh Sanikop     if (bp.reference_frame[1] == kReferenceFrameIntra) {
2033*09537850SAkhilesh Sanikop       const int tr_row4x4 = sub_block_row4x4 >> subsampling_y;
2034*09537850SAkhilesh Sanikop       const int tr_column4x4 =
2035*09537850SAkhilesh Sanikop           (sub_block_column4x4 >> subsampling_x) + block_width4x4 + 1;
2036*09537850SAkhilesh Sanikop       const int bl_row4x4 =
2037*09537850SAkhilesh Sanikop           (sub_block_row4x4 >> subsampling_y) + block_height4x4;
2038*09537850SAkhilesh Sanikop       const int bl_column4x4 = (sub_block_column4x4 >> subsampling_x) + 1;
2039*09537850SAkhilesh Sanikop       const TransformSize tx_size =
2040*09537850SAkhilesh Sanikop           k4x4SizeToTransformSize[k4x4WidthLog2[plane_size]]
2041*09537850SAkhilesh Sanikop                                  [k4x4HeightLog2[plane_size]];
2042*09537850SAkhilesh Sanikop       const bool has_left = block.left_available[plane];
2043*09537850SAkhilesh Sanikop       const bool has_top = block.top_available[plane];
2044*09537850SAkhilesh Sanikop       CALL_BITDEPTH_FUNCTION(
2045*09537850SAkhilesh Sanikop           IntraPrediction, block, static_cast<Plane>(plane), base_x, base_y,
2046*09537850SAkhilesh Sanikop           has_left, has_top,
2047*09537850SAkhilesh Sanikop           block.scratch_buffer->block_decoded[plane][tr_row4x4][tr_column4x4],
2048*09537850SAkhilesh Sanikop           block.scratch_buffer->block_decoded[plane][bl_row4x4][bl_column4x4],
2049*09537850SAkhilesh Sanikop           kInterIntraToIntraMode[block.bp->prediction_parameters
2050*09537850SAkhilesh Sanikop                                      ->inter_intra_mode],
2051*09537850SAkhilesh Sanikop           tx_size);
2052*09537850SAkhilesh Sanikop     }
2053*09537850SAkhilesh Sanikop     int candidate_row = block.row4x4;
2054*09537850SAkhilesh Sanikop     int candidate_column = block.column4x4;
2055*09537850SAkhilesh Sanikop     bool some_use_intra = bp.reference_frame[0] == kReferenceFrameIntra;
2056*09537850SAkhilesh Sanikop     if (!some_use_intra && plane != 0) {
2057*09537850SAkhilesh Sanikop       candidate_row = (candidate_row >> subsampling_y) << subsampling_y;
2058*09537850SAkhilesh Sanikop       candidate_column = (candidate_column >> subsampling_x) << subsampling_x;
2059*09537850SAkhilesh Sanikop       if (candidate_row != block.row4x4) {
2060*09537850SAkhilesh Sanikop         // Top block.
2061*09537850SAkhilesh Sanikop         const BlockParameters& bp_top =
2062*09537850SAkhilesh Sanikop             *block_parameters_holder_.Find(candidate_row, block.column4x4);
2063*09537850SAkhilesh Sanikop         some_use_intra = bp_top.reference_frame[0] == kReferenceFrameIntra;
2064*09537850SAkhilesh Sanikop         if (!some_use_intra && candidate_column != block.column4x4) {
2065*09537850SAkhilesh Sanikop           // Top-left block.
2066*09537850SAkhilesh Sanikop           const BlockParameters& bp_top_left =
2067*09537850SAkhilesh Sanikop               *block_parameters_holder_.Find(candidate_row, candidate_column);
2068*09537850SAkhilesh Sanikop           some_use_intra =
2069*09537850SAkhilesh Sanikop               bp_top_left.reference_frame[0] == kReferenceFrameIntra;
2070*09537850SAkhilesh Sanikop         }
2071*09537850SAkhilesh Sanikop       }
2072*09537850SAkhilesh Sanikop       if (!some_use_intra && candidate_column != block.column4x4) {
2073*09537850SAkhilesh Sanikop         // Left block.
2074*09537850SAkhilesh Sanikop         const BlockParameters& bp_left =
2075*09537850SAkhilesh Sanikop             *block_parameters_holder_.Find(block.row4x4, candidate_column);
2076*09537850SAkhilesh Sanikop         some_use_intra = bp_left.reference_frame[0] == kReferenceFrameIntra;
2077*09537850SAkhilesh Sanikop       }
2078*09537850SAkhilesh Sanikop     }
2079*09537850SAkhilesh Sanikop     int prediction_width;
2080*09537850SAkhilesh Sanikop     int prediction_height;
2081*09537850SAkhilesh Sanikop     if (some_use_intra) {
2082*09537850SAkhilesh Sanikop       candidate_row = block.row4x4;
2083*09537850SAkhilesh Sanikop       candidate_column = block.column4x4;
2084*09537850SAkhilesh Sanikop       prediction_width = block_width;
2085*09537850SAkhilesh Sanikop       prediction_height = block_height;
2086*09537850SAkhilesh Sanikop     } else {
2087*09537850SAkhilesh Sanikop       prediction_width = block.width >> subsampling_x;
2088*09537850SAkhilesh Sanikop       prediction_height = block.height >> subsampling_y;
2089*09537850SAkhilesh Sanikop     }
2090*09537850SAkhilesh Sanikop     int r = 0;
2091*09537850SAkhilesh Sanikop     int y = 0;
2092*09537850SAkhilesh Sanikop     do {
2093*09537850SAkhilesh Sanikop       int c = 0;
2094*09537850SAkhilesh Sanikop       int x = 0;
2095*09537850SAkhilesh Sanikop       do {
2096*09537850SAkhilesh Sanikop         if (!InterPrediction(block, static_cast<Plane>(plane), base_x + x,
2097*09537850SAkhilesh Sanikop                              base_y + y, prediction_width, prediction_height,
2098*09537850SAkhilesh Sanikop                              candidate_row + r, candidate_column + c,
2099*09537850SAkhilesh Sanikop                              &is_local_valid, &local_warp_params)) {
2100*09537850SAkhilesh Sanikop           return false;
2101*09537850SAkhilesh Sanikop         }
2102*09537850SAkhilesh Sanikop         ++c;
2103*09537850SAkhilesh Sanikop         x += prediction_width;
2104*09537850SAkhilesh Sanikop       } while (x < block_width);
2105*09537850SAkhilesh Sanikop       ++r;
2106*09537850SAkhilesh Sanikop       y += prediction_height;
2107*09537850SAkhilesh Sanikop     } while (y < block_height);
2108*09537850SAkhilesh Sanikop   } while (++plane < plane_count);
2109*09537850SAkhilesh Sanikop   return true;
2110*09537850SAkhilesh Sanikop }
2111*09537850SAkhilesh Sanikop 
2112*09537850SAkhilesh Sanikop #undef CALL_BITDEPTH_FUNCTION
2113*09537850SAkhilesh Sanikop 
PopulateDeblockFilterLevel(const Block & block)2114*09537850SAkhilesh Sanikop void Tile::PopulateDeblockFilterLevel(const Block& block) {
2115*09537850SAkhilesh Sanikop   if (!post_filter_.DoDeblock()) return;
2116*09537850SAkhilesh Sanikop   BlockParameters& bp = *block.bp;
2117*09537850SAkhilesh Sanikop   const int mode_id =
2118*09537850SAkhilesh Sanikop       static_cast<int>(kPredictionModeDeltasMask.Contains(bp.y_mode));
2119*09537850SAkhilesh Sanikop   for (int i = 0; i < kFrameLfCount; ++i) {
2120*09537850SAkhilesh Sanikop     if (delta_lf_all_zero_) {
2121*09537850SAkhilesh Sanikop       bp.deblock_filter_level[i] = post_filter_.GetZeroDeltaDeblockFilterLevel(
2122*09537850SAkhilesh Sanikop           bp.prediction_parameters->segment_id, i, bp.reference_frame[0],
2123*09537850SAkhilesh Sanikop           mode_id);
2124*09537850SAkhilesh Sanikop     } else {
2125*09537850SAkhilesh Sanikop       bp.deblock_filter_level[i] =
2126*09537850SAkhilesh Sanikop           deblock_filter_levels_[bp.prediction_parameters->segment_id][i]
2127*09537850SAkhilesh Sanikop                                 [bp.reference_frame[0]][mode_id];
2128*09537850SAkhilesh Sanikop     }
2129*09537850SAkhilesh Sanikop   }
2130*09537850SAkhilesh Sanikop }
2131*09537850SAkhilesh Sanikop 
PopulateCdefSkip(const Block & block)2132*09537850SAkhilesh Sanikop void Tile::PopulateCdefSkip(const Block& block) {
2133*09537850SAkhilesh Sanikop   if (!post_filter_.DoCdef() || block.bp->skip ||
2134*09537850SAkhilesh Sanikop       (frame_header_.cdef.bits > 0 &&
2135*09537850SAkhilesh Sanikop        cdef_index_[DivideBy16(block.row4x4)][DivideBy16(block.column4x4)] ==
2136*09537850SAkhilesh Sanikop            -1)) {
2137*09537850SAkhilesh Sanikop     return;
2138*09537850SAkhilesh Sanikop   }
2139*09537850SAkhilesh Sanikop   // The rest of this function is an efficient version of the following code:
2140*09537850SAkhilesh Sanikop   // for (int y = block.row4x4; y < block.row4x4 + block.height4x4; y++) {
2141*09537850SAkhilesh Sanikop   //   for (int x = block.column4x4; y < block.column4x4 + block.width4x4;
2142*09537850SAkhilesh Sanikop   //        x++) {
2143*09537850SAkhilesh Sanikop   //     const uint8_t mask = uint8_t{1} << ((x >> 1) & 0x7);
2144*09537850SAkhilesh Sanikop   //     cdef_skip_[y >> 1][x >> 4] |= mask;
2145*09537850SAkhilesh Sanikop   //   }
2146*09537850SAkhilesh Sanikop   // }
2147*09537850SAkhilesh Sanikop 
2148*09537850SAkhilesh Sanikop   // For all block widths other than 32, the mask will fit in uint8_t. For
2149*09537850SAkhilesh Sanikop   // block width == 32, the mask is always 0xFFFF.
2150*09537850SAkhilesh Sanikop   const int bw4 =
2151*09537850SAkhilesh Sanikop       std::max(DivideBy2(block.width4x4) + (block.column4x4 & 1), 1);
2152*09537850SAkhilesh Sanikop   const uint8_t mask = (block.width4x4 == 32)
2153*09537850SAkhilesh Sanikop                            ? 0xFF
2154*09537850SAkhilesh Sanikop                            : (uint8_t{0xFF} >> (8 - bw4))
2155*09537850SAkhilesh Sanikop                                  << (DivideBy2(block.column4x4) & 0x7);
2156*09537850SAkhilesh Sanikop   uint8_t* cdef_skip = &cdef_skip_[block.row4x4 >> 1][block.column4x4 >> 4];
2157*09537850SAkhilesh Sanikop   const int stride = cdef_skip_.columns();
2158*09537850SAkhilesh Sanikop   int row = 0;
2159*09537850SAkhilesh Sanikop   do {
2160*09537850SAkhilesh Sanikop     *cdef_skip |= mask;
2161*09537850SAkhilesh Sanikop     if (block.width4x4 == 32) {
2162*09537850SAkhilesh Sanikop       *(cdef_skip + 1) = 0xFF;
2163*09537850SAkhilesh Sanikop     }
2164*09537850SAkhilesh Sanikop     cdef_skip += stride;
2165*09537850SAkhilesh Sanikop     row += 2;
2166*09537850SAkhilesh Sanikop   } while (row < block.height4x4);
2167*09537850SAkhilesh Sanikop }
2168*09537850SAkhilesh Sanikop 
ProcessBlock(int row4x4,int column4x4,BlockSize block_size,TileScratchBuffer * const scratch_buffer,ResidualPtr * residual)2169*09537850SAkhilesh Sanikop bool Tile::ProcessBlock(int row4x4, int column4x4, BlockSize block_size,
2170*09537850SAkhilesh Sanikop                         TileScratchBuffer* const scratch_buffer,
2171*09537850SAkhilesh Sanikop                         ResidualPtr* residual) {
2172*09537850SAkhilesh Sanikop   // Do not process the block if the starting point is beyond the visible frame.
2173*09537850SAkhilesh Sanikop   // This is equivalent to the has_row/has_column check in the
2174*09537850SAkhilesh Sanikop   // decode_partition() section of the spec when partition equals
2175*09537850SAkhilesh Sanikop   // kPartitionHorizontal or kPartitionVertical.
2176*09537850SAkhilesh Sanikop   if (row4x4 >= frame_header_.rows4x4 ||
2177*09537850SAkhilesh Sanikop       column4x4 >= frame_header_.columns4x4) {
2178*09537850SAkhilesh Sanikop     return true;
2179*09537850SAkhilesh Sanikop   }
2180*09537850SAkhilesh Sanikop 
2181*09537850SAkhilesh Sanikop   if (split_parse_and_decode_) {
2182*09537850SAkhilesh Sanikop     // Push block ordering info to the queue. DecodeBlock() will use this queue
2183*09537850SAkhilesh Sanikop     // to decode the blocks in the correct order.
2184*09537850SAkhilesh Sanikop     const int sb_row_index = SuperBlockRowIndex(row4x4);
2185*09537850SAkhilesh Sanikop     const int sb_column_index = SuperBlockColumnIndex(column4x4);
2186*09537850SAkhilesh Sanikop     residual_buffer_threaded_[sb_row_index][sb_column_index]
2187*09537850SAkhilesh Sanikop         ->partition_tree_order()
2188*09537850SAkhilesh Sanikop         ->Push(PartitionTreeNode(row4x4, column4x4, block_size));
2189*09537850SAkhilesh Sanikop   }
2190*09537850SAkhilesh Sanikop 
2191*09537850SAkhilesh Sanikop   BlockParameters* bp_ptr =
2192*09537850SAkhilesh Sanikop       block_parameters_holder_.Get(row4x4, column4x4, block_size);
2193*09537850SAkhilesh Sanikop   if (bp_ptr == nullptr) {
2194*09537850SAkhilesh Sanikop     LIBGAV1_DLOG(ERROR, "Failed to get BlockParameters.");
2195*09537850SAkhilesh Sanikop     return false;
2196*09537850SAkhilesh Sanikop   }
2197*09537850SAkhilesh Sanikop   BlockParameters& bp = *bp_ptr;
2198*09537850SAkhilesh Sanikop   Block block(this, block_size, row4x4, column4x4, scratch_buffer, residual);
2199*09537850SAkhilesh Sanikop   bp.size = block_size;
2200*09537850SAkhilesh Sanikop   bp.prediction_parameters =
2201*09537850SAkhilesh Sanikop       split_parse_and_decode_ ? std::unique_ptr<PredictionParameters>(
2202*09537850SAkhilesh Sanikop                                     new (std::nothrow) PredictionParameters())
2203*09537850SAkhilesh Sanikop                               : std::move(prediction_parameters_);
2204*09537850SAkhilesh Sanikop   if (bp.prediction_parameters == nullptr) return false;
2205*09537850SAkhilesh Sanikop   if (!DecodeModeInfo(block)) return false;
2206*09537850SAkhilesh Sanikop   PopulateDeblockFilterLevel(block);
2207*09537850SAkhilesh Sanikop   if (!ReadPaletteTokens(block)) return false;
2208*09537850SAkhilesh Sanikop   DecodeTransformSize(block);
2209*09537850SAkhilesh Sanikop   // Part of Section 5.11.37 in the spec (implemented as a simple lookup).
2210*09537850SAkhilesh Sanikop   bp.uv_transform_size =
2211*09537850SAkhilesh Sanikop       frame_header_.segmentation.lossless[bp.prediction_parameters->segment_id]
2212*09537850SAkhilesh Sanikop           ? kTransformSize4x4
2213*09537850SAkhilesh Sanikop           : kUVTransformSize[block.residual_size[kPlaneU]];
2214*09537850SAkhilesh Sanikop   if (bp.skip) ResetEntropyContext(block);
2215*09537850SAkhilesh Sanikop   PopulateCdefSkip(block);
2216*09537850SAkhilesh Sanikop   if (split_parse_and_decode_) {
2217*09537850SAkhilesh Sanikop     if (!Residual(block, kProcessingModeParseOnly)) return false;
2218*09537850SAkhilesh Sanikop   } else {
2219*09537850SAkhilesh Sanikop     if (!ComputePrediction(block) ||
2220*09537850SAkhilesh Sanikop         !Residual(block, kProcessingModeParseAndDecode)) {
2221*09537850SAkhilesh Sanikop       return false;
2222*09537850SAkhilesh Sanikop     }
2223*09537850SAkhilesh Sanikop   }
2224*09537850SAkhilesh Sanikop   // If frame_header_.segmentation.enabled is false,
2225*09537850SAkhilesh Sanikop   // bp.prediction_parameters->segment_id is 0 for all blocks. We don't need to
2226*09537850SAkhilesh Sanikop   // call save bp.prediction_parameters->segment_id in the current frame because
2227*09537850SAkhilesh Sanikop   // the current frame's segmentation map will be cleared to all 0s.
2228*09537850SAkhilesh Sanikop   //
2229*09537850SAkhilesh Sanikop   // If frame_header_.segmentation.enabled is true and
2230*09537850SAkhilesh Sanikop   // frame_header_.segmentation.update_map is false, we will copy the previous
2231*09537850SAkhilesh Sanikop   // frame's segmentation map to the current frame. So we don't need to call
2232*09537850SAkhilesh Sanikop   // save bp.prediction_parameters->segment_id in the current frame.
2233*09537850SAkhilesh Sanikop   if (frame_header_.segmentation.enabled &&
2234*09537850SAkhilesh Sanikop       frame_header_.segmentation.update_map) {
2235*09537850SAkhilesh Sanikop     const int x_limit = std::min(frame_header_.columns4x4 - column4x4,
2236*09537850SAkhilesh Sanikop                                  static_cast<int>(block.width4x4));
2237*09537850SAkhilesh Sanikop     const int y_limit = std::min(frame_header_.rows4x4 - row4x4,
2238*09537850SAkhilesh Sanikop                                  static_cast<int>(block.height4x4));
2239*09537850SAkhilesh Sanikop     current_frame_.segmentation_map()->FillBlock(
2240*09537850SAkhilesh Sanikop         row4x4, column4x4, x_limit, y_limit,
2241*09537850SAkhilesh Sanikop         bp.prediction_parameters->segment_id);
2242*09537850SAkhilesh Sanikop   }
2243*09537850SAkhilesh Sanikop   StoreMotionFieldMvsIntoCurrentFrame(block);
2244*09537850SAkhilesh Sanikop   if (!split_parse_and_decode_) {
2245*09537850SAkhilesh Sanikop     prediction_parameters_ = std::move(bp.prediction_parameters);
2246*09537850SAkhilesh Sanikop   }
2247*09537850SAkhilesh Sanikop   return true;
2248*09537850SAkhilesh Sanikop }
2249*09537850SAkhilesh Sanikop 
DecodeBlock(int row4x4,int column4x4,BlockSize block_size,TileScratchBuffer * const scratch_buffer,ResidualPtr * residual)2250*09537850SAkhilesh Sanikop bool Tile::DecodeBlock(int row4x4, int column4x4, BlockSize block_size,
2251*09537850SAkhilesh Sanikop                        TileScratchBuffer* const scratch_buffer,
2252*09537850SAkhilesh Sanikop                        ResidualPtr* residual) {
2253*09537850SAkhilesh Sanikop   if (row4x4 >= frame_header_.rows4x4 ||
2254*09537850SAkhilesh Sanikop       column4x4 >= frame_header_.columns4x4) {
2255*09537850SAkhilesh Sanikop     return true;
2256*09537850SAkhilesh Sanikop   }
2257*09537850SAkhilesh Sanikop   Block block(this, block_size, row4x4, column4x4, scratch_buffer, residual);
2258*09537850SAkhilesh Sanikop   if (!ComputePrediction(block) ||
2259*09537850SAkhilesh Sanikop       !Residual(block, kProcessingModeDecodeOnly)) {
2260*09537850SAkhilesh Sanikop     return false;
2261*09537850SAkhilesh Sanikop   }
2262*09537850SAkhilesh Sanikop   block.bp->prediction_parameters.reset(nullptr);
2263*09537850SAkhilesh Sanikop   return true;
2264*09537850SAkhilesh Sanikop }
2265*09537850SAkhilesh Sanikop 
ProcessPartition(int row4x4_start,int column4x4_start,TileScratchBuffer * const scratch_buffer,ResidualPtr * residual)2266*09537850SAkhilesh Sanikop bool Tile::ProcessPartition(int row4x4_start, int column4x4_start,
2267*09537850SAkhilesh Sanikop                             TileScratchBuffer* const scratch_buffer,
2268*09537850SAkhilesh Sanikop                             ResidualPtr* residual) {
2269*09537850SAkhilesh Sanikop   Stack<PartitionTreeNode, kDfsStackSize> stack;
2270*09537850SAkhilesh Sanikop 
2271*09537850SAkhilesh Sanikop   // Set up the first iteration.
2272*09537850SAkhilesh Sanikop   stack.Push(
2273*09537850SAkhilesh Sanikop       PartitionTreeNode(row4x4_start, column4x4_start, SuperBlockSize()));
2274*09537850SAkhilesh Sanikop 
2275*09537850SAkhilesh Sanikop   // DFS loop. If it sees a terminal node (leaf node), ProcessBlock is invoked.
2276*09537850SAkhilesh Sanikop   // Otherwise, the children are pushed into the stack for future processing.
2277*09537850SAkhilesh Sanikop   do {
2278*09537850SAkhilesh Sanikop     PartitionTreeNode node = stack.Pop();
2279*09537850SAkhilesh Sanikop     int row4x4 = node.row4x4;
2280*09537850SAkhilesh Sanikop     int column4x4 = node.column4x4;
2281*09537850SAkhilesh Sanikop     BlockSize block_size = node.block_size;
2282*09537850SAkhilesh Sanikop 
2283*09537850SAkhilesh Sanikop     if (row4x4 >= frame_header_.rows4x4 ||
2284*09537850SAkhilesh Sanikop         column4x4 >= frame_header_.columns4x4) {
2285*09537850SAkhilesh Sanikop       continue;
2286*09537850SAkhilesh Sanikop     }
2287*09537850SAkhilesh Sanikop     const int block_width4x4 = kNum4x4BlocksWide[block_size];
2288*09537850SAkhilesh Sanikop     assert(block_width4x4 == kNum4x4BlocksHigh[block_size]);
2289*09537850SAkhilesh Sanikop     const int half_block4x4 = block_width4x4 >> 1;
2290*09537850SAkhilesh Sanikop     const bool has_rows = (row4x4 + half_block4x4) < frame_header_.rows4x4;
2291*09537850SAkhilesh Sanikop     const bool has_columns =
2292*09537850SAkhilesh Sanikop         (column4x4 + half_block4x4) < frame_header_.columns4x4;
2293*09537850SAkhilesh Sanikop     Partition partition;
2294*09537850SAkhilesh Sanikop     if (!ReadPartition(row4x4, column4x4, block_size, has_rows, has_columns,
2295*09537850SAkhilesh Sanikop                        &partition)) {
2296*09537850SAkhilesh Sanikop       LIBGAV1_DLOG(ERROR, "Failed to read partition for row: %d column: %d",
2297*09537850SAkhilesh Sanikop                    row4x4, column4x4);
2298*09537850SAkhilesh Sanikop       return false;
2299*09537850SAkhilesh Sanikop     }
2300*09537850SAkhilesh Sanikop     const BlockSize sub_size = kSubSize[partition][block_size];
2301*09537850SAkhilesh Sanikop     // Section 6.10.4: It is a requirement of bitstream conformance that
2302*09537850SAkhilesh Sanikop     // get_plane_residual_size( subSize, 1 ) is not equal to BLOCK_INVALID
2303*09537850SAkhilesh Sanikop     // every time subSize is computed.
2304*09537850SAkhilesh Sanikop     if (sub_size == kBlockInvalid ||
2305*09537850SAkhilesh Sanikop         kPlaneResidualSize[sub_size]
2306*09537850SAkhilesh Sanikop                           [sequence_header_.color_config.subsampling_x]
2307*09537850SAkhilesh Sanikop                           [sequence_header_.color_config.subsampling_y] ==
2308*09537850SAkhilesh Sanikop             kBlockInvalid) {
2309*09537850SAkhilesh Sanikop       LIBGAV1_DLOG(
2310*09537850SAkhilesh Sanikop           ERROR,
2311*09537850SAkhilesh Sanikop           "Invalid sub-block/plane size for row: %d column: %d partition: "
2312*09537850SAkhilesh Sanikop           "%d block_size: %d sub_size: %d subsampling_x/y: %d, %d",
2313*09537850SAkhilesh Sanikop           row4x4, column4x4, partition, block_size, sub_size,
2314*09537850SAkhilesh Sanikop           sequence_header_.color_config.subsampling_x,
2315*09537850SAkhilesh Sanikop           sequence_header_.color_config.subsampling_y);
2316*09537850SAkhilesh Sanikop       return false;
2317*09537850SAkhilesh Sanikop     }
2318*09537850SAkhilesh Sanikop 
2319*09537850SAkhilesh Sanikop     const int quarter_block4x4 = half_block4x4 >> 1;
2320*09537850SAkhilesh Sanikop     const BlockSize split_size = kSubSize[kPartitionSplit][block_size];
2321*09537850SAkhilesh Sanikop     assert(partition == kPartitionNone || sub_size != kBlockInvalid);
2322*09537850SAkhilesh Sanikop     switch (partition) {
2323*09537850SAkhilesh Sanikop       case kPartitionNone:
2324*09537850SAkhilesh Sanikop         if (!ProcessBlock(row4x4, column4x4, sub_size, scratch_buffer,
2325*09537850SAkhilesh Sanikop                           residual)) {
2326*09537850SAkhilesh Sanikop           return false;
2327*09537850SAkhilesh Sanikop         }
2328*09537850SAkhilesh Sanikop         break;
2329*09537850SAkhilesh Sanikop       case kPartitionSplit:
2330*09537850SAkhilesh Sanikop         // The children must be added in reverse order since a stack is being
2331*09537850SAkhilesh Sanikop         // used.
2332*09537850SAkhilesh Sanikop         stack.Push(PartitionTreeNode(row4x4 + half_block4x4,
2333*09537850SAkhilesh Sanikop                                      column4x4 + half_block4x4, sub_size));
2334*09537850SAkhilesh Sanikop         stack.Push(
2335*09537850SAkhilesh Sanikop             PartitionTreeNode(row4x4 + half_block4x4, column4x4, sub_size));
2336*09537850SAkhilesh Sanikop         stack.Push(
2337*09537850SAkhilesh Sanikop             PartitionTreeNode(row4x4, column4x4 + half_block4x4, sub_size));
2338*09537850SAkhilesh Sanikop         stack.Push(PartitionTreeNode(row4x4, column4x4, sub_size));
2339*09537850SAkhilesh Sanikop         break;
2340*09537850SAkhilesh Sanikop       case kPartitionHorizontal:
2341*09537850SAkhilesh Sanikop         if (!ProcessBlock(row4x4, column4x4, sub_size, scratch_buffer,
2342*09537850SAkhilesh Sanikop                           residual) ||
2343*09537850SAkhilesh Sanikop             !ProcessBlock(row4x4 + half_block4x4, column4x4, sub_size,
2344*09537850SAkhilesh Sanikop                           scratch_buffer, residual)) {
2345*09537850SAkhilesh Sanikop           return false;
2346*09537850SAkhilesh Sanikop         }
2347*09537850SAkhilesh Sanikop         break;
2348*09537850SAkhilesh Sanikop       case kPartitionVertical:
2349*09537850SAkhilesh Sanikop         if (!ProcessBlock(row4x4, column4x4, sub_size, scratch_buffer,
2350*09537850SAkhilesh Sanikop                           residual) ||
2351*09537850SAkhilesh Sanikop             !ProcessBlock(row4x4, column4x4 + half_block4x4, sub_size,
2352*09537850SAkhilesh Sanikop                           scratch_buffer, residual)) {
2353*09537850SAkhilesh Sanikop           return false;
2354*09537850SAkhilesh Sanikop         }
2355*09537850SAkhilesh Sanikop         break;
2356*09537850SAkhilesh Sanikop       case kPartitionHorizontalWithTopSplit:
2357*09537850SAkhilesh Sanikop         if (!ProcessBlock(row4x4, column4x4, split_size, scratch_buffer,
2358*09537850SAkhilesh Sanikop                           residual) ||
2359*09537850SAkhilesh Sanikop             !ProcessBlock(row4x4, column4x4 + half_block4x4, split_size,
2360*09537850SAkhilesh Sanikop                           scratch_buffer, residual) ||
2361*09537850SAkhilesh Sanikop             !ProcessBlock(row4x4 + half_block4x4, column4x4, sub_size,
2362*09537850SAkhilesh Sanikop                           scratch_buffer, residual)) {
2363*09537850SAkhilesh Sanikop           return false;
2364*09537850SAkhilesh Sanikop         }
2365*09537850SAkhilesh Sanikop         break;
2366*09537850SAkhilesh Sanikop       case kPartitionHorizontalWithBottomSplit:
2367*09537850SAkhilesh Sanikop         if (!ProcessBlock(row4x4, column4x4, sub_size, scratch_buffer,
2368*09537850SAkhilesh Sanikop                           residual) ||
2369*09537850SAkhilesh Sanikop             !ProcessBlock(row4x4 + half_block4x4, column4x4, split_size,
2370*09537850SAkhilesh Sanikop                           scratch_buffer, residual) ||
2371*09537850SAkhilesh Sanikop             !ProcessBlock(row4x4 + half_block4x4, column4x4 + half_block4x4,
2372*09537850SAkhilesh Sanikop                           split_size, scratch_buffer, residual)) {
2373*09537850SAkhilesh Sanikop           return false;
2374*09537850SAkhilesh Sanikop         }
2375*09537850SAkhilesh Sanikop         break;
2376*09537850SAkhilesh Sanikop       case kPartitionVerticalWithLeftSplit:
2377*09537850SAkhilesh Sanikop         if (!ProcessBlock(row4x4, column4x4, split_size, scratch_buffer,
2378*09537850SAkhilesh Sanikop                           residual) ||
2379*09537850SAkhilesh Sanikop             !ProcessBlock(row4x4 + half_block4x4, column4x4, split_size,
2380*09537850SAkhilesh Sanikop                           scratch_buffer, residual) ||
2381*09537850SAkhilesh Sanikop             !ProcessBlock(row4x4, column4x4 + half_block4x4, sub_size,
2382*09537850SAkhilesh Sanikop                           scratch_buffer, residual)) {
2383*09537850SAkhilesh Sanikop           return false;
2384*09537850SAkhilesh Sanikop         }
2385*09537850SAkhilesh Sanikop         break;
2386*09537850SAkhilesh Sanikop       case kPartitionVerticalWithRightSplit:
2387*09537850SAkhilesh Sanikop         if (!ProcessBlock(row4x4, column4x4, sub_size, scratch_buffer,
2388*09537850SAkhilesh Sanikop                           residual) ||
2389*09537850SAkhilesh Sanikop             !ProcessBlock(row4x4, column4x4 + half_block4x4, split_size,
2390*09537850SAkhilesh Sanikop                           scratch_buffer, residual) ||
2391*09537850SAkhilesh Sanikop             !ProcessBlock(row4x4 + half_block4x4, column4x4 + half_block4x4,
2392*09537850SAkhilesh Sanikop                           split_size, scratch_buffer, residual)) {
2393*09537850SAkhilesh Sanikop           return false;
2394*09537850SAkhilesh Sanikop         }
2395*09537850SAkhilesh Sanikop         break;
2396*09537850SAkhilesh Sanikop       case kPartitionHorizontal4:
2397*09537850SAkhilesh Sanikop         for (int i = 0; i < 4; ++i) {
2398*09537850SAkhilesh Sanikop           if (!ProcessBlock(row4x4 + i * quarter_block4x4, column4x4, sub_size,
2399*09537850SAkhilesh Sanikop                             scratch_buffer, residual)) {
2400*09537850SAkhilesh Sanikop             return false;
2401*09537850SAkhilesh Sanikop           }
2402*09537850SAkhilesh Sanikop         }
2403*09537850SAkhilesh Sanikop         break;
2404*09537850SAkhilesh Sanikop       case kPartitionVertical4:
2405*09537850SAkhilesh Sanikop         for (int i = 0; i < 4; ++i) {
2406*09537850SAkhilesh Sanikop           if (!ProcessBlock(row4x4, column4x4 + i * quarter_block4x4, sub_size,
2407*09537850SAkhilesh Sanikop                             scratch_buffer, residual)) {
2408*09537850SAkhilesh Sanikop             return false;
2409*09537850SAkhilesh Sanikop           }
2410*09537850SAkhilesh Sanikop         }
2411*09537850SAkhilesh Sanikop         break;
2412*09537850SAkhilesh Sanikop     }
2413*09537850SAkhilesh Sanikop   } while (!stack.Empty());
2414*09537850SAkhilesh Sanikop   return true;
2415*09537850SAkhilesh Sanikop }
2416*09537850SAkhilesh Sanikop 
ResetLoopRestorationParams()2417*09537850SAkhilesh Sanikop void Tile::ResetLoopRestorationParams() {
2418*09537850SAkhilesh Sanikop   for (int plane = kPlaneY; plane < kMaxPlanes; ++plane) {
2419*09537850SAkhilesh Sanikop     for (int i = WienerInfo::kVertical; i <= WienerInfo::kHorizontal; ++i) {
2420*09537850SAkhilesh Sanikop       reference_unit_info_[plane].sgr_proj_info.multiplier[i] =
2421*09537850SAkhilesh Sanikop           kSgrProjDefaultMultiplier[i];
2422*09537850SAkhilesh Sanikop       for (int j = 0; j < kNumWienerCoefficients; ++j) {
2423*09537850SAkhilesh Sanikop         reference_unit_info_[plane].wiener_info.filter[i][j] =
2424*09537850SAkhilesh Sanikop             kWienerDefaultFilter[j];
2425*09537850SAkhilesh Sanikop       }
2426*09537850SAkhilesh Sanikop     }
2427*09537850SAkhilesh Sanikop   }
2428*09537850SAkhilesh Sanikop }
2429*09537850SAkhilesh Sanikop 
ResetCdef(const int row4x4,const int column4x4)2430*09537850SAkhilesh Sanikop void Tile::ResetCdef(const int row4x4, const int column4x4) {
2431*09537850SAkhilesh Sanikop   if (frame_header_.cdef.bits == 0) return;
2432*09537850SAkhilesh Sanikop   const int row = DivideBy16(row4x4);
2433*09537850SAkhilesh Sanikop   const int column = DivideBy16(column4x4);
2434*09537850SAkhilesh Sanikop   cdef_index_[row][column] = -1;
2435*09537850SAkhilesh Sanikop   if (sequence_header_.use_128x128_superblock) {
2436*09537850SAkhilesh Sanikop     const int cdef_size4x4 = kNum4x4BlocksWide[kBlock64x64];
2437*09537850SAkhilesh Sanikop     const int border_row = DivideBy16(row4x4 + cdef_size4x4);
2438*09537850SAkhilesh Sanikop     const int border_column = DivideBy16(column4x4 + cdef_size4x4);
2439*09537850SAkhilesh Sanikop     cdef_index_[row][border_column] = -1;
2440*09537850SAkhilesh Sanikop     cdef_index_[border_row][column] = -1;
2441*09537850SAkhilesh Sanikop     cdef_index_[border_row][border_column] = -1;
2442*09537850SAkhilesh Sanikop   }
2443*09537850SAkhilesh Sanikop }
2444*09537850SAkhilesh Sanikop 
ClearBlockDecoded(TileScratchBuffer * const scratch_buffer,int row4x4,int column4x4)2445*09537850SAkhilesh Sanikop void Tile::ClearBlockDecoded(TileScratchBuffer* const scratch_buffer,
2446*09537850SAkhilesh Sanikop                              int row4x4, int column4x4) {
2447*09537850SAkhilesh Sanikop   // Set everything to false.
2448*09537850SAkhilesh Sanikop   memset(scratch_buffer->block_decoded, 0,
2449*09537850SAkhilesh Sanikop          sizeof(scratch_buffer->block_decoded));
2450*09537850SAkhilesh Sanikop   // Set specific edge cases to true.
2451*09537850SAkhilesh Sanikop   const int sb_size4 = sequence_header_.use_128x128_superblock ? 32 : 16;
2452*09537850SAkhilesh Sanikop   for (int plane = kPlaneY; plane < PlaneCount(); ++plane) {
2453*09537850SAkhilesh Sanikop     const int subsampling_x = subsampling_x_[plane];
2454*09537850SAkhilesh Sanikop     const int subsampling_y = subsampling_y_[plane];
2455*09537850SAkhilesh Sanikop     const int sb_width4 = (column4x4_end_ - column4x4) >> subsampling_x;
2456*09537850SAkhilesh Sanikop     const int sb_height4 = (row4x4_end_ - row4x4) >> subsampling_y;
2457*09537850SAkhilesh Sanikop     // The memset is equivalent to the following lines in the spec:
2458*09537850SAkhilesh Sanikop     // for ( x = -1; x <= ( sbSize4 >> subX ); x++ ) {
2459*09537850SAkhilesh Sanikop     //   if ( y < 0 && x < sbWidth4 ) {
2460*09537850SAkhilesh Sanikop     //     BlockDecoded[plane][y][x] = 1
2461*09537850SAkhilesh Sanikop     //   }
2462*09537850SAkhilesh Sanikop     // }
2463*09537850SAkhilesh Sanikop     const int num_elements =
2464*09537850SAkhilesh Sanikop         std::min((sb_size4 >> subsampling_x_[plane]) + 1, sb_width4) + 1;
2465*09537850SAkhilesh Sanikop     memset(&scratch_buffer->block_decoded[plane][0][0], 1, num_elements);
2466*09537850SAkhilesh Sanikop     // The for loop is equivalent to the following lines in the spec:
2467*09537850SAkhilesh Sanikop     // for ( y = -1; y <= ( sbSize4 >> subY ); y++ )
2468*09537850SAkhilesh Sanikop     //   if ( x < 0 && y < sbHeight4 )
2469*09537850SAkhilesh Sanikop     //     BlockDecoded[plane][y][x] = 1
2470*09537850SAkhilesh Sanikop     //   }
2471*09537850SAkhilesh Sanikop     // }
2472*09537850SAkhilesh Sanikop     // BlockDecoded[plane][sbSize4 >> subY][-1] = 0
2473*09537850SAkhilesh Sanikop     for (int y = -1; y < std::min((sb_size4 >> subsampling_y), sb_height4);
2474*09537850SAkhilesh Sanikop          ++y) {
2475*09537850SAkhilesh Sanikop       scratch_buffer->block_decoded[plane][y + 1][0] = true;
2476*09537850SAkhilesh Sanikop     }
2477*09537850SAkhilesh Sanikop   }
2478*09537850SAkhilesh Sanikop }
2479*09537850SAkhilesh Sanikop 
ProcessSuperBlock(int row4x4,int column4x4,TileScratchBuffer * const scratch_buffer,ProcessingMode mode)2480*09537850SAkhilesh Sanikop bool Tile::ProcessSuperBlock(int row4x4, int column4x4,
2481*09537850SAkhilesh Sanikop                              TileScratchBuffer* const scratch_buffer,
2482*09537850SAkhilesh Sanikop                              ProcessingMode mode) {
2483*09537850SAkhilesh Sanikop   const bool parsing =
2484*09537850SAkhilesh Sanikop       mode == kProcessingModeParseOnly || mode == kProcessingModeParseAndDecode;
2485*09537850SAkhilesh Sanikop   const bool decoding = mode == kProcessingModeDecodeOnly ||
2486*09537850SAkhilesh Sanikop                         mode == kProcessingModeParseAndDecode;
2487*09537850SAkhilesh Sanikop   if (parsing) {
2488*09537850SAkhilesh Sanikop     read_deltas_ = frame_header_.delta_q.present;
2489*09537850SAkhilesh Sanikop     ResetCdef(row4x4, column4x4);
2490*09537850SAkhilesh Sanikop   }
2491*09537850SAkhilesh Sanikop   if (decoding) {
2492*09537850SAkhilesh Sanikop     ClearBlockDecoded(scratch_buffer, row4x4, column4x4);
2493*09537850SAkhilesh Sanikop   }
2494*09537850SAkhilesh Sanikop   const BlockSize block_size = SuperBlockSize();
2495*09537850SAkhilesh Sanikop   if (parsing) {
2496*09537850SAkhilesh Sanikop     ReadLoopRestorationCoefficients(row4x4, column4x4, block_size);
2497*09537850SAkhilesh Sanikop   }
2498*09537850SAkhilesh Sanikop   if (parsing && decoding) {
2499*09537850SAkhilesh Sanikop     uint8_t* residual_buffer = residual_buffer_.get();
2500*09537850SAkhilesh Sanikop     if (!ProcessPartition(row4x4, column4x4, scratch_buffer,
2501*09537850SAkhilesh Sanikop                           &residual_buffer)) {
2502*09537850SAkhilesh Sanikop       LIBGAV1_DLOG(ERROR, "Error decoding partition row: %d column: %d", row4x4,
2503*09537850SAkhilesh Sanikop                    column4x4);
2504*09537850SAkhilesh Sanikop       return false;
2505*09537850SAkhilesh Sanikop     }
2506*09537850SAkhilesh Sanikop     return true;
2507*09537850SAkhilesh Sanikop   }
2508*09537850SAkhilesh Sanikop   const int sb_row_index = SuperBlockRowIndex(row4x4);
2509*09537850SAkhilesh Sanikop   const int sb_column_index = SuperBlockColumnIndex(column4x4);
2510*09537850SAkhilesh Sanikop   if (parsing) {
2511*09537850SAkhilesh Sanikop     residual_buffer_threaded_[sb_row_index][sb_column_index] =
2512*09537850SAkhilesh Sanikop         residual_buffer_pool_->Get();
2513*09537850SAkhilesh Sanikop     if (residual_buffer_threaded_[sb_row_index][sb_column_index] == nullptr) {
2514*09537850SAkhilesh Sanikop       LIBGAV1_DLOG(ERROR, "Failed to get residual buffer.");
2515*09537850SAkhilesh Sanikop       return false;
2516*09537850SAkhilesh Sanikop     }
2517*09537850SAkhilesh Sanikop     uint8_t* residual_buffer =
2518*09537850SAkhilesh Sanikop         residual_buffer_threaded_[sb_row_index][sb_column_index]->buffer();
2519*09537850SAkhilesh Sanikop     if (!ProcessPartition(row4x4, column4x4, scratch_buffer,
2520*09537850SAkhilesh Sanikop                           &residual_buffer)) {
2521*09537850SAkhilesh Sanikop       LIBGAV1_DLOG(ERROR, "Error parsing partition row: %d column: %d", row4x4,
2522*09537850SAkhilesh Sanikop                    column4x4);
2523*09537850SAkhilesh Sanikop       return false;
2524*09537850SAkhilesh Sanikop     }
2525*09537850SAkhilesh Sanikop   } else {
2526*09537850SAkhilesh Sanikop     if (!DecodeSuperBlock(sb_row_index, sb_column_index, scratch_buffer)) {
2527*09537850SAkhilesh Sanikop       LIBGAV1_DLOG(ERROR, "Error decoding superblock row: %d column: %d",
2528*09537850SAkhilesh Sanikop                    row4x4, column4x4);
2529*09537850SAkhilesh Sanikop       return false;
2530*09537850SAkhilesh Sanikop     }
2531*09537850SAkhilesh Sanikop     residual_buffer_pool_->Release(
2532*09537850SAkhilesh Sanikop         std::move(residual_buffer_threaded_[sb_row_index][sb_column_index]));
2533*09537850SAkhilesh Sanikop   }
2534*09537850SAkhilesh Sanikop   return true;
2535*09537850SAkhilesh Sanikop }
2536*09537850SAkhilesh Sanikop 
DecodeSuperBlock(int sb_row_index,int sb_column_index,TileScratchBuffer * const scratch_buffer)2537*09537850SAkhilesh Sanikop bool Tile::DecodeSuperBlock(int sb_row_index, int sb_column_index,
2538*09537850SAkhilesh Sanikop                             TileScratchBuffer* const scratch_buffer) {
2539*09537850SAkhilesh Sanikop   uint8_t* residual_buffer =
2540*09537850SAkhilesh Sanikop       residual_buffer_threaded_[sb_row_index][sb_column_index]->buffer();
2541*09537850SAkhilesh Sanikop   Queue<PartitionTreeNode>& partition_tree_order =
2542*09537850SAkhilesh Sanikop       *residual_buffer_threaded_[sb_row_index][sb_column_index]
2543*09537850SAkhilesh Sanikop            ->partition_tree_order();
2544*09537850SAkhilesh Sanikop   while (!partition_tree_order.Empty()) {
2545*09537850SAkhilesh Sanikop     PartitionTreeNode block = partition_tree_order.Front();
2546*09537850SAkhilesh Sanikop     if (!DecodeBlock(block.row4x4, block.column4x4, block.block_size,
2547*09537850SAkhilesh Sanikop                      scratch_buffer, &residual_buffer)) {
2548*09537850SAkhilesh Sanikop       LIBGAV1_DLOG(ERROR, "Error decoding block row: %d column: %d",
2549*09537850SAkhilesh Sanikop                    block.row4x4, block.column4x4);
2550*09537850SAkhilesh Sanikop       return false;
2551*09537850SAkhilesh Sanikop     }
2552*09537850SAkhilesh Sanikop     partition_tree_order.Pop();
2553*09537850SAkhilesh Sanikop   }
2554*09537850SAkhilesh Sanikop   return true;
2555*09537850SAkhilesh Sanikop }
2556*09537850SAkhilesh Sanikop 
ReadLoopRestorationCoefficients(int row4x4,int column4x4,BlockSize block_size)2557*09537850SAkhilesh Sanikop void Tile::ReadLoopRestorationCoefficients(int row4x4, int column4x4,
2558*09537850SAkhilesh Sanikop                                            BlockSize block_size) {
2559*09537850SAkhilesh Sanikop   if (frame_header_.allow_intrabc) return;
2560*09537850SAkhilesh Sanikop   LoopRestorationInfo* const restoration_info = post_filter_.restoration_info();
2561*09537850SAkhilesh Sanikop   const bool is_superres_scaled =
2562*09537850SAkhilesh Sanikop       frame_header_.width != frame_header_.upscaled_width;
2563*09537850SAkhilesh Sanikop   for (int plane = kPlaneY; plane < PlaneCount(); ++plane) {
2564*09537850SAkhilesh Sanikop     LoopRestorationUnitInfo unit_info;
2565*09537850SAkhilesh Sanikop     if (restoration_info->PopulateUnitInfoForSuperBlock(
2566*09537850SAkhilesh Sanikop             static_cast<Plane>(plane), block_size, is_superres_scaled,
2567*09537850SAkhilesh Sanikop             frame_header_.superres_scale_denominator, row4x4, column4x4,
2568*09537850SAkhilesh Sanikop             &unit_info)) {
2569*09537850SAkhilesh Sanikop       for (int unit_row = unit_info.row_start; unit_row < unit_info.row_end;
2570*09537850SAkhilesh Sanikop            ++unit_row) {
2571*09537850SAkhilesh Sanikop         for (int unit_column = unit_info.column_start;
2572*09537850SAkhilesh Sanikop              unit_column < unit_info.column_end; ++unit_column) {
2573*09537850SAkhilesh Sanikop           const int unit_id = unit_row * restoration_info->num_horizontal_units(
2574*09537850SAkhilesh Sanikop                                              static_cast<Plane>(plane)) +
2575*09537850SAkhilesh Sanikop                               unit_column;
2576*09537850SAkhilesh Sanikop           restoration_info->ReadUnitCoefficients(
2577*09537850SAkhilesh Sanikop               &reader_, &symbol_decoder_context_, static_cast<Plane>(plane),
2578*09537850SAkhilesh Sanikop               unit_id, &reference_unit_info_);
2579*09537850SAkhilesh Sanikop         }
2580*09537850SAkhilesh Sanikop       }
2581*09537850SAkhilesh Sanikop     }
2582*09537850SAkhilesh Sanikop   }
2583*09537850SAkhilesh Sanikop }
2584*09537850SAkhilesh Sanikop 
StoreMotionFieldMvsIntoCurrentFrame(const Block & block)2585*09537850SAkhilesh Sanikop void Tile::StoreMotionFieldMvsIntoCurrentFrame(const Block& block) {
2586*09537850SAkhilesh Sanikop   if (frame_header_.refresh_frame_flags == 0 ||
2587*09537850SAkhilesh Sanikop       IsIntraFrame(frame_header_.frame_type)) {
2588*09537850SAkhilesh Sanikop     return;
2589*09537850SAkhilesh Sanikop   }
2590*09537850SAkhilesh Sanikop   // Iterate over odd rows/columns beginning at the first odd row/column for the
2591*09537850SAkhilesh Sanikop   // block. It is done this way because motion field mvs are only needed at a
2592*09537850SAkhilesh Sanikop   // 8x8 granularity.
2593*09537850SAkhilesh Sanikop   const int row_start4x4 = block.row4x4 | 1;
2594*09537850SAkhilesh Sanikop   const int row_limit4x4 =
2595*09537850SAkhilesh Sanikop       std::min(block.row4x4 + block.height4x4, frame_header_.rows4x4);
2596*09537850SAkhilesh Sanikop   if (row_start4x4 >= row_limit4x4) return;
2597*09537850SAkhilesh Sanikop   const int column_start4x4 = block.column4x4 | 1;
2598*09537850SAkhilesh Sanikop   const int column_limit4x4 =
2599*09537850SAkhilesh Sanikop       std::min(block.column4x4 + block.width4x4, frame_header_.columns4x4);
2600*09537850SAkhilesh Sanikop   if (column_start4x4 >= column_limit4x4) return;
2601*09537850SAkhilesh Sanikop 
2602*09537850SAkhilesh Sanikop   // The largest reference MV component that can be saved.
2603*09537850SAkhilesh Sanikop   constexpr int kRefMvsLimit = (1 << 12) - 1;
2604*09537850SAkhilesh Sanikop   const BlockParameters& bp = *block.bp;
2605*09537850SAkhilesh Sanikop   ReferenceInfo* reference_info = current_frame_.reference_info();
2606*09537850SAkhilesh Sanikop   for (int i = 1; i >= 0; --i) {
2607*09537850SAkhilesh Sanikop     const ReferenceFrameType reference_frame_to_store = bp.reference_frame[i];
2608*09537850SAkhilesh Sanikop     if (reference_frame_to_store <= kReferenceFrameIntra) continue;
2609*09537850SAkhilesh Sanikop     // Must make a local copy so that StoreMotionFieldMvs() knows there is no
2610*09537850SAkhilesh Sanikop     // overlap between load and store.
2611*09537850SAkhilesh Sanikop     const MotionVector mv_to_store = bp.mv.mv[i];
2612*09537850SAkhilesh Sanikop     const int mv_row = std::abs(mv_to_store.mv[0]);
2613*09537850SAkhilesh Sanikop     const int mv_column = std::abs(mv_to_store.mv[1]);
2614*09537850SAkhilesh Sanikop     // kRefMvsLimit equals 0x07FF, so we can first bitwise OR the two absolute
2615*09537850SAkhilesh Sanikop     // values and then compare with kRefMvsLimit to save a branch.
2616*09537850SAkhilesh Sanikop     // The next line is equivalent to:
2617*09537850SAkhilesh Sanikop     // mv_row <= kRefMvsLimit && mv_column <= kRefMvsLimit
2618*09537850SAkhilesh Sanikop     if ((mv_row | mv_column) <= kRefMvsLimit &&
2619*09537850SAkhilesh Sanikop         reference_info->relative_distance_from[reference_frame_to_store] < 0) {
2620*09537850SAkhilesh Sanikop       const int row_start8x8 = DivideBy2(row_start4x4);
2621*09537850SAkhilesh Sanikop       const int row_limit8x8 = DivideBy2(row_limit4x4);
2622*09537850SAkhilesh Sanikop       const int column_start8x8 = DivideBy2(column_start4x4);
2623*09537850SAkhilesh Sanikop       const int column_limit8x8 = DivideBy2(column_limit4x4);
2624*09537850SAkhilesh Sanikop       const int rows = row_limit8x8 - row_start8x8;
2625*09537850SAkhilesh Sanikop       const int columns = column_limit8x8 - column_start8x8;
2626*09537850SAkhilesh Sanikop       const ptrdiff_t stride = DivideBy2(current_frame_.columns4x4());
2627*09537850SAkhilesh Sanikop       ReferenceFrameType* const reference_frame_row_start =
2628*09537850SAkhilesh Sanikop           &reference_info
2629*09537850SAkhilesh Sanikop                ->motion_field_reference_frame[row_start8x8][column_start8x8];
2630*09537850SAkhilesh Sanikop       MotionVector* const mv =
2631*09537850SAkhilesh Sanikop           &reference_info->motion_field_mv[row_start8x8][column_start8x8];
2632*09537850SAkhilesh Sanikop 
2633*09537850SAkhilesh Sanikop       // Specialize columns cases 1, 2, 4, 8 and 16. This makes memset() inlined
2634*09537850SAkhilesh Sanikop       // and simplifies std::fill() for these cases.
2635*09537850SAkhilesh Sanikop       if (columns <= 1) {
2636*09537850SAkhilesh Sanikop         // Don't change the above condition to (columns == 1).
2637*09537850SAkhilesh Sanikop         // Condition (columns <= 1) may help the compiler simplify the inlining
2638*09537850SAkhilesh Sanikop         // of the general case of StoreMotionFieldMvs() by eliminating the
2639*09537850SAkhilesh Sanikop         // (columns == 0) case.
2640*09537850SAkhilesh Sanikop         assert(columns == 1);
2641*09537850SAkhilesh Sanikop         StoreMotionFieldMvs(reference_frame_to_store, mv_to_store, stride, rows,
2642*09537850SAkhilesh Sanikop                             1, reference_frame_row_start, mv);
2643*09537850SAkhilesh Sanikop       } else if (columns == 2) {
2644*09537850SAkhilesh Sanikop         StoreMotionFieldMvs(reference_frame_to_store, mv_to_store, stride, rows,
2645*09537850SAkhilesh Sanikop                             2, reference_frame_row_start, mv);
2646*09537850SAkhilesh Sanikop       } else if (columns == 4) {
2647*09537850SAkhilesh Sanikop         StoreMotionFieldMvs(reference_frame_to_store, mv_to_store, stride, rows,
2648*09537850SAkhilesh Sanikop                             4, reference_frame_row_start, mv);
2649*09537850SAkhilesh Sanikop       } else if (columns == 8) {
2650*09537850SAkhilesh Sanikop         StoreMotionFieldMvs(reference_frame_to_store, mv_to_store, stride, rows,
2651*09537850SAkhilesh Sanikop                             8, reference_frame_row_start, mv);
2652*09537850SAkhilesh Sanikop       } else if (columns == 16) {
2653*09537850SAkhilesh Sanikop         StoreMotionFieldMvs(reference_frame_to_store, mv_to_store, stride, rows,
2654*09537850SAkhilesh Sanikop                             16, reference_frame_row_start, mv);
2655*09537850SAkhilesh Sanikop       } else if (columns < 16) {
2656*09537850SAkhilesh Sanikop         // This always true condition (columns < 16) may help the compiler
2657*09537850SAkhilesh Sanikop         // simplify the inlining of the following function.
2658*09537850SAkhilesh Sanikop         // This general case is rare and usually only happens to the blocks
2659*09537850SAkhilesh Sanikop         // which contain the right boundary of the frame.
2660*09537850SAkhilesh Sanikop         StoreMotionFieldMvs(reference_frame_to_store, mv_to_store, stride, rows,
2661*09537850SAkhilesh Sanikop                             columns, reference_frame_row_start, mv);
2662*09537850SAkhilesh Sanikop       } else {
2663*09537850SAkhilesh Sanikop         assert(false);
2664*09537850SAkhilesh Sanikop       }
2665*09537850SAkhilesh Sanikop       return;
2666*09537850SAkhilesh Sanikop     }
2667*09537850SAkhilesh Sanikop   }
2668*09537850SAkhilesh Sanikop }
2669*09537850SAkhilesh Sanikop 
2670*09537850SAkhilesh Sanikop }  // namespace libgav1
2671