xref: /aosp_15_r20/external/libgav1/src/tile_scratch_buffer.h (revision 095378508e87ed692bf8dfeb34008b65b3735891)
/*
 * Copyright 2019 The libgav1 Authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16*09537850SAkhilesh Sanikop 
17*09537850SAkhilesh Sanikop #ifndef LIBGAV1_SRC_TILE_SCRATCH_BUFFER_H_
18*09537850SAkhilesh Sanikop #define LIBGAV1_SRC_TILE_SCRATCH_BUFFER_H_
19*09537850SAkhilesh Sanikop 
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <memory>
#include <mutex>  // NOLINT (unapproved c++11 header)
#include <new>
#include <utility>

#include "src/dsp/constants.h"
#include "src/utils/common.h"
#include "src/utils/compiler_attributes.h"
#include "src/utils/constants.h"
#include "src/utils/memory.h"
#include "src/utils/stack.h"
34*09537850SAkhilesh Sanikop 
35*09537850SAkhilesh Sanikop namespace libgav1 {
36*09537850SAkhilesh Sanikop 
37*09537850SAkhilesh Sanikop // Buffer to facilitate decoding a superblock.
38*09537850SAkhilesh Sanikop struct TileScratchBuffer : public MaxAlignedAllocable {
39*09537850SAkhilesh Sanikop   static constexpr int kBlockDecodedStride = 34;
40*09537850SAkhilesh Sanikop 
InitTileScratchBuffer41*09537850SAkhilesh Sanikop   LIBGAV1_MUST_USE_RESULT bool Init(int bitdepth) {
42*09537850SAkhilesh Sanikop #if LIBGAV1_MAX_BITDEPTH >= 10
43*09537850SAkhilesh Sanikop     const int pixel_size = (bitdepth == 8) ? 1 : 2;
44*09537850SAkhilesh Sanikop #else
45*09537850SAkhilesh Sanikop     assert(bitdepth == 8);
46*09537850SAkhilesh Sanikop     static_cast<void>(bitdepth);
47*09537850SAkhilesh Sanikop     const int pixel_size = 1;
48*09537850SAkhilesh Sanikop #endif
49*09537850SAkhilesh Sanikop 
50*09537850SAkhilesh Sanikop     static_assert(kConvolveScaleBorderRight >= kConvolveBorderRight, "");
51*09537850SAkhilesh Sanikop     constexpr int unaligned_convolve_buffer_stride =
52*09537850SAkhilesh Sanikop         kMaxScaledSuperBlockSizeInPixels + kConvolveBorderLeftTop +
53*09537850SAkhilesh Sanikop         kConvolveScaleBorderRight;
54*09537850SAkhilesh Sanikop     convolve_block_buffer_stride = Align<ptrdiff_t>(
55*09537850SAkhilesh Sanikop         unaligned_convolve_buffer_stride * pixel_size, kMaxAlignment);
56*09537850SAkhilesh Sanikop     constexpr int convolve_buffer_height = kMaxScaledSuperBlockSizeInPixels +
57*09537850SAkhilesh Sanikop                                            kConvolveBorderLeftTop +
58*09537850SAkhilesh Sanikop                                            kConvolveBorderBottom;
59*09537850SAkhilesh Sanikop 
60*09537850SAkhilesh Sanikop     convolve_block_buffer = MakeAlignedUniquePtr<uint8_t>(
61*09537850SAkhilesh Sanikop         kMaxAlignment, convolve_buffer_height * convolve_block_buffer_stride);
62*09537850SAkhilesh Sanikop #if LIBGAV1_MSAN
63*09537850SAkhilesh Sanikop     // Quiet msan warnings in ConvolveScale2D_NEON(). Set with random non-zero
64*09537850SAkhilesh Sanikop     // value to aid in future debugging.
65*09537850SAkhilesh Sanikop     memset(convolve_block_buffer.get(), 0x66,
66*09537850SAkhilesh Sanikop            convolve_buffer_height * convolve_block_buffer_stride);
67*09537850SAkhilesh Sanikop #endif
68*09537850SAkhilesh Sanikop 
69*09537850SAkhilesh Sanikop     return convolve_block_buffer != nullptr;
70*09537850SAkhilesh Sanikop   }
71*09537850SAkhilesh Sanikop 
72*09537850SAkhilesh Sanikop   // kCompoundPredictionTypeDiffWeighted prediction mode needs a mask of the
73*09537850SAkhilesh Sanikop   // prediction block size. This buffer is used to store that mask. The masks
74*09537850SAkhilesh Sanikop   // will be created for the Y plane and will be re-used for the U & V planes.
75*09537850SAkhilesh Sanikop   alignas(kMaxAlignment) uint8_t weight_mask[kMaxSuperBlockSizeSquareInPixels];
76*09537850SAkhilesh Sanikop 
77*09537850SAkhilesh Sanikop   // For each instance of the TileScratchBuffer, only one of the following
78*09537850SAkhilesh Sanikop   // buffers will be used at any given time, so it is ok to share them in a
79*09537850SAkhilesh Sanikop   // union.
80*09537850SAkhilesh Sanikop   union {
81*09537850SAkhilesh Sanikop     // Buffers used for prediction process.
82*09537850SAkhilesh Sanikop     // Compound prediction calculations always output 16-bit values. Depending
83*09537850SAkhilesh Sanikop     // on the bitdepth the values may be treated as int16_t or uint16_t. See
84*09537850SAkhilesh Sanikop     // src/dsp/convolve.cc and src/dsp/warp.cc for explanations.
85*09537850SAkhilesh Sanikop     // Inter/intra calculations output Pixel values.
86*09537850SAkhilesh Sanikop     // These buffers always use width as the stride. This enables packing the
87*09537850SAkhilesh Sanikop     // values in and simplifies loads/stores for small values.
88*09537850SAkhilesh Sanikop 
89*09537850SAkhilesh Sanikop     // 10/12 bit compound prediction and 10/12 bit inter/intra prediction.
90*09537850SAkhilesh Sanikop     alignas(kMaxAlignment) uint16_t
91*09537850SAkhilesh Sanikop         prediction_buffer[2][kMaxSuperBlockSizeSquareInPixels];
92*09537850SAkhilesh Sanikop     // 8 bit compound prediction buffer.
93*09537850SAkhilesh Sanikop     alignas(kMaxAlignment) int16_t
94*09537850SAkhilesh Sanikop         compound_prediction_buffer_8bpp[2][kMaxSuperBlockSizeSquareInPixels];
95*09537850SAkhilesh Sanikop 
96*09537850SAkhilesh Sanikop     // Union usage note: This is used only by functions in the "intra"
97*09537850SAkhilesh Sanikop     // prediction path.
98*09537850SAkhilesh Sanikop     //
99*09537850SAkhilesh Sanikop     // Buffer used for storing subsampled luma samples needed for CFL
100*09537850SAkhilesh Sanikop     // prediction. This buffer is used to avoid repetition of the subsampling
101*09537850SAkhilesh Sanikop     // for the V plane when it is already done for the U plane.
102*09537850SAkhilesh Sanikop     int16_t cfl_luma_buffer[kCflLumaBufferStride][kCflLumaBufferStride];
103*09537850SAkhilesh Sanikop   };
104*09537850SAkhilesh Sanikop 
105*09537850SAkhilesh Sanikop   // Buffer used for convolve. The maximum size required for this buffer is:
106*09537850SAkhilesh Sanikop   //  maximum block height (with scaling and border) = 2 * 128 + 3 + 4 = 263.
107*09537850SAkhilesh Sanikop   //  maximum block stride (with scaling and border aligned to 16) =
108*09537850SAkhilesh Sanikop   //     (2 * 128 + 3 + 8 + 5) * pixel_size = 272 * pixel_size.
109*09537850SAkhilesh Sanikop   //  Where pixel_size is (bitdepth == 8) ? 1 : 2.
110*09537850SAkhilesh Sanikop   // Has an alignment of kMaxAlignment when allocated.
111*09537850SAkhilesh Sanikop   AlignedUniquePtr<uint8_t> convolve_block_buffer;
112*09537850SAkhilesh Sanikop   ptrdiff_t convolve_block_buffer_stride;
113*09537850SAkhilesh Sanikop 
114*09537850SAkhilesh Sanikop   // Flag indicating whether the data in |cfl_luma_buffer| is valid.
115*09537850SAkhilesh Sanikop   bool cfl_luma_buffer_valid;
116*09537850SAkhilesh Sanikop 
117*09537850SAkhilesh Sanikop   // Equivalent to BlockDecoded array in the spec. This stores the decoded
118*09537850SAkhilesh Sanikop   // state of every 4x4 block in a superblock. It has 1 row/column border on
119*09537850SAkhilesh Sanikop   // all 4 sides (hence the 34x34 dimension instead of 32x32). Note that the
120*09537850SAkhilesh Sanikop   // spec uses "-1" as an index to access the left and top borders. In the
121*09537850SAkhilesh Sanikop   // code, we treat the index (1, 1) as equivalent to the spec's (0, 0). So
122*09537850SAkhilesh Sanikop   // all accesses into this array will be offset by +1 when compared with the
123*09537850SAkhilesh Sanikop   // spec.
124*09537850SAkhilesh Sanikop   bool block_decoded[kMaxPlanes][kBlockDecodedStride][kBlockDecodedStride];
125*09537850SAkhilesh Sanikop };
126*09537850SAkhilesh Sanikop 
127*09537850SAkhilesh Sanikop class TileScratchBufferPool {
128*09537850SAkhilesh Sanikop  public:
Reset(int bitdepth)129*09537850SAkhilesh Sanikop   void Reset(int bitdepth) {
130*09537850SAkhilesh Sanikop     if (bitdepth_ == bitdepth) return;
131*09537850SAkhilesh Sanikop #if LIBGAV1_MAX_BITDEPTH >= 10
132*09537850SAkhilesh Sanikop     if (bitdepth_ == 8 && bitdepth != 8) {
133*09537850SAkhilesh Sanikop       // We are going from a pixel size of 1 to a pixel size of 2. So invalidate
134*09537850SAkhilesh Sanikop       // the stack.
135*09537850SAkhilesh Sanikop       std::lock_guard<std::mutex> lock(mutex_);
136*09537850SAkhilesh Sanikop       while (!buffers_.Empty()) {
137*09537850SAkhilesh Sanikop         buffers_.Pop();
138*09537850SAkhilesh Sanikop       }
139*09537850SAkhilesh Sanikop     }
140*09537850SAkhilesh Sanikop #endif
141*09537850SAkhilesh Sanikop     bitdepth_ = bitdepth;
142*09537850SAkhilesh Sanikop   }
143*09537850SAkhilesh Sanikop 
Get()144*09537850SAkhilesh Sanikop   std::unique_ptr<TileScratchBuffer> Get() {
145*09537850SAkhilesh Sanikop     std::lock_guard<std::mutex> lock(mutex_);
146*09537850SAkhilesh Sanikop     if (buffers_.Empty()) {
147*09537850SAkhilesh Sanikop       std::unique_ptr<TileScratchBuffer> scratch_buffer(new (std::nothrow)
148*09537850SAkhilesh Sanikop                                                             TileScratchBuffer);
149*09537850SAkhilesh Sanikop       if (scratch_buffer == nullptr || !scratch_buffer->Init(bitdepth_)) {
150*09537850SAkhilesh Sanikop         return nullptr;
151*09537850SAkhilesh Sanikop       }
152*09537850SAkhilesh Sanikop       return scratch_buffer;
153*09537850SAkhilesh Sanikop     }
154*09537850SAkhilesh Sanikop     return buffers_.Pop();
155*09537850SAkhilesh Sanikop   }
156*09537850SAkhilesh Sanikop 
Release(std::unique_ptr<TileScratchBuffer> scratch_buffer)157*09537850SAkhilesh Sanikop   void Release(std::unique_ptr<TileScratchBuffer> scratch_buffer) {
158*09537850SAkhilesh Sanikop     std::lock_guard<std::mutex> lock(mutex_);
159*09537850SAkhilesh Sanikop     buffers_.Push(std::move(scratch_buffer));
160*09537850SAkhilesh Sanikop   }
161*09537850SAkhilesh Sanikop 
162*09537850SAkhilesh Sanikop  private:
163*09537850SAkhilesh Sanikop   std::mutex mutex_;
164*09537850SAkhilesh Sanikop   // We will never need more than kMaxThreads scratch buffers since that is the
165*09537850SAkhilesh Sanikop   // maximum amount of work that will be done at any given time.
166*09537850SAkhilesh Sanikop   Stack<std::unique_ptr<TileScratchBuffer>, kMaxThreads> buffers_
167*09537850SAkhilesh Sanikop       LIBGAV1_GUARDED_BY(mutex_);
168*09537850SAkhilesh Sanikop   int bitdepth_ = 0;
169*09537850SAkhilesh Sanikop };
170*09537850SAkhilesh Sanikop 
171*09537850SAkhilesh Sanikop }  // namespace libgav1
172*09537850SAkhilesh Sanikop 
173*09537850SAkhilesh Sanikop #endif  // LIBGAV1_SRC_TILE_SCRATCH_BUFFER_H_
174