1*09537850SAkhilesh Sanikop /* 2*09537850SAkhilesh Sanikop * Copyright 2019 The libgav1 Authors 3*09537850SAkhilesh Sanikop * 4*09537850SAkhilesh Sanikop * Licensed under the Apache License, Version 2.0 (the "License"); 5*09537850SAkhilesh Sanikop * you may not use this file except in compliance with the License. 6*09537850SAkhilesh Sanikop * You may obtain a copy of the License at 7*09537850SAkhilesh Sanikop * 8*09537850SAkhilesh Sanikop * http://www.apache.org/licenses/LICENSE-2.0 9*09537850SAkhilesh Sanikop * 10*09537850SAkhilesh Sanikop * Unless required by applicable law or agreed to in writing, software 11*09537850SAkhilesh Sanikop * distributed under the License is distributed on an "AS IS" BASIS, 12*09537850SAkhilesh Sanikop * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13*09537850SAkhilesh Sanikop * See the License for the specific language governing permissions and 14*09537850SAkhilesh Sanikop * limitations under the License. 15*09537850SAkhilesh Sanikop */ 16*09537850SAkhilesh Sanikop 17*09537850SAkhilesh Sanikop #ifndef LIBGAV1_SRC_TILE_SCRATCH_BUFFER_H_ 18*09537850SAkhilesh Sanikop #define LIBGAV1_SRC_TILE_SCRATCH_BUFFER_H_ 19*09537850SAkhilesh Sanikop 20*09537850SAkhilesh Sanikop #include <cstddef> 21*09537850SAkhilesh Sanikop #include <cstdint> 22*09537850SAkhilesh Sanikop #include <cstring> 23*09537850SAkhilesh Sanikop #include <memory> 24*09537850SAkhilesh Sanikop #include <mutex> // NOLINT (unapproved c++11 header) 25*09537850SAkhilesh Sanikop #include <new> 26*09537850SAkhilesh Sanikop #include <utility> 27*09537850SAkhilesh Sanikop 28*09537850SAkhilesh Sanikop #include "src/dsp/constants.h" 29*09537850SAkhilesh Sanikop #include "src/utils/common.h" 30*09537850SAkhilesh Sanikop #include "src/utils/compiler_attributes.h" 31*09537850SAkhilesh Sanikop #include "src/utils/constants.h" 32*09537850SAkhilesh Sanikop #include "src/utils/memory.h" 33*09537850SAkhilesh Sanikop #include "src/utils/stack.h" 34*09537850SAkhilesh Sanikop 35*09537850SAkhilesh Sanikop namespace libgav1 { 36*09537850SAkhilesh Sanikop 37*09537850SAkhilesh Sanikop // Buffer to facilitate decoding a superblock. 38*09537850SAkhilesh Sanikop struct TileScratchBuffer : public MaxAlignedAllocable { 39*09537850SAkhilesh Sanikop static constexpr int kBlockDecodedStride = 34; 40*09537850SAkhilesh Sanikop InitTileScratchBuffer41*09537850SAkhilesh Sanikop LIBGAV1_MUST_USE_RESULT bool Init(int bitdepth) { 42*09537850SAkhilesh Sanikop #if LIBGAV1_MAX_BITDEPTH >= 10 43*09537850SAkhilesh Sanikop const int pixel_size = (bitdepth == 8) ? 1 : 2; 44*09537850SAkhilesh Sanikop #else 45*09537850SAkhilesh Sanikop assert(bitdepth == 8); 46*09537850SAkhilesh Sanikop static_cast<void>(bitdepth); 47*09537850SAkhilesh Sanikop const int pixel_size = 1; 48*09537850SAkhilesh Sanikop #endif 49*09537850SAkhilesh Sanikop 50*09537850SAkhilesh Sanikop static_assert(kConvolveScaleBorderRight >= kConvolveBorderRight, ""); 51*09537850SAkhilesh Sanikop constexpr int unaligned_convolve_buffer_stride = 52*09537850SAkhilesh Sanikop kMaxScaledSuperBlockSizeInPixels + kConvolveBorderLeftTop + 53*09537850SAkhilesh Sanikop kConvolveScaleBorderRight; 54*09537850SAkhilesh Sanikop convolve_block_buffer_stride = Align<ptrdiff_t>( 55*09537850SAkhilesh Sanikop unaligned_convolve_buffer_stride * pixel_size, kMaxAlignment); 56*09537850SAkhilesh Sanikop constexpr int convolve_buffer_height = kMaxScaledSuperBlockSizeInPixels + 57*09537850SAkhilesh Sanikop kConvolveBorderLeftTop + 58*09537850SAkhilesh Sanikop kConvolveBorderBottom; 59*09537850SAkhilesh Sanikop 60*09537850SAkhilesh Sanikop convolve_block_buffer = MakeAlignedUniquePtr<uint8_t>( 61*09537850SAkhilesh Sanikop kMaxAlignment, convolve_buffer_height * convolve_block_buffer_stride); 62*09537850SAkhilesh Sanikop #if LIBGAV1_MSAN 63*09537850SAkhilesh Sanikop // Quiet msan warnings in ConvolveScale2D_NEON(). Set with random non-zero 64*09537850SAkhilesh Sanikop // value to aid in future debugging. 65*09537850SAkhilesh Sanikop memset(convolve_block_buffer.get(), 0x66, 66*09537850SAkhilesh Sanikop convolve_buffer_height * convolve_block_buffer_stride); 67*09537850SAkhilesh Sanikop #endif 68*09537850SAkhilesh Sanikop 69*09537850SAkhilesh Sanikop return convolve_block_buffer != nullptr; 70*09537850SAkhilesh Sanikop } 71*09537850SAkhilesh Sanikop 72*09537850SAkhilesh Sanikop // kCompoundPredictionTypeDiffWeighted prediction mode needs a mask of the 73*09537850SAkhilesh Sanikop // prediction block size. This buffer is used to store that mask. The masks 74*09537850SAkhilesh Sanikop // will be created for the Y plane and will be re-used for the U & V planes. 75*09537850SAkhilesh Sanikop alignas(kMaxAlignment) uint8_t weight_mask[kMaxSuperBlockSizeSquareInPixels]; 76*09537850SAkhilesh Sanikop 77*09537850SAkhilesh Sanikop // For each instance of the TileScratchBuffer, only one of the following 78*09537850SAkhilesh Sanikop // buffers will be used at any given time, so it is ok to share them in a 79*09537850SAkhilesh Sanikop // union. 80*09537850SAkhilesh Sanikop union { 81*09537850SAkhilesh Sanikop // Buffers used for prediction process. 82*09537850SAkhilesh Sanikop // Compound prediction calculations always output 16-bit values. Depending 83*09537850SAkhilesh Sanikop // on the bitdepth the values may be treated as int16_t or uint16_t. See 84*09537850SAkhilesh Sanikop // src/dsp/convolve.cc and src/dsp/warp.cc for explanations. 85*09537850SAkhilesh Sanikop // Inter/intra calculations output Pixel values. 86*09537850SAkhilesh Sanikop // These buffers always use width as the stride. This enables packing the 87*09537850SAkhilesh Sanikop // values in and simplifies loads/stores for small values. 88*09537850SAkhilesh Sanikop 89*09537850SAkhilesh Sanikop // 10/12 bit compound prediction and 10/12 bit inter/intra prediction. 90*09537850SAkhilesh Sanikop alignas(kMaxAlignment) uint16_t 91*09537850SAkhilesh Sanikop prediction_buffer[2][kMaxSuperBlockSizeSquareInPixels]; 92*09537850SAkhilesh Sanikop // 8 bit compound prediction buffer. 93*09537850SAkhilesh Sanikop alignas(kMaxAlignment) int16_t 94*09537850SAkhilesh Sanikop compound_prediction_buffer_8bpp[2][kMaxSuperBlockSizeSquareInPixels]; 95*09537850SAkhilesh Sanikop 96*09537850SAkhilesh Sanikop // Union usage note: This is used only by functions in the "intra" 97*09537850SAkhilesh Sanikop // prediction path. 98*09537850SAkhilesh Sanikop // 99*09537850SAkhilesh Sanikop // Buffer used for storing subsampled luma samples needed for CFL 100*09537850SAkhilesh Sanikop // prediction. This buffer is used to avoid repetition of the subsampling 101*09537850SAkhilesh Sanikop // for the V plane when it is already done for the U plane. 102*09537850SAkhilesh Sanikop int16_t cfl_luma_buffer[kCflLumaBufferStride][kCflLumaBufferStride]; 103*09537850SAkhilesh Sanikop }; 104*09537850SAkhilesh Sanikop 105*09537850SAkhilesh Sanikop // Buffer used for convolve. The maximum size required for this buffer is: 106*09537850SAkhilesh Sanikop // maximum block height (with scaling and border) = 2 * 128 + 3 + 4 = 263. 107*09537850SAkhilesh Sanikop // maximum block stride (with scaling and border aligned to 16) = 108*09537850SAkhilesh Sanikop // (2 * 128 + 3 + 8 + 5) * pixel_size = 272 * pixel_size. 109*09537850SAkhilesh Sanikop // Where pixel_size is (bitdepth == 8) ? 1 : 2. 110*09537850SAkhilesh Sanikop // Has an alignment of kMaxAlignment when allocated. 111*09537850SAkhilesh Sanikop AlignedUniquePtr<uint8_t> convolve_block_buffer; 112*09537850SAkhilesh Sanikop ptrdiff_t convolve_block_buffer_stride; 113*09537850SAkhilesh Sanikop 114*09537850SAkhilesh Sanikop // Flag indicating whether the data in |cfl_luma_buffer| is valid. 115*09537850SAkhilesh Sanikop bool cfl_luma_buffer_valid; 116*09537850SAkhilesh Sanikop 117*09537850SAkhilesh Sanikop // Equivalent to BlockDecoded array in the spec. This stores the decoded 118*09537850SAkhilesh Sanikop // state of every 4x4 block in a superblock. It has 1 row/column border on 119*09537850SAkhilesh Sanikop // all 4 sides (hence the 34x34 dimension instead of 32x32). Note that the 120*09537850SAkhilesh Sanikop // spec uses "-1" as an index to access the left and top borders. In the 121*09537850SAkhilesh Sanikop // code, we treat the index (1, 1) as equivalent to the spec's (0, 0). So 122*09537850SAkhilesh Sanikop // all accesses into this array will be offset by +1 when compared with the 123*09537850SAkhilesh Sanikop // spec. 124*09537850SAkhilesh Sanikop bool block_decoded[kMaxPlanes][kBlockDecodedStride][kBlockDecodedStride]; 125*09537850SAkhilesh Sanikop }; 126*09537850SAkhilesh Sanikop 127*09537850SAkhilesh Sanikop class TileScratchBufferPool { 128*09537850SAkhilesh Sanikop public: Reset(int bitdepth)129*09537850SAkhilesh Sanikop void Reset(int bitdepth) { 130*09537850SAkhilesh Sanikop if (bitdepth_ == bitdepth) return; 131*09537850SAkhilesh Sanikop #if LIBGAV1_MAX_BITDEPTH >= 10 132*09537850SAkhilesh Sanikop if (bitdepth_ == 8 && bitdepth != 8) { 133*09537850SAkhilesh Sanikop // We are going from a pixel size of 1 to a pixel size of 2. So invalidate 134*09537850SAkhilesh Sanikop // the stack. 135*09537850SAkhilesh Sanikop std::lock_guard<std::mutex> lock(mutex_); 136*09537850SAkhilesh Sanikop while (!buffers_.Empty()) { 137*09537850SAkhilesh Sanikop buffers_.Pop(); 138*09537850SAkhilesh Sanikop } 139*09537850SAkhilesh Sanikop } 140*09537850SAkhilesh Sanikop #endif 141*09537850SAkhilesh Sanikop bitdepth_ = bitdepth; 142*09537850SAkhilesh Sanikop } 143*09537850SAkhilesh Sanikop Get()144*09537850SAkhilesh Sanikop std::unique_ptr<TileScratchBuffer> Get() { 145*09537850SAkhilesh Sanikop std::lock_guard<std::mutex> lock(mutex_); 146*09537850SAkhilesh Sanikop if (buffers_.Empty()) { 147*09537850SAkhilesh Sanikop std::unique_ptr<TileScratchBuffer> scratch_buffer(new (std::nothrow) 148*09537850SAkhilesh Sanikop TileScratchBuffer); 149*09537850SAkhilesh Sanikop if (scratch_buffer == nullptr || !scratch_buffer->Init(bitdepth_)) { 150*09537850SAkhilesh Sanikop return nullptr; 151*09537850SAkhilesh Sanikop } 152*09537850SAkhilesh Sanikop return scratch_buffer; 153*09537850SAkhilesh Sanikop } 154*09537850SAkhilesh Sanikop return buffers_.Pop(); 155*09537850SAkhilesh Sanikop } 156*09537850SAkhilesh Sanikop Release(std::unique_ptr<TileScratchBuffer> scratch_buffer)157*09537850SAkhilesh Sanikop void Release(std::unique_ptr<TileScratchBuffer> scratch_buffer) { 158*09537850SAkhilesh Sanikop std::lock_guard<std::mutex> lock(mutex_); 159*09537850SAkhilesh Sanikop buffers_.Push(std::move(scratch_buffer)); 160*09537850SAkhilesh Sanikop } 161*09537850SAkhilesh Sanikop 162*09537850SAkhilesh Sanikop private: 163*09537850SAkhilesh Sanikop std::mutex mutex_; 164*09537850SAkhilesh Sanikop // We will never need more than kMaxThreads scratch buffers since that is the 165*09537850SAkhilesh Sanikop // maximum amount of work that will be done at any given time. 166*09537850SAkhilesh Sanikop Stack<std::unique_ptr<TileScratchBuffer>, kMaxThreads> buffers_ 167*09537850SAkhilesh Sanikop LIBGAV1_GUARDED_BY(mutex_); 168*09537850SAkhilesh Sanikop int bitdepth_ = 0; 169*09537850SAkhilesh Sanikop }; 170*09537850SAkhilesh Sanikop 171*09537850SAkhilesh Sanikop } // namespace libgav1 172*09537850SAkhilesh Sanikop 173*09537850SAkhilesh Sanikop #endif // LIBGAV1_SRC_TILE_SCRATCH_BUFFER_H_ 174