/*
 * Copyright 2015 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#include "src/gpu/ganesh/GrBlurUtils.h"

#include "include/core/SkAlphaType.h"
#include "include/core/SkBitmap.h"
#include "include/core/SkBlendMode.h"
#include "include/core/SkBlurTypes.h"
#include "include/core/SkCanvas.h"
#include "include/core/SkColorSpace.h"
#include "include/core/SkData.h"
#include "include/core/SkImageInfo.h"
#include "include/core/SkM44.h"
#include "include/core/SkMatrix.h"
#include "include/core/SkPaint.h"
#include "include/core/SkPath.h"
#include "include/core/SkPoint.h"
#include "include/core/SkRRect.h"
#include "include/core/SkRect.h"
#include "include/core/SkRefCnt.h"
#include "include/core/SkRegion.h"
#include "include/core/SkSamplingOptions.h"
#include "include/core/SkScalar.h"
#include "include/core/SkSize.h"
#include "include/core/SkSpan.h"
#include "include/core/SkString.h"
#include "include/core/SkStrokeRec.h"
#include "include/core/SkSurface.h"
#include "include/core/SkSurfaceProps.h"
#include "include/core/SkTileMode.h"
#include "include/effects/SkRuntimeEffect.h"
#include "include/gpu/GpuTypes.h"
#include "include/gpu/ganesh/GrDirectContext.h"
#include "include/gpu/ganesh/GrRecordingContext.h"
#include "include/gpu/ganesh/GrTypes.h"
#include "include/private/SkColorData.h"
#include "include/private/base/SkAssert.h"
#include "include/private/base/SkFixed.h"
#include "include/private/base/SkFloatingPoint.h"
#include "include/private/base/SkMath.h"
#include "include/private/base/SkTemplates.h"
#include "include/private/gpu/ganesh/GrTypesPriv.h"
#include "src/base/SkFloatBits.h"
#include "src/base/SkTLazy.h"
#include "src/core/SkBlurMaskFilterImpl.h"
#include "src/core/SkDraw.h"
#include "src/core/SkMask.h"
#include "src/core/SkMaskFilterBase.h"
#include "src/core/SkRRectPriv.h"
#include "src/core/SkRuntimeEffectPriv.h"
#include "src/core/SkTraceEvent.h"
#include "src/gpu/BlurUtils.h"
#include "src/gpu/ResourceKey.h"
#include "src/gpu/SkBackingFit.h"
#include "src/gpu/Swizzle.h"
#include "src/gpu/ganesh/GrCaps.h"
#include "src/gpu/ganesh/GrClip.h"
#include "src/gpu/ganesh/GrColorInfo.h"
#include "src/gpu/ganesh/GrColorSpaceXform.h"
#include "src/gpu/ganesh/GrDirectContextPriv.h"
#include "src/gpu/ganesh/GrFixedClip.h"
#include "src/gpu/ganesh/GrFragmentProcessor.h"
#include "src/gpu/ganesh/GrFragmentProcessors.h"
#include "src/gpu/ganesh/GrPaint.h"
#include "src/gpu/ganesh/GrRecordingContextPriv.h"
#include "src/gpu/ganesh/GrSamplerState.h"
#include "src/gpu/ganesh/GrShaderCaps.h"
#include "src/gpu/ganesh/GrStyle.h"
#include "src/gpu/ganesh/GrSurfaceProxy.h"
#include "src/gpu/ganesh/GrSurfaceProxyView.h"
#include "src/gpu/ganesh/GrTextureProxy.h"
#include "src/gpu/ganesh/GrThreadSafeCache.h"
#include "src/gpu/ganesh/GrUtil.h"
#include "src/gpu/ganesh/SkGr.h"
#include "src/gpu/ganesh/SurfaceContext.h"
#include "src/gpu/ganesh/SurfaceDrawContext.h"
#include "src/gpu/ganesh/SurfaceFillContext.h"
#include "src/gpu/ganesh/effects/GrBlendFragmentProcessor.h"
#include "src/gpu/ganesh/effects/GrMatrixEffect.h"
#include "src/gpu/ganesh/effects/GrSkSLFP.h"
#include "src/gpu/ganesh/effects/GrTextureEffect.h"
#include "src/gpu/ganesh/geometry/GrStyledShape.h"

#include <algorithm>
#include <array>
#include <cstdint>
#include <initializer_list>
#include <memory>
#include <tuple>
#include <utility>

namespace GrBlurUtils {

static bool clip_bounds_quick_reject(const SkIRect& clipBounds, const SkIRect& rect) {
    return clipBounds.isEmpty() || rect.isEmpty() || !SkIRect::Intersects(clipBounds, rect);
}

static constexpr auto kMaskOrigin = kTopLeft_GrSurfaceOrigin;

// Draw a mask using the supplied paint. Since the coverage/geometry
// is already burnt into the mask this boils down to a rect draw.
// Return true if the mask was successfully drawn.
static bool draw_mask(skgpu::ganesh::SurfaceDrawContext* sdc,
                      const GrClip* clip,
                      const SkMatrix& viewMatrix,
                      const SkIRect& maskBounds,
                      GrPaint&& paint,
                      GrSurfaceProxyView mask) {
    SkMatrix inverse;
    if (!viewMatrix.invert(&inverse)) {
        return false;
    }

    mask.concatSwizzle(skgpu::Swizzle("aaaa"));

    SkMatrix matrix = SkMatrix::Translate(-SkIntToScalar(maskBounds.fLeft),
                                          -SkIntToScalar(maskBounds.fTop));
    matrix.preConcat(viewMatrix);
    paint.setCoverageFragmentProcessor(
            GrTextureEffect::Make(std::move(mask), kUnknown_SkAlphaType, matrix));

    sdc->fillPixelsWithLocalMatrix(clip, std::move(paint), maskBounds, inverse);
    return true;
}

static void mask_release_proc(void* addr, void* /*context*/) {
    SkMaskBuilder::FreeImage(addr);
}

// This stores the mapping from an unclipped, integerized, device-space, shape bounds to
// the filtered mask's draw rect.
struct DrawRectData {
    SkIVector fOffset;
    SkISize   fSize;
};

static sk_sp<SkData> create_data(const SkIRect& drawRect, const SkIRect& origDevBounds) {

    DrawRectData drawRectData { {drawRect.fLeft - origDevBounds.fLeft,
                                 drawRect.fTop - origDevBounds.fTop},
                                drawRect.size() };

    return SkData::MakeWithCopy(&drawRectData, sizeof(drawRectData));
}

static SkIRect extract_draw_rect_from_data(SkData* data, const SkIRect& origDevBounds) {
    auto drawRectData = static_cast<const DrawRectData*>(data->data());

    return SkIRect::MakeXYWH(origDevBounds.fLeft + drawRectData->fOffset.fX,
                             origDevBounds.fTop + drawRectData->fOffset.fY,
                             drawRectData->fSize.fWidth,
                             drawRectData->fSize.fHeight);
}

static GrSurfaceProxyView sw_create_filtered_mask(GrRecordingContext* rContext,
                                                  const SkMatrix& viewMatrix,
                                                  const GrStyledShape& shape,
                                                  const SkMaskFilter* filter,
                                                  const SkIRect& unclippedDevShapeBounds,
                                                  const SkIRect& clipBounds,
                                                  SkIRect* drawRect,
                                                  skgpu::UniqueKey* key) {
    SkASSERT(filter);
    SkASSERT(!shape.style().applies());

    auto threadSafeCache = rContext->priv().threadSafeCache();

    GrSurfaceProxyView filteredMaskView;
    sk_sp<SkData> data;

    if (key->isValid()) {
        std::tie(filteredMaskView, data) = threadSafeCache->findWithData(*key);
    }

    if (filteredMaskView) {
        SkASSERT(data);
        SkASSERT(kMaskOrigin == filteredMaskView.origin());

        *drawRect = extract_draw_rect_from_data(data.get(), unclippedDevShapeBounds);
    } else {
        SkStrokeRec::InitStyle fillOrHairline = shape.style().isSimpleHairline()
                                                        ? SkStrokeRec::kHairline_InitStyle
                                                        : SkStrokeRec::kFill_InitStyle;

        // TODO: it seems like we could create an SkDraw here and set its fMatrix field rather
        // than explicitly transforming the path to device space.
        SkPath devPath;

        shape.asPath(&devPath);

        devPath.transform(viewMatrix);

        SkMaskBuilder srcM, dstM;
        if (!SkDraw::DrawToMask(devPath, clipBounds, filter, &viewMatrix, &srcM,
                                SkMaskBuilder::kComputeBoundsAndRenderImage_CreateMode,
                                fillOrHairline)) {
            return {};
        }
        SkAutoMaskFreeImage autoSrc(srcM.image());

        SkASSERT(SkMask::kA8_Format == srcM.fFormat);

        if (!as_MFB(filter)->filterMask(&dstM, srcM, viewMatrix, nullptr)) {
            return {};
        }
        // this will free-up dstM when we're done (allocated in filterMask())
        SkAutoMaskFreeImage autoDst(dstM.image());

        if (clip_bounds_quick_reject(clipBounds, dstM.fBounds)) {
            return {};
        }

        // we now have a device-aligned 8bit mask in dstM, ready to be drawn using
        // the current clip (and identity matrix) and GrPaint settings
        SkBitmap bm;
        if (!bm.installPixels(SkImageInfo::MakeA8(dstM.fBounds.width(), dstM.fBounds.height()),
                              autoDst.release(), dstM.fRowBytes, mask_release_proc, nullptr)) {
            return {};
        }
        bm.setImmutable();

        std::tie(filteredMaskView, std::ignore) = GrMakeUncachedBitmapProxyView(
                rContext, bm, skgpu::Mipmapped::kNo, SkBackingFit::kApprox);
        if (!filteredMaskView) {
            return {};
        }

        SkASSERT(kMaskOrigin == filteredMaskView.origin());

        *drawRect = dstM.fBounds;

        if (key->isValid()) {
            key->setCustomData(create_data(*drawRect, unclippedDevShapeBounds));
            std::tie(filteredMaskView, data) = threadSafeCache->addWithData(*key, filteredMaskView);
            // If we got a different view back from 'addWithData' it could have a different drawRect
            *drawRect = extract_draw_rect_from_data(data.get(), unclippedDevShapeBounds);
        }
    }

    return filteredMaskView;
}

// Create a mask of 'shape' and return the resulting surfaceDrawContext
static std::unique_ptr<skgpu::ganesh::SurfaceDrawContext> create_mask_GPU(
        GrRecordingContext* rContext,
        const SkIRect& maskRect,
        const SkMatrix& origViewMatrix,
        const GrStyledShape& shape,
        int sampleCnt) {
    // We cache blur masks. Use default surface props here so we can use the same cached mask
    // regardless of the final dst surface.
    SkSurfaceProps defaultSurfaceProps;

    // Use GetApproxSize to implement our own approximate size matching, but demand
    // a "SkBackingFit::kExact" size match on the actual render target. We do this because the
    // filter will reach outside the src bounds, so we need to pre-clear these values to ensure a
    // "decal" sampling effect (i.e., ensure reads outside the src bounds return alpha=0).
    //
    // FIXME: Reads outside the left and top edges will actually clamp to the edge pixel. And in the
    // event that GetApproxSize does not change the size, reads outside the right and/or bottom will
    // do the same. We should offset our filter within the render target and expand the size as
    // needed to guarantee at least 1px of padding on all sides.
    auto approxSize = skgpu::GetApproxSize(maskRect.size());
    auto sdc = skgpu::ganesh::SurfaceDrawContext::MakeWithFallback(rContext,
                                                                   GrColorType::kAlpha_8,
                                                                   nullptr,
                                                                   SkBackingFit::kExact,
                                                                   approxSize,
                                                                   defaultSurfaceProps,
                                                                   sampleCnt,
                                                                   skgpu::Mipmapped::kNo,
                                                                   GrProtected::kNo,
                                                                   kMaskOrigin);
    if (!sdc) {
        return nullptr;
    }

    sdc->clear(SK_PMColor4fTRANSPARENT);

    GrPaint maskPaint;
    maskPaint.setCoverageSetOpXPFactory(SkRegion::kReplace_Op);

    // setup new clip
    GrFixedClip clip(sdc->dimensions(), SkIRect::MakeWH(maskRect.width(), maskRect.height()));

    // Draw the mask into maskTexture with the path's integerized top-left at the origin using
    // maskPaint.
    SkMatrix viewMatrix = origViewMatrix;
    viewMatrix.postTranslate(-SkIntToScalar(maskRect.fLeft), -SkIntToScalar(maskRect.fTop));
    sdc->drawShape(&clip, std::move(maskPaint), GrAA::kYes, viewMatrix, GrStyledShape(shape));
    return sdc;
}

static bool get_unclipped_shape_dev_bounds(const GrStyledShape& shape, const SkMatrix& matrix,
                                           SkIRect* devBounds) {
    SkRect shapeDevBounds;
    if (shape.inverseFilled()) {
        shapeDevBounds = {SK_ScalarNegativeInfinity, SK_ScalarNegativeInfinity,
                          SK_ScalarInfinity, SK_ScalarInfinity};
    } else {
        SkRect shapeBounds = shape.styledBounds();
        if (shapeBounds.isEmpty()) {
            return false;
        }
        matrix.mapRect(&shapeDevBounds, shapeBounds);
    }
    // Even though these are "unclipped" bounds we still clip to the int32_t range.
    // This is the largest int32_t that is representable exactly as a float. The next 63 larger ints
    // would round down to this value when cast to a float, but who really cares.
    // INT32_MIN is exactly representable.
    static constexpr int32_t kMaxInt = 2147483520;
    if (!shapeDevBounds.intersect(SkRect::MakeLTRB(INT32_MIN, INT32_MIN, kMaxInt, kMaxInt))) {
        return false;
    }
    // Make sure that the resulting SkIRect can have representable width and height
    if (SkScalarRoundToInt(shapeDevBounds.width()) > kMaxInt ||
        SkScalarRoundToInt(shapeDevBounds.height()) > kMaxInt) {
        return false;
    }
    shapeDevBounds.roundOut(devBounds);
    return true;
}

// Gets the shape bounds, the clip bounds, and the intersection (if any). Returns false if there
// is no intersection.
static bool get_shape_and_clip_bounds(skgpu::ganesh::SurfaceDrawContext* sdc,
                                      const GrClip* clip,
                                      const GrStyledShape& shape,
                                      const SkMatrix& matrix,
                                      SkIRect* unclippedDevShapeBounds,
                                      SkIRect* devClipBounds) {
    // compute bounds as intersection of rt size, clip, and path
    *devClipBounds = clip ? clip->getConservativeBounds()
                          : SkIRect::MakeWH(sdc->width(), sdc->height());

    if (!get_unclipped_shape_dev_bounds(shape, matrix, unclippedDevShapeBounds)) {
        *unclippedDevShapeBounds = SkIRect::MakeEmpty();
        return false;
    }

    return true;
}

/**
 *  If we cannot create a FragmentProcess for a mask filter, we might have special logic for
 *  it here. That code path requires constructing a src mask as input. Since that is a potentially
 *  expensive operation, this function tests if filter_mask would succeed if the mask
 *  were to be created.
 *
 *  'maskRect' returns the device space portion of the mask that the filter needs. The mask
 *  passed into 'filter_mask' should have the same extent as 'maskRect' but be
 *  translated to the upper-left corner of the mask (i.e., (maskRect.fLeft, maskRect.fTop)
 *  appears at (0, 0) in the mask).
 *
 * Logically, how this works is:
 *    can_filter_mask is called
 *    if (it returns true)
 *        the returned mask rect is used for quick rejecting
 *            the mask rect is used to generate the mask
 *            filter_mask is called to filter the mask
 *
 * TODO: this should work as:
 *    if (can_filter_mask(devShape, ...)) // rect, rrect, drrect, path
 *        filter_mask(devShape, ...)
 * this would hide the RRect special case and the mask generation
 */
static bool can_filter_mask(const SkMaskFilterBase* maskFilter,
                            const GrStyledShape& shape,
                            const SkIRect& devSpaceShapeBounds,
                            const SkIRect& clipBounds,
                            const SkMatrix& ctm,
                            SkIRect* maskRect) {
    if (maskFilter->type() != SkMaskFilterBase::Type::kBlur) {
        return false;
    }
    auto bmf = static_cast<const SkBlurMaskFilterImpl*>(maskFilter);
    SkScalar xformedSigma = bmf->computeXformedSigma(ctm);
    if (skgpu::BlurIsEffectivelyIdentity(xformedSigma)) {
        *maskRect = devSpaceShapeBounds;
        return maskRect->intersect(clipBounds);
    }

    if (maskRect) {
        float sigma3 = 3 * xformedSigma;

        // Outset srcRect and clipRect by 3 * sigma, to compute affected blur area.
        SkIRect clipRect = clipBounds.makeOutset(sigma3, sigma3);
        SkIRect srcRect = devSpaceShapeBounds.makeOutset(sigma3, sigma3);

        if (!srcRect.intersect(clipRect)) {
            srcRect.setEmpty();
        }
        *maskRect = srcRect;
    }

    // We prefer to blur paths with small blur radii on the CPU.
    static const SkScalar kMIN_GPU_BLUR_SIZE  = SkIntToScalar(64);
    static const SkScalar kMIN_GPU_BLUR_SIGMA = SkIntToScalar(32);

    if (devSpaceShapeBounds.width() <= kMIN_GPU_BLUR_SIZE &&
        devSpaceShapeBounds.height() <= kMIN_GPU_BLUR_SIZE &&
        xformedSigma <= kMIN_GPU_BLUR_SIGMA) {
        return false;
    }

    return true;
}

///////////////////////////////////////////////////////////////////////////////
//  Circle Blur
///////////////////////////////////////////////////////////////////////////////

static std::unique_ptr<GrFragmentProcessor> create_profile_effect(GrRecordingContext* rContext,
                                                                  const SkRect& circle,
                                                                  float sigma,
                                                                  float* solidRadius,
                                                                  float* textureRadius) {
    float circleR = circle.width() / 2.0f;
    if (!SkIsFinite(circleR) || circleR < SK_ScalarNearlyZero) {
        return nullptr;
    }

    auto threadSafeCache = rContext->priv().threadSafeCache();

    // Profile textures are cached by the ratio of sigma to circle radius and by the size of the
    // profile texture (binned by powers of 2).
    SkScalar sigmaToCircleRRatio = sigma / circleR;
    // When sigma is really small this becomes a equivalent to convolving a Gaussian with a
    // half-plane. Similarly, in the extreme high ratio cases circle becomes a point WRT to the
    // Guassian and the profile texture is a just a Gaussian evaluation. However, we haven't yet
    // implemented this latter optimization.
    sigmaToCircleRRatio = std::min(sigmaToCircleRRatio, 8.f);
    SkFixed sigmaToCircleRRatioFixed;
    static const SkScalar kHalfPlaneThreshold = 0.1f;
    bool useHalfPlaneApprox = false;
    if (sigmaToCircleRRatio <= kHalfPlaneThreshold) {
        useHalfPlaneApprox = true;
        sigmaToCircleRRatioFixed = 0;
        *solidRadius = circleR - 3 * sigma;
        *textureRadius = 6 * sigma;
    } else {
        // Convert to fixed point for the key.
        sigmaToCircleRRatioFixed = SkScalarToFixed(sigmaToCircleRRatio);
        // We shave off some bits to reduce the number of unique entries. We could probably
        // shave off more than we do.
        sigmaToCircleRRatioFixed &= ~0xff;
        sigmaToCircleRRatio = SkFixedToScalar(sigmaToCircleRRatioFixed);
        sigma = circleR * sigmaToCircleRRatio;
        *solidRadius = 0;
        *textureRadius = circleR + 3 * sigma;
    }

    static constexpr int kProfileTextureWidth = 512;
    // This would be kProfileTextureWidth/textureRadius if it weren't for the fact that we do
    // the calculation of the profile coord in a coord space that has already been scaled by
    // 1 / textureRadius. This is done to avoid overflow in length().
    SkMatrix texM = SkMatrix::Scale(kProfileTextureWidth, 1.f);

    static const skgpu::UniqueKey::Domain kDomain = skgpu::UniqueKey::GenerateDomain();
    skgpu::UniqueKey key;
    skgpu::UniqueKey::Builder builder(&key, kDomain, 1, "1-D Circular Blur");
    builder[0] = sigmaToCircleRRatioFixed;
    builder.finish();

    GrSurfaceProxyView profileView = threadSafeCache->find(key);
    if (profileView) {
        SkASSERT(profileView.asTextureProxy());
        SkASSERT(profileView.origin() == kTopLeft_GrSurfaceOrigin);
        return GrTextureEffect::Make(std::move(profileView), kPremul_SkAlphaType, texM);
    }

    SkBitmap bm;
    if (useHalfPlaneApprox) {
        bm = skgpu::CreateHalfPlaneProfile(kProfileTextureWidth);
    } else {
        // Rescale params to the size of the texture we're creating.
        SkScalar scale = kProfileTextureWidth / *textureRadius;
        bm = skgpu::CreateCircleProfile(sigma * scale, circleR * scale, kProfileTextureWidth);
    }

    profileView = std::get<0>(GrMakeUncachedBitmapProxyView(rContext, bm));
    if (!profileView) {
        return nullptr;
    }

    profileView = threadSafeCache->add(key, profileView);
    return GrTextureEffect::Make(std::move(profileView), kPremul_SkAlphaType, texM);
}

static std::unique_ptr<GrFragmentProcessor> make_circle_blur(GrRecordingContext* context,
                                                             const SkRect& circle,
                                                             float sigma) {
    if (skgpu::BlurIsEffectivelyIdentity(sigma)) {
        return nullptr;
    }

    float solidRadius;
    float textureRadius;
    std::unique_ptr<GrFragmentProcessor> profile =
            create_profile_effect(context, circle, sigma, &solidRadius, &textureRadius);
    if (!profile) {
        return nullptr;
    }

    static const SkRuntimeEffect* effect = SkMakeRuntimeEffect(SkRuntimeEffect::MakeForShader,
        "uniform shader blurProfile;"
        "uniform half4 circleData;"

        "half4 main(float2 xy) {"
            // We just want to compute "(length(vec) - circleData.z + 0.5) * circleData.w" but need
            // to rearrange to avoid passing large values to length() that would overflow.
            "half2 vec = half2((sk_FragCoord.xy - circleData.xy) * circleData.w);"
            "half dist = length(vec) + (0.5 - circleData.z) * circleData.w;"
            "return blurProfile.eval(half2(dist, 0.5)).aaaa;"
        "}"
    );

    SkV4 circleData = {circle.centerX(), circle.centerY(), solidRadius, 1.f / textureRadius};
    auto circleBlurFP = GrSkSLFP::Make(effect, "CircleBlur", /*inputFP=*/nullptr,
                                       GrSkSLFP::OptFlags::kCompatibleWithCoverageAsAlpha,
                                       "blurProfile", GrSkSLFP::IgnoreOptFlags(std::move(profile)),
                                       "circleData", circleData);
    // Modulate blur with the input color.
    return GrBlendFragmentProcessor::Make<SkBlendMode::kModulate>(std::move(circleBlurFP),
                                                                  /*dst=*/nullptr);
}

///////////////////////////////////////////////////////////////////////////////
//  Rect Blur
///////////////////////////////////////////////////////////////////////////////

static std::unique_ptr<GrFragmentProcessor> make_rect_integral_fp(GrRecordingContext* rContext,
                                                                  float sixSigma) {
    SkASSERT(!skgpu::BlurIsEffectivelyIdentity(sixSigma / 6.f));
    auto threadSafeCache = rContext->priv().threadSafeCache();

    int width = skgpu::ComputeIntegralTableWidth(sixSigma);

    static const skgpu::UniqueKey::Domain kDomain = skgpu::UniqueKey::GenerateDomain();
    skgpu::UniqueKey key;
    skgpu::UniqueKey::Builder builder(&key, kDomain, 1, "Rect Blur Mask");
    builder[0] = width;
    builder.finish();

    SkMatrix m = SkMatrix::Scale(width / sixSigma, 1.f);

    GrSurfaceProxyView view = threadSafeCache->find(key);

    if (view) {
        SkASSERT(view.origin() == kTopLeft_GrSurfaceOrigin);
        return GrTextureEffect::Make(
                std::move(view), kPremul_SkAlphaType, m, GrSamplerState::Filter::kLinear);
    }

    SkBitmap bitmap = skgpu::CreateIntegralTable(width);
    if (bitmap.empty()) {
        return {};
    }

    view = std::get<0>(GrMakeUncachedBitmapProxyView(rContext, bitmap));
    if (!view) {
        return {};
    }

    view = threadSafeCache->add(key, view);

    SkASSERT(view.origin() == kTopLeft_GrSurfaceOrigin);
    return GrTextureEffect::Make(
            std::move(view), kPremul_SkAlphaType, m, GrSamplerState::Filter::kLinear);
}

static std::unique_ptr<GrFragmentProcessor> make_rect_blur(GrRecordingContext* context,
                                                           const GrShaderCaps& caps,
                                                           const SkRect& srcRect,
                                                           const SkMatrix& viewMatrix,
                                                           float transformedSigma) {
    SkASSERT(viewMatrix.preservesRightAngles());
    SkASSERT(srcRect.isSorted());

    if (skgpu::BlurIsEffectivelyIdentity(transformedSigma)) {
        // No need to blur the rect
        return nullptr;
    }

    SkMatrix invM;
    SkRect rect;
    if (viewMatrix.rectStaysRect()) {
        invM = SkMatrix::I();
        // We can do everything in device space when the src rect projects to a rect in device space
        SkAssertResult(viewMatrix.mapRect(&rect, srcRect));
    } else {
        // The view matrix may scale, perhaps anisotropically. But we want to apply our device space
        // "transformedSigma" to the delta of frag coord from the rect edges. Factor out the scaling
        // to define a space that is purely rotation/translation from device space (and scale from
        // src space) We'll meet in the middle: pre-scale the src rect to be in this space and then
        // apply the inverse of the rotation/translation portion to the frag coord.
        SkMatrix m;
        SkSize scale;
        if (!viewMatrix.decomposeScale(&scale, &m)) {
            return nullptr;
        }
        if (!m.invert(&invM)) {
            return nullptr;
        }
        rect = {srcRect.left() * scale.width(),
                srcRect.top() * scale.height(),
                srcRect.right() * scale.width(),
                srcRect.bottom() * scale.height()};
    }

    if (!caps.fFloatIs32Bits) {
        // We promote the math that gets us into the Gaussian space to full float when the rect
        // coords are large. If we don't have full float then fail. We could probably clip the rect
        // to an outset device bounds instead.
        if (SkScalarAbs(rect.fLeft) > 16000.f || SkScalarAbs(rect.fTop) > 16000.f ||
            SkScalarAbs(rect.fRight) > 16000.f || SkScalarAbs(rect.fBottom) > 16000.f) {
            return nullptr;
        }
    }

    const float sixSigma = 6 * transformedSigma;
    std::unique_ptr<GrFragmentProcessor> integral = make_rect_integral_fp(context, sixSigma);
    if (!integral) {
        return nullptr;
    }

    // In the fast variant we think of the midpoint of the integral texture as aligning with the
    // closest rect edge both in x and y. To simplify texture coord calculation we inset the rect so
    // that the edge of the inset rect corresponds to t = 0 in the texture. It actually simplifies
    // things a bit in the !isFast case, too.
    float threeSigma = sixSigma / 2;
    SkRect insetRect = {rect.left() + threeSigma,
                        rect.top() + threeSigma,
                        rect.right() - threeSigma,
                        rect.bottom() - threeSigma};

    // In our fast variant we find the nearest horizontal and vertical edges and for each do a
    // lookup in the integral texture for each and multiply them. When the rect is less than 6 sigma
    // wide then things aren't so simple and we have to consider both the left and right edge of the
    // rectangle (and similar in y).
    bool isFast = insetRect.isSorted();

    static const SkRuntimeEffect* effect = SkMakeRuntimeEffect(SkRuntimeEffect::MakeForShader,
        // Effect that is a LUT for integral of normal distribution. The value at x:[0,6*sigma] is
        // the integral from -inf to (3*sigma - x). I.e. x is mapped from [0, 6*sigma] to
        // [3*sigma to -3*sigma]. The flip saves a reversal in the shader.
        "uniform shader integral;"

        "uniform float4 rect;"
        "uniform int isFast;"  // specialized

        "half4 main(float2 pos) {"
            "half xCoverage, yCoverage;"
            "if (bool(isFast)) {"
                // Get the smaller of the signed distance from the frag coord to the left and right
                // edges and similar for y.
                // The integral texture goes "backwards" (from 3*sigma to -3*sigma), So, the below
                // computations align the left edge of the integral texture with the inset rect's
                // edge extending outward 6 * sigma from the inset rect.
                "half2 xy = max(half2(rect.LT - pos), half2(pos - rect.RB));"
                "xCoverage = integral.eval(half2(xy.x, 0.5)).a;"
                "yCoverage = integral.eval(half2(xy.y, 0.5)).a;"
            "} else {"
                // We just consider just the x direction here. In practice we compute x and y
                // separately and multiply them together.
                // We define our coord system so that the point at which we're evaluating a kernel
                // defined by the normal distribution (K) at 0. In this coord system let L be left
                // edge and R be the right edge of the rectangle.
                // We can calculate C by integrating K with the half infinite ranges outside the
                // L to R range and subtracting from 1:
                //   C = 1 - <integral of K from from -inf to  L> - <integral of K from R to inf>
                // K is symmetric about x=0 so:
                //   C = 1 - <integral of K from from -inf to  L> - <integral of K from -inf to -R>

                // The integral texture goes "backwards" (from 3*sigma to -3*sigma) which is
                // factored in to the below calculations.
                // Also, our rect uniform was pre-inset by 3 sigma from the actual rect being
                // blurred, also factored in.
                "half4 rect = half4(half2(rect.LT - pos), half2(pos - rect.RB));"
                "xCoverage = 1 - integral.eval(half2(rect.L, 0.5)).a"
                              "- integral.eval(half2(rect.R, 0.5)).a;"
                "yCoverage = 1 - integral.eval(half2(rect.T, 0.5)).a"
                              "- integral.eval(half2(rect.B, 0.5)).a;"
            "}"
            "return half4(xCoverage * yCoverage);"
        "}"
    );

    std::unique_ptr<GrFragmentProcessor> fp =
            GrSkSLFP::Make(effect, "RectBlur", /*inputFP=*/nullptr,
                           GrSkSLFP::OptFlags::kCompatibleWithCoverageAsAlpha,
                           "integral", GrSkSLFP::IgnoreOptFlags(std::move(integral)),
                           "rect", insetRect,
                           "isFast", GrSkSLFP::Specialize<int>(isFast));
    // Modulate blur with the input color.
    fp = GrBlendFragmentProcessor::Make<SkBlendMode::kModulate>(std::move(fp),
                                                                /*dst=*/nullptr);
    if (!invM.isIdentity()) {
        fp = GrMatrixEffect::Make(invM, std::move(fp));
    }
    return GrFragmentProcessor::DeviceSpace(std::move(fp));
}

///////////////////////////////////////////////////////////////////////////////
//  RRect Blur
///////////////////////////////////////////////////////////////////////////////

static constexpr auto kBlurredRRectMaskOrigin = kTopLeft_GrSurfaceOrigin;

static void make_blurred_rrect_key(skgpu::UniqueKey* key,
                                   const SkRRect& rrectToDraw,
                                   float xformedSigma) {
    SkASSERT(!skgpu::BlurIsEffectivelyIdentity(xformedSigma));
    static const skgpu::UniqueKey::Domain kDomain = skgpu::UniqueKey::GenerateDomain();

    skgpu::UniqueKey::Builder builder(key, kDomain, 9, "RoundRect Blur Mask");
    builder[0] = SkScalarCeilToInt(xformedSigma - 1 / 6.0f);

    int index = 1;
    // TODO: this is overkill for _simple_ circular rrects
    for (auto c : {SkRRect::kUpperLeft_Corner,
                   SkRRect::kUpperRight_Corner,
                   SkRRect::kLowerRight_Corner,
                   SkRRect::kLowerLeft_Corner}) {
        SkASSERT(SkScalarIsInt(rrectToDraw.radii(c).fX) && SkScalarIsInt(rrectToDraw.radii(c).fY));
        builder[index++] = SkScalarCeilToInt(rrectToDraw.radii(c).fX);
        builder[index++] = SkScalarCeilToInt(rrectToDraw.radii(c).fY);
    }
    builder.finish();
}

static bool fillin_view_on_gpu(GrDirectContext* dContext,
                               const GrSurfaceProxyView& lazyView,
                               GrThreadSafeCache::Trampoline* trampoline,
                               const SkRRect& rrectToDraw,
                               const SkISize& dimensions,
                               float xformedSigma) {
    SkASSERT(!skgpu::BlurIsEffectivelyIdentity(xformedSigma));

    // We cache blur masks. Use default surface props here so we can use the same cached mask
    // regardless of the final dst surface.
    SkSurfaceProps defaultSurfaceProps;

    std::unique_ptr<skgpu::ganesh::SurfaceDrawContext> sdc =
            skgpu::ganesh::SurfaceDrawContext::MakeWithFallback(dContext,
                                                                GrColorType::kAlpha_8,
                                                                nullptr,
                                                                SkBackingFit::kExact,
                                                                dimensions,
                                                                defaultSurfaceProps,
                                                                1,
                                                                skgpu::Mipmapped::kNo,
                                                                GrProtected::kNo,
                                                                kBlurredRRectMaskOrigin);
    if (!sdc) {
        return false;
    }

    GrPaint paint;

    sdc->clear(SK_PMColor4fTRANSPARENT);
    sdc->drawRRect(nullptr,
                   std::move(paint),
                   GrAA::kYes,
                   SkMatrix::I(),
                   rrectToDraw,
                   GrStyle::SimpleFill());

    GrSurfaceProxyView srcView = sdc->readSurfaceView();
    SkASSERT(srcView.asTextureProxy());
    auto rtc2 = GaussianBlur(dContext,
                             std::move(srcView),
                             sdc->colorInfo().colorType(),
                             sdc->colorInfo().alphaType(),
                             nullptr,
                             SkIRect::MakeSize(dimensions),
                             SkIRect::MakeSize(dimensions),
                             xformedSigma,
                             xformedSigma,
                             SkTileMode::kClamp,
                             SkBackingFit::kExact);
    if (!rtc2 || !rtc2->readSurfaceView()) {
        return false;
    }

    auto view = rtc2->readSurfaceView();
    SkASSERT(view.swizzle() == lazyView.swizzle());
    SkASSERT(view.origin() == lazyView.origin());
    trampoline->fProxy = view.asTextureProxyRef();

    return true;
}

// Create a cpu-side blurred-rrect mask that is close to the version the gpu would've produced.
// The match needs to be close bc the cpu- and gpu-generated version must be interchangeable.
static GrSurfaceProxyView create_mask_on_cpu(GrRecordingContext* rContext,
                                             const SkRRect& rrectToDraw,
                                             const SkISize& dimensions,
                                             float xformedSigma) {
    SkBitmap result = skgpu::CreateRRectBlurMask(rrectToDraw, dimensions, xformedSigma);
    if (result.empty()) {
        return {};
    }

    auto view = std::get<0>(GrMakeUncachedBitmapProxyView(rContext, result));
    if (!view) {
        return {};
    }

    SkASSERT(view.origin() == kBlurredRRectMaskOrigin);
    return view;
}

static std::unique_ptr<GrFragmentProcessor> find_or_create_rrect_blur_mask_fp(
        GrRecordingContext* rContext,
        const SkRRect& rrectToDraw,
        const SkISize& dimensions,
        float xformedSigma) {
    SkASSERT(!skgpu::BlurIsEffectivelyIdentity(xformedSigma));
    skgpu::UniqueKey key;
    make_blurred_rrect_key(&key, rrectToDraw, xformedSigma);

    auto threadSafeCache = rContext->priv().threadSafeCache();

    // It seems like we could omit this matrix and modify the shader code to not normalize
    // the coords used to sample the texture effect. However, the "proxyDims" value in the
    // shader is not always the actual the proxy dimensions. This is because 'dimensions' here
    // was computed using integer corner radii as determined in
    // SkComputeBlurredRRectParams whereas the shader code uses the float radius to compute
    // 'proxyDims'. Why it draws correctly with these unequal values is a mystery for the ages.
    auto m = SkMatrix::Scale(dimensions.width(), dimensions.height());

    GrSurfaceProxyView view;

    if (GrDirectContext* dContext = rContext->asDirectContext()) {
        // The gpu thread gets priority over the recording threads. If the gpu thread is first,
        // it crams a lazy proxy into the cache and then fills it in later.
        auto [lazyView, trampoline] = GrThreadSafeCache::CreateLazyView(dContext,
                                                                        GrColorType::kAlpha_8,
                                                                        dimensions,
                                                                        kBlurredRRectMaskOrigin,
                                                                        SkBackingFit::kExact);
        if (!lazyView) {
            return nullptr;
        }

        view = threadSafeCache->findOrAdd(key, lazyView);
        if (view != lazyView) {
            SkASSERT(view.asTextureProxy());
            SkASSERT(view.origin() == kBlurredRRectMaskOrigin);
            return GrTextureEffect::Make(std::move(view), kPremul_SkAlphaType, m);
        }

        if (!fillin_view_on_gpu(dContext,
                                lazyView,
                                trampoline.get(),
                                rrectToDraw,
                                dimensions,
                                xformedSigma)) {
            // In this case something has gone disastrously wrong so set up to drop the draw
            // that needed this resource and reduce future pollution of the cache.
            threadSafeCache->remove(key);
            return nullptr;
        }
    } else {
        view = threadSafeCache->find(key);
        if (view) {
            SkASSERT(view.asTextureProxy());
            SkASSERT(view.origin() == kBlurredRRectMaskOrigin);
            return GrTextureEffect::Make(std::move(view), kPremul_SkAlphaType, m);
        }

        view = create_mask_on_cpu(rContext, rrectToDraw, dimensions, xformedSigma);
        if (!view) {
            return nullptr;
        }

        view = threadSafeCache->add(key, view);
    }

    SkASSERT(view.asTextureProxy());
    SkASSERT(view.origin() == kBlurredRRectMaskOrigin);
    return GrTextureEffect::Make(std::move(view), kPremul_SkAlphaType, m);
}

static std::unique_ptr<GrFragmentProcessor> make_rrect_blur(GrRecordingContext* context,
                                                            float sigma,
                                                            float xformedSigma,
                                                            const SkRRect& srcRRect,
                                                            const SkRRect& devRRect) {
    SkASSERTF(!SkRRectPriv::IsCircle(devRRect),
              "Unexpected circle. %d\n\t%s\n\t%s",
              SkRRectPriv::IsCircle(srcRRect),
              srcRRect.dumpToString(true).c_str(),
              devRRect.dumpToString(true).c_str());
    SkASSERTF(!devRRect.isRect(),
              "Unexpected rect. %d\n\t%s\n\t%s",
              srcRRect.isRect(),
              srcRRect.dumpToString(true).c_str(),
              devRRect.dumpToString(true).c_str());

    // TODO: loosen this up
    if (!SkRRectPriv::IsSimpleCircular(devRRect)) {
        return nullptr;
    }

    if (skgpu::BlurIsEffectivelyIdentity(xformedSigma)) {
        return nullptr;
    }

    // Make sure we can successfully ninepatch this rrect -- the blur sigma has to be sufficiently
    // small relative to both the size of the corner radius and the width (and height) of the rrect.
    SkRRect rrectToDraw;
    SkISize dimensions;
    SkScalar ignored[kBlurRRectMaxDivisions];

    bool ninePatchable = ComputeBlurredRRectParams(srcRRect,
                                                   devRRect,
                                                   sigma,
                                                   xformedSigma,
                                                   &rrectToDraw,
                                                   &dimensions,
                                                   ignored,
                                                   ignored,
                                                   ignored,
                                                   ignored);
    if (!ninePatchable) {
        return nullptr;
    }

    std::unique_ptr<GrFragmentProcessor> maskFP =
            find_or_create_rrect_blur_mask_fp(context, rrectToDraw, dimensions, xformedSigma);
    if (!maskFP) {
        return nullptr;
    }

    static const SkRuntimeEffect* effect = SkMakeRuntimeEffect(SkRuntimeEffect::MakeForShader,
        "uniform shader ninePatchFP;"

        "uniform half cornerRadius;"
        "uniform float4 proxyRect;"
        "uniform half blurRadius;"

        "half4 main(float2 xy) {"
            // Warp the fragment position to the appropriate part of the 9-patch blur texture by
            // snipping out the middle section of the proxy rect.
            "float2 translatedFragPosFloat = sk_FragCoord.xy - proxyRect.LT;"
            "float2 proxyCenter = (proxyRect.RB - proxyRect.LT) * 0.5;"
            "half edgeSize = 2.0 * blurRadius + cornerRadius + 0.5;"

            // Position the fragment so that (0, 0) marks the center of the proxy rectangle.
            // Negative coordinates are on the left/top side and positive numbers are on the
            // right/bottom.
            "translatedFragPosFloat -= proxyCenter;"

            // Temporarily strip off the fragment's sign. x/y are now strictly increasing as we
            // move away from the center.
            "half2 fragDirection = half2(sign(translatedFragPosFloat));"
            "translatedFragPosFloat = abs(translatedFragPosFloat);"

            // Our goal is to snip out the "middle section" of the proxy rect (everything but the
            // edge). We've repositioned our fragment position so that (0, 0) is the centerpoint
            // and x/y are always positive, so we can subtract here and interpret negative results
            // as being within the middle section.
            "half2 translatedFragPosHalf = half2(translatedFragPosFloat - (proxyCenter - edgeSize));"

            // Remove the middle section by clamping to zero.
            "translatedFragPosHalf = max(translatedFragPosHalf, 0);"

            // Reapply the fragment's sign, so that negative coordinates once again mean left/top
            // side and positive means bottom/right side.
            "translatedFragPosHalf *= fragDirection;"

            // Offset the fragment so that (0, 0) marks the upper-left again, instead of the center
            // point.
            "translatedFragPosHalf += half2(edgeSize);"

            "half2 proxyDims = half2(2.0 * edgeSize);"
            "half2 texCoord = translatedFragPosHalf / proxyDims;"

            "return ninePatchFP.eval(texCoord).aaaa;"
        "}"
    );

    float cornerRadius = SkRRectPriv::GetSimpleRadii(devRRect).fX;
    float blurRadius = 3.f * SkScalarCeilToScalar(xformedSigma - 1 / 6.0f);
    SkRect proxyRect = devRRect.getBounds().makeOutset(blurRadius, blurRadius);

    auto rrectBlurFP = GrSkSLFP::Make(effect, "RRectBlur", /*inputFP=*/nullptr,
                                      GrSkSLFP::OptFlags::kCompatibleWithCoverageAsAlpha,
                                      "ninePatchFP", GrSkSLFP::IgnoreOptFlags(std::move(maskFP)),
                                      "cornerRadius", cornerRadius,
                                      "proxyRect", proxyRect,
                                      "blurRadius", blurRadius);
    // Modulate blur with the input color.
    return GrBlendFragmentProcessor::Make<SkBlendMode::kModulate>(std::move(rrectBlurFP),
                                                                  /*dst=*/nullptr);
}

/**
 *  Try to directly render the mask filter into the target. Returns true if drawing was
 *  successful. If false is returned then paint is unmodified.
 */
static bool direct_filter_mask(GrRecordingContext* context,
                               const SkMaskFilterBase* maskFilter,
                               skgpu::ganesh::SurfaceDrawContext* sdc,
                               GrPaint&& paint,
                               const GrClip* clip,
                               const SkMatrix& viewMatrix,
                               const GrStyledShape& shape) {
    SkASSERT(sdc);
    if (maskFilter->type() != SkMaskFilterBase::Type::kBlur) {
        return false;
    }
    auto bmf = static_cast<const SkBlurMaskFilterImpl*>(maskFilter);

    if (bmf->blurStyle() != kNormal_SkBlurStyle) {
        return false;
    }

    // TODO: we could handle blurred stroked circles
    if (!shape.style().isSimpleFill()) {
        return false;
    }

    SkScalar xformedSigma = bmf->computeXformedSigma(viewMatrix);
    if (skgpu::BlurIsEffectivelyIdentity(xformedSigma)) {
        sdc->drawShape(clip, std::move(paint), GrAA::kYes, viewMatrix, GrStyledShape(shape));
        return true;
    }

    SkRRect srcRRect;
    bool inverted;
    if (!shape.asRRect(&srcRRect, &inverted) || inverted) {
        return false;
    }

    std::unique_ptr<GrFragmentProcessor> fp;

    SkRRect devRRect;
    bool devRRectIsValid = srcRRect.transform(viewMatrix, &devRRect);

    bool devRRectIsCircle = devRRectIsValid && SkRRectPriv::IsCircle(devRRect);

    bool canBeRect = srcRRect.isRect() && viewMatrix.preservesRightAngles();
    bool canBeCircle = (SkRRectPriv::IsCircle(srcRRect) && viewMatrix.isSimilarity()) ||
                       devRRectIsCircle;

    if (canBeRect || canBeCircle) {
        if (canBeRect) {
            fp = make_rect_blur(context, *context->priv().caps()->shaderCaps(),
                                srcRRect.rect(), viewMatrix, xformedSigma);
        } else {
            SkRect devBounds;
            if (devRRectIsCircle) {
                devBounds = devRRect.getBounds();
            } else {
                SkPoint center = {srcRRect.getBounds().centerX(), srcRRect.getBounds().centerY()};
                viewMatrix.mapPoints(&center, 1);
                SkScalar radius = viewMatrix.mapVector(0, srcRRect.width()/2.f).length();
                devBounds = {center.x() - radius,
                             center.y() - radius,
                             center.x() + radius,
                             center.y() + radius};
            }
            fp = make_circle_blur(context, devBounds, xformedSigma);
        }

        if (!fp) {
            return false;
        }

        SkRect srcProxyRect = srcRRect.rect();
        // Determine how much to outset the src rect to ensure we hit pixels within three sigma.
        SkScalar outsetX = 3.0f*xformedSigma;
        SkScalar outsetY = 3.0f*xformedSigma;
        if (viewMatrix.isScaleTranslate()) {
            outsetX /= SkScalarAbs(viewMatrix.getScaleX());
            outsetY /= SkScalarAbs(viewMatrix.getScaleY());
        } else {
            SkSize scale;
            if (!viewMatrix.decomposeScale(&scale, nullptr)) {
                return false;
            }
            outsetX /= scale.width();
            outsetY /= scale.height();
        }
        srcProxyRect.outset(outsetX, outsetY);

        paint.setCoverageFragmentProcessor(std::move(fp));
        sdc->drawRect(clip, std::move(paint), GrAA::kNo, viewMatrix, srcProxyRect);
        return true;
    }
    if (!viewMatrix.isScaleTranslate()) {
        return false;
    }
    if (!devRRectIsValid || !SkRRectPriv::AllCornersCircular(devRRect)) {
        return false;
    }

    fp = make_rrect_blur(context, bmf->sigma(), xformedSigma, srcRRect, devRRect);
    if (!fp) {
        return false;
    }

    if (!bmf->ignoreXform()) {
        SkRect srcProxyRect = srcRRect.rect();
        srcProxyRect.outset(3.0f*bmf->sigma(), 3.0f*bmf->sigma());
        paint.setCoverageFragmentProcessor(std::move(fp));
        sdc->drawRect(clip, std::move(paint), GrAA::kNo, viewMatrix, srcProxyRect);
    } else {
        SkMatrix inverse;
        if (!viewMatrix.invert(&inverse)) {
            return false;
        }

        SkIRect proxyBounds;
        float extra=3.f*SkScalarCeilToScalar(xformedSigma-1/6.0f);
        devRRect.rect().makeOutset(extra, extra).roundOut(&proxyBounds);

        paint.setCoverageFragmentProcessor(std::move(fp));
        sdc->fillPixelsWithLocalMatrix(clip, std::move(paint), proxyBounds, inverse);
    }

    return true;
}

// The key and clip-bounds are computed together because the caching decision can impact the
// clip-bound - since we only cache un-clipped masks the clip can be removed entirely.
// A 'false' return value indicates that the shape is known to be clipped away.
static bool compute_key_and_clip_bounds(skgpu::UniqueKey* maskKey,
                                        SkIRect* boundsForClip,
                                        const GrCaps* caps,
                                        const SkMatrix& viewMatrix,
                                        bool inverseFilled,
                                        const SkMaskFilterBase* maskFilter,
                                        const GrStyledShape& shape,
                                        const SkIRect& unclippedDevShapeBounds,
                                        const SkIRect& devClipBounds) {
    SkASSERT(maskFilter);
    *boundsForClip = devClipBounds;

#ifndef SK_DISABLE_MASKFILTERED_MASK_CACHING
    // To prevent overloading the cache with entries during animations we limit the cache of masks
    // to cases where the matrix preserves axis alignment.
    bool useCache = !inverseFilled && viewMatrix.preservesAxisAlignment() &&
                    shape.hasUnstyledKey() && as_MFB(maskFilter)->asABlur(nullptr);

    if (useCache) {
        SkIRect clippedMaskRect, unClippedMaskRect;
        can_filter_mask(maskFilter, shape, unclippedDevShapeBounds, devClipBounds,
                        viewMatrix, &clippedMaskRect);
        if (clippedMaskRect.isEmpty()) {
            return false;
        }
        can_filter_mask(maskFilter, shape, unclippedDevShapeBounds, unclippedDevShapeBounds,
                        viewMatrix, &unClippedMaskRect);

        // Use the cache only if >50% of the filtered mask is visible.
        int unclippedWidth = unClippedMaskRect.width();
        int unclippedHeight = unClippedMaskRect.height();
        int64_t unclippedArea = sk_64_mul(unclippedWidth, unclippedHeight);
        int64_t clippedArea = sk_64_mul(clippedMaskRect.width(), clippedMaskRect.height());
        int maxTextureSize = caps->maxTextureSize();
        if (unclippedArea > 2 * clippedArea || unclippedWidth > maxTextureSize ||
            unclippedHeight > maxTextureSize) {
            useCache = false;
        } else {
            // Make the clip not affect the mask
            *boundsForClip = unclippedDevShapeBounds;
        }
    }

    if (useCache) {
        static const skgpu::UniqueKey::Domain kDomain = skgpu::UniqueKey::GenerateDomain();
        skgpu::UniqueKey::Builder builder(maskKey, kDomain, 5 + 2 + shape.unstyledKeySize(),
                                          "Mask Filtered Masks");

        // We require the upper left 2x2 of the matrix to match exactly for a cache hit.
        SkScalar sx = viewMatrix.get(SkMatrix::kMScaleX);
        SkScalar sy = viewMatrix.get(SkMatrix::kMScaleY);
        SkScalar kx = viewMatrix.get(SkMatrix::kMSkewX);
        SkScalar ky = viewMatrix.get(SkMatrix::kMSkewY);
        SkScalar tx = viewMatrix.get(SkMatrix::kMTransX);
        SkScalar ty = viewMatrix.get(SkMatrix::kMTransY);
        // Allow 8 bits each in x and y of subpixel positioning. But, note that we're allowing
        // reuse for integer translations.
        SkFixed fracX = SkScalarToFixed(SkScalarFraction(tx)) & 0x0000FF00;
        SkFixed fracY = SkScalarToFixed(SkScalarFraction(ty)) & 0x0000FF00;

        builder[0] = SkFloat2Bits(sx);
        builder[1] = SkFloat2Bits(sy);
        builder[2] = SkFloat2Bits(kx);
        builder[3] = SkFloat2Bits(ky);
        // Distinguish between hairline and filled paths. For hairlines, we also need to include
        // the cap. (SW grows hairlines by 0.5 pixel with round and square caps). Note that
        // stroke-and-fill of hairlines is turned into pure fill by SkStrokeRec, so this covers
        // all cases we might see.
        uint32_t styleBits = shape.style().isSimpleHairline()
                                    ? ((shape.style().strokeRec().getCap() << 1) | 1)
                                    : 0;
        builder[4] = fracX | (fracY >> 8) | (styleBits << 16);

        SkMaskFilterBase::BlurRec rec;
        SkAssertResult(as_MFB(maskFilter)->asABlur(&rec));

        builder[5] = rec.fStyle;  // TODO: we could put this with the other style bits
        builder[6] = SkFloat2Bits(rec.fSigma);
        shape.writeUnstyledKey(&builder[7]);
    }
#endif

    return true;
}

/**
 * This function is used to implement filters that require an explicit src mask. It should only
 * be called if can_filter_mask returned true and the maskRect param should be the output from
 * that call.
 * Implementations are free to get the GrContext from the src texture in order to create
 * additional textures and perform multiple passes.
 */
static GrSurfaceProxyView filter_mask(GrRecordingContext* context,
                                      const SkMaskFilterBase* maskFilter,
                                      GrSurfaceProxyView srcView,
                                      GrColorType srcColorType,
                                      SkAlphaType srcAlphaType,
                                      const SkMatrix& ctm,
                                      const SkIRect& maskRect) {
    if (maskFilter->type() != SkMaskFilterBase::Type::kBlur) {
        return {};
    }
    auto bmf = static_cast<const SkBlurMaskFilterImpl*>(maskFilter);
    // 'maskRect' isn't snapped to the UL corner but the mask in 'src' is.
    const SkIRect clipRect = SkIRect::MakeWH(maskRect.width(), maskRect.height());

    SkScalar xformedSigma = bmf->computeXformedSigma(ctm);

    // If we're doing a normal blur, we can clobber the pathTexture in the
    // gaussianBlur.  Otherwise, we need to save it for later compositing.
    bool isNormalBlur = (kNormal_SkBlurStyle == bmf->blurStyle());
    auto srcBounds = SkIRect::MakeSize(srcView.proxy()->dimensions());
    auto surfaceDrawContext = GaussianBlur(context,
                                            srcView,
                                            srcColorType,
                                            srcAlphaType,
                                            nullptr,
                                            clipRect,
                                            srcBounds,
                                            xformedSigma,
                                            xformedSigma,
                                            SkTileMode::kClamp);
    if (!surfaceDrawContext || !surfaceDrawContext->asTextureProxy()) {
        return {};
    }

    if (!isNormalBlur) {
        GrPaint paint;
        // Blend pathTexture over blurTexture.
        paint.setCoverageFragmentProcessor(GrTextureEffect::Make(std::move(srcView), srcAlphaType));
        if (kInner_SkBlurStyle == bmf->blurStyle()) {
            // inner:  dst = dst * src
            paint.setCoverageSetOpXPFactory(SkRegion::kIntersect_Op);
        } else if (kSolid_SkBlurStyle == bmf->blurStyle()) {
            // solid:  dst = src + dst - src * dst
            //             = src + (1 - src) * dst
            paint.setCoverageSetOpXPFactory(SkRegion::kUnion_Op);
        } else if (kOuter_SkBlurStyle == bmf->blurStyle()) {
            // outer:  dst = dst * (1 - src)
            //             = 0 * src + (1 - src) * dst
            paint.setCoverageSetOpXPFactory(SkRegion::kDifference_Op);
        } else {
            paint.setCoverageSetOpXPFactory(SkRegion::kReplace_Op);
        }

        surfaceDrawContext->fillPixelsWithLocalMatrix(nullptr, std::move(paint), clipRect,
                                                      SkMatrix::I());
    }

    return surfaceDrawContext->readSurfaceView();
}

static GrSurfaceProxyView hw_create_filtered_mask(GrDirectContext* dContext,
                                                  skgpu::ganesh::SurfaceDrawContext* sdc,
                                                  const SkMatrix& viewMatrix,
                                                  const GrStyledShape& shape,
                                                  const SkMaskFilterBase* filter,
                                                  const SkIRect& unclippedDevShapeBounds,
                                                  const SkIRect& clipBounds,
                                                  SkIRect* maskRect,
                                                  skgpu::UniqueKey* key) {
    if (!can_filter_mask(filter, shape, unclippedDevShapeBounds, clipBounds, viewMatrix,
                         maskRect)) {
        return {};
    }

    if (clip_bounds_quick_reject(clipBounds, *maskRect)) {
        // clipped out
        return {};
    }

    auto threadSafeCache = dContext->priv().threadSafeCache();

    GrSurfaceProxyView lazyView;
    sk_sp<GrThreadSafeCache::Trampoline> trampoline;

    if (key->isValid()) {
        // In this case, we want GPU-filtered masks to have priority over SW-generated ones so
        // we pre-emptively add a lazy-view to the cache and fill it in later.
        std::tie(lazyView, trampoline) = GrThreadSafeCache::CreateLazyView(
                dContext, GrColorType::kAlpha_8, maskRect->size(),
                kMaskOrigin, SkBackingFit::kApprox);
        if (!lazyView) {
            return {}; // fall back to a SW-created mask - 'create_mask_GPU' probably won't succeed
        }

        key->setCustomData(create_data(*maskRect, unclippedDevShapeBounds));
        auto [cachedView, data] = threadSafeCache->findOrAddWithData(*key, lazyView);
        if (cachedView != lazyView) {
            // In this case, the gpu-thread lost out to a recording thread - use its result.
            SkASSERT(data);
            SkASSERT(cachedView.asTextureProxy());
            SkASSERT(cachedView.origin() == kMaskOrigin);

            *maskRect = extract_draw_rect_from_data(data.get(), unclippedDevShapeBounds);
            return cachedView;
        }
    }

    std::unique_ptr<skgpu::ganesh::SurfaceDrawContext> maskSDC(
            create_mask_GPU(dContext, *maskRect, viewMatrix, shape, sdc->numSamples()));
    if (!maskSDC) {
        if (key->isValid()) {
            // It is very unlikely that 'create_mask_GPU' will fail after 'CreateLazyView'
            // succeeded but, if it does, remove the lazy-view from the cache and fallback to
            // a SW-created mask. Note that any recording threads that glommed onto the
            // lazy-view will have to, later, drop those draws.
            threadSafeCache->remove(*key);
        }
        return {};
    }

    auto filteredMaskView = filter_mask(dContext, filter,
                                                  maskSDC->readSurfaceView(),
                                                  maskSDC->colorInfo().colorType(),
                                                  maskSDC->colorInfo().alphaType(),
                                                  viewMatrix,
                                                  *maskRect);
    if (!filteredMaskView) {
        if (key->isValid()) {
            // Remove the lazy-view from the cache and fallback to a SW-created mask. Note that
            // any recording threads that glommed onto the lazy-view will have to, later, drop
            // those draws.
            threadSafeCache->remove(*key);
        }
        return {};
    }

    if (key->isValid()) {
        SkASSERT(filteredMaskView.dimensions() == lazyView.dimensions());
        SkASSERT(filteredMaskView.swizzle() == lazyView.swizzle());
        SkASSERT(filteredMaskView.origin() == lazyView.origin());

        trampoline->fProxy = filteredMaskView.asTextureProxyRef();
        return lazyView;
    }

    return filteredMaskView;
}

static void draw_shape_with_mask_filter(GrRecordingContext* rContext,
                                        skgpu::ganesh::SurfaceDrawContext* sdc,
                                        const GrClip* clip,
                                        GrPaint&& paint,
                                        const SkMatrix& viewMatrix,
                                        const SkMaskFilterBase* maskFilter,
                                        const GrStyledShape& origShape) {
    SkASSERT(maskFilter);

    const GrStyledShape* shape = &origShape;
    SkTLazy<GrStyledShape> tmpShape;

    if (origShape.style().applies()) {
        SkScalar styleScale =  GrStyle::MatrixToScaleFactor(viewMatrix);
        if (styleScale == 0) {
            return;
        }

        tmpShape.init(origShape.applyStyle(GrStyle::Apply::kPathEffectAndStrokeRec, styleScale));
        if (tmpShape->isEmpty()) {
            return;
        }

        shape = tmpShape.get();
    }

    if (direct_filter_mask(rContext, maskFilter, sdc, std::move(paint), clip, viewMatrix, *shape)) {
        // the mask filter was able to draw itself directly, so there's nothing
        // left to do.
        return;
    }
    assert_alive(paint);

    // If the path is hairline, ignore inverse fill.
    bool inverseFilled = shape->inverseFilled() &&
                         !GrIsStrokeHairlineOrEquivalent(shape->style(), viewMatrix, nullptr);

    SkIRect unclippedDevShapeBounds, devClipBounds;
    if (!get_shape_and_clip_bounds(sdc, clip, *shape, viewMatrix,
                                   &unclippedDevShapeBounds, &devClipBounds)) {
        // TODO: just cons up an opaque mask here
        if (!inverseFilled) {
            return;
        }
    }

    skgpu::UniqueKey maskKey;
    SkIRect boundsForClip;
    if (!compute_key_and_clip_bounds(&maskKey, &boundsForClip,
                                     sdc->caps(),
                                     viewMatrix, inverseFilled,
                                     maskFilter, *shape,
                                     unclippedDevShapeBounds,
                                     devClipBounds)) {
        return; // 'shape' was entirely clipped out
    }

    GrSurfaceProxyView filteredMaskView;
    SkIRect maskRect;

    if (auto dContext = rContext->asDirectContext()) {
        filteredMaskView = hw_create_filtered_mask(dContext, sdc,
                                                   viewMatrix, *shape, maskFilter,
                                                   unclippedDevShapeBounds, boundsForClip,
                                                   &maskRect, &maskKey);
        if (filteredMaskView) {
            if (draw_mask(sdc, clip, viewMatrix, maskRect, std::move(paint),
                          std::move(filteredMaskView))) {
                // This path is completely drawn
                return;
            }
            assert_alive(paint);
        }
    }

    // Either HW mask rendering failed or we're in a DDL recording thread
    filteredMaskView = sw_create_filtered_mask(rContext,
                                               viewMatrix, *shape, maskFilter,
                                               unclippedDevShapeBounds, boundsForClip,
                                               &maskRect, &maskKey);
    if (filteredMaskView) {
        if (draw_mask(sdc, clip, viewMatrix, maskRect, std::move(paint),
                      std::move(filteredMaskView))) {
            return;
        }
        assert_alive(paint);
    }
}

bool ComputeBlurredRRectParams(const SkRRect& srcRRect,
                               const SkRRect& devRRect,
                               SkScalar sigma,
                               SkScalar xformedSigma,
                               SkRRect* rrectToDraw,
                               SkISize* widthHeight,
                               SkScalar rectXs[kBlurRRectMaxDivisions],
                               SkScalar rectYs[kBlurRRectMaxDivisions],
                               SkScalar texXs[kBlurRRectMaxDivisions],
                               SkScalar texYs[kBlurRRectMaxDivisions]) {
    unsigned int devBlurRadius = 3 * SkScalarCeilToInt(xformedSigma - 1 / 6.0f);
    SkScalar srcBlurRadius = 3.0f * sigma;

    const SkRect& devOrig = devRRect.getBounds();
    const SkVector& devRadiiUL = devRRect.radii(SkRRect::kUpperLeft_Corner);
    const SkVector& devRadiiUR = devRRect.radii(SkRRect::kUpperRight_Corner);
    const SkVector& devRadiiLR = devRRect.radii(SkRRect::kLowerRight_Corner);
    const SkVector& devRadiiLL = devRRect.radii(SkRRect::kLowerLeft_Corner);

    const int devLeft = SkScalarCeilToInt(std::max<SkScalar>(devRadiiUL.fX, devRadiiLL.fX));
    const int devTop = SkScalarCeilToInt(std::max<SkScalar>(devRadiiUL.fY, devRadiiUR.fY));
    const int devRight = SkScalarCeilToInt(std::max<SkScalar>(devRadiiUR.fX, devRadiiLR.fX));
    const int devBot = SkScalarCeilToInt(std::max<SkScalar>(devRadiiLL.fY, devRadiiLR.fY));

    // This is a conservative check for nine-patchability
    if (devOrig.fLeft + devLeft + devBlurRadius >= devOrig.fRight - devRight - devBlurRadius ||
        devOrig.fTop + devTop + devBlurRadius >= devOrig.fBottom - devBot - devBlurRadius) {
        return false;
    }

    const SkVector& srcRadiiUL = srcRRect.radii(SkRRect::kUpperLeft_Corner);
    const SkVector& srcRadiiUR = srcRRect.radii(SkRRect::kUpperRight_Corner);
    const SkVector& srcRadiiLR = srcRRect.radii(SkRRect::kLowerRight_Corner);
    const SkVector& srcRadiiLL = srcRRect.radii(SkRRect::kLowerLeft_Corner);

    const SkScalar srcLeft = std::max<SkScalar>(srcRadiiUL.fX, srcRadiiLL.fX);
    const SkScalar srcTop = std::max<SkScalar>(srcRadiiUL.fY, srcRadiiUR.fY);
    const SkScalar srcRight = std::max<SkScalar>(srcRadiiUR.fX, srcRadiiLR.fX);
    const SkScalar srcBot = std::max<SkScalar>(srcRadiiLL.fY, srcRadiiLR.fY);

    int newRRWidth = 2 * devBlurRadius + devLeft + devRight + 1;
    int newRRHeight = 2 * devBlurRadius + devTop + devBot + 1;
    widthHeight->fWidth = newRRWidth + 2 * devBlurRadius;
    widthHeight->fHeight = newRRHeight + 2 * devBlurRadius;

    const SkRect srcProxyRect = srcRRect.getBounds().makeOutset(srcBlurRadius, srcBlurRadius);

    rectXs[0] = srcProxyRect.fLeft;
    rectXs[1] = srcProxyRect.fLeft + 2 * srcBlurRadius + srcLeft;
    rectXs[2] = srcProxyRect.fRight - 2 * srcBlurRadius - srcRight;
    rectXs[3] = srcProxyRect.fRight;

    rectYs[0] = srcProxyRect.fTop;
    rectYs[1] = srcProxyRect.fTop + 2 * srcBlurRadius + srcTop;
    rectYs[2] = srcProxyRect.fBottom - 2 * srcBlurRadius - srcBot;
    rectYs[3] = srcProxyRect.fBottom;

    texXs[0] = 0.0f;
    texXs[1] = 2.0f * devBlurRadius + devLeft;
    texXs[2] = 2.0f * devBlurRadius + devLeft + 1;
    texXs[3] = SkIntToScalar(widthHeight->fWidth);

    texYs[0] = 0.0f;
    texYs[1] = 2.0f * devBlurRadius + devTop;
    texYs[2] = 2.0f * devBlurRadius + devTop + 1;
    texYs[3] = SkIntToScalar(widthHeight->fHeight);

    const SkRect newRect = SkRect::MakeXYWH(SkIntToScalar(devBlurRadius),
                                            SkIntToScalar(devBlurRadius),
                                            SkIntToScalar(newRRWidth),
                                            SkIntToScalar(newRRHeight));
    SkVector newRadii[4];
    newRadii[0] = {SkScalarCeilToScalar(devRadiiUL.fX), SkScalarCeilToScalar(devRadiiUL.fY)};
    newRadii[1] = {SkScalarCeilToScalar(devRadiiUR.fX), SkScalarCeilToScalar(devRadiiUR.fY)};
    newRadii[2] = {SkScalarCeilToScalar(devRadiiLR.fX), SkScalarCeilToScalar(devRadiiLR.fY)};
    newRadii[3] = {SkScalarCeilToScalar(devRadiiLL.fX), SkScalarCeilToScalar(devRadiiLL.fY)};

    rrectToDraw->setRectRadii(newRect, newRadii);
    return true;
}

void DrawShapeWithMaskFilter(GrRecordingContext* rContext,
                             skgpu::ganesh::SurfaceDrawContext* sdc,
                             const GrClip* clip,
                             const GrStyledShape& shape,
                             GrPaint&& paint,
                             const SkMatrix& viewMatrix,
                             const SkMaskFilter* mf) {
    draw_shape_with_mask_filter(rContext, sdc, clip, std::move(paint),
                                viewMatrix, as_MFB(mf), shape);
}

void DrawShapeWithMaskFilter(GrRecordingContext* rContext,
                             skgpu::ganesh::SurfaceDrawContext* sdc,
                             const GrClip* clip,
                             const SkPaint& paint,
                             const SkMatrix& ctm,
                             const GrStyledShape& shape) {
    if (rContext->abandoned()) {
        return;
    }

    GrPaint grPaint;
    if (!SkPaintToGrPaint(rContext, sdc->colorInfo(), paint, ctm, sdc->surfaceProps(), &grPaint)) {
        return;
    }

    SkMaskFilterBase* mf = as_MFB(paint.getMaskFilter());
    if (mf && !GrFragmentProcessors::IsSupported(mf)) {
        // The MaskFilter wasn't already handled in SkPaintToGrPaint
        draw_shape_with_mask_filter(rContext, sdc, clip, std::move(grPaint), ctm, mf, shape);
    } else {
        sdc->drawShape(clip, std::move(grPaint), sdc->chooseAA(paint), ctm, GrStyledShape(shape));
    }
}


// =================== Gaussian Blur =========================================

namespace {

enum class Direction { kX, kY };

std::unique_ptr<GrFragmentProcessor> make_texture_effect(const GrCaps* caps,
                                                         GrSurfaceProxyView srcView,
                                                         SkAlphaType srcAlphaType,
                                                         const GrSamplerState& sampler,
                                                         const SkIRect& srcSubset,
                                                         const SkIRect& srcRelativeDstRect,
                                                         const SkISize& radii) {
    // It's pretty common to blur a subset of an input texture. In reduced shader mode we always
    // apply the wrap mode in the shader.
    if (caps->reducedShaderMode()) {
        return GrTextureEffect::MakeSubset(std::move(srcView),
                                           srcAlphaType,
                                           SkMatrix::I(),
                                           sampler,
                                           SkRect::Make(srcSubset),
                                           *caps,
                                           GrTextureEffect::kDefaultBorder,
                                           /*alwaysUseShaderTileMode=*/true);
    } else {
        // Inset because we expect to be invoked at pixel centers
        SkRect domain = SkRect::Make(srcRelativeDstRect);
        domain.inset(0.5f, 0.5f);
        domain.outset(radii.width(), radii.height());
        return GrTextureEffect::MakeSubset(std::move(srcView),
                                           srcAlphaType,
                                           SkMatrix::I(),
                                           sampler,
                                           SkRect::Make(srcSubset),
                                           domain,
                                           *caps);
    }
}

} // end namespace

/**
 * Draws 'dstRect' into 'surfaceFillContext' evaluating a 1D Gaussian over 'srcView'. The src rect
 * is 'dstRect' offset by 'dstToSrcOffset'. 'mode' and 'bounds' are applied to the src coords.
 */
static void convolve_gaussian_1d(skgpu::ganesh::SurfaceFillContext* sfc,
                                 GrSurfaceProxyView srcView,
                                 const SkIRect& srcSubset,
                                 SkIVector dstToSrcOffset,
                                 const SkIRect& dstRect,
                                 SkAlphaType srcAlphaType,
                                 Direction direction,
                                 int radius,
                                 float sigma,
                                 SkTileMode mode) {
    SkASSERT(radius && !skgpu::BlurIsEffectivelyIdentity(sigma));
    auto srcRect = dstRect.makeOffset(dstToSrcOffset);

    std::array<SkV4, skgpu::kMaxBlurSamples/2> offsetsAndKernel;
    skgpu::Compute1DBlurLinearKernel(sigma, radius, offsetsAndKernel);

    // The child of the 1D linear blur effect must be linearly sampled.
    GrSamplerState sampler{SkTileModeToWrapMode(mode), GrSamplerState::Filter::kLinear};

    SkISize radii = {direction == Direction::kX ? radius : 0,
                     direction == Direction::kY ? radius : 0};
    std::unique_ptr<GrFragmentProcessor> child = make_texture_effect(sfc->caps(),
                                                                     std::move(srcView),
                                                                     srcAlphaType,
                                                                     sampler,
                                                                     srcSubset,
                                                                     srcRect,
                                                                     radii);

    auto conv = GrSkSLFP::Make(skgpu::GetLinearBlur1DEffect(radius),
                               "GaussianBlur1D",
                               /*inputFP=*/nullptr,
                               GrSkSLFP::OptFlags::kCompatibleWithCoverageAsAlpha,
                               "offsetsAndKernel", SkSpan<SkV4>{offsetsAndKernel},
                               "dir", direction == Direction::kX ? SkV2{1.f, 0.f}
                                                                 : SkV2{0.f, 1.f},
                               "child", std::move(child));
    sfc->fillRectToRectWithFP(srcRect, dstRect, std::move(conv));
}

static std::unique_ptr<skgpu::ganesh::SurfaceDrawContext> convolve_gaussian_2d(
        GrRecordingContext* rContext,
        GrSurfaceProxyView srcView,
        GrColorType srcColorType,
        const SkIRect& srcBounds,
        const SkIRect& dstBounds,
        int radiusX,
        int radiusY,
        SkScalar sigmaX,
        SkScalar sigmaY,
        SkTileMode mode,
        sk_sp<SkColorSpace> finalCS,
        SkBackingFit dstFit) {
    SkASSERT(radiusX && radiusY);
    SkASSERT(!skgpu::BlurIsEffectivelyIdentity(sigmaX) &&
             !skgpu::BlurIsEffectivelyIdentity(sigmaY));
    // Create the sdc with default SkSurfaceProps. Gaussian blurs will soon use a
    // SurfaceFillContext, at which point the SkSurfaceProps won't exist anymore.
    auto sdc = skgpu::ganesh::SurfaceDrawContext::Make(
            rContext,
            srcColorType,
            std::move(finalCS),
            dstFit,
            dstBounds.size(),
            SkSurfaceProps(),
            /*label=*/"SurfaceDrawContext_ConvolveGaussian2d",
            /* sampleCnt= */ 1,
            skgpu::Mipmapped::kNo,
            srcView.proxy()->isProtected(),
            srcView.origin());
    if (!sdc) {
        return nullptr;
    }

    // GaussianBlur() should have downsampled the request until we can handle the 2D blur with
    // just a uniform array, which is asserted inside the Compute function.
    const SkISize radii{radiusX, radiusY};
    std::array<SkV4, skgpu::kMaxBlurSamples/4> kernel;
    std::array<SkV4, skgpu::kMaxBlurSamples/2> offsets;
    skgpu::Compute2DBlurKernel({sigmaX, sigmaY}, radii, kernel);
    skgpu::Compute2DBlurOffsets(radii, offsets);

    GrSamplerState sampler{SkTileModeToWrapMode(mode), GrSamplerState::Filter::kNearest};
    auto child = make_texture_effect(sdc->caps(),
                                     std::move(srcView),
                                     kPremul_SkAlphaType,
                                     sampler,
                                     srcBounds,
                                     dstBounds,
                                     radii);
    auto conv = GrSkSLFP::Make(skgpu::GetBlur2DEffect(radii),
                               "GaussianBlur2D",
                               /*inputFP=*/nullptr,
                               GrSkSLFP::OptFlags::kNone,
                               "kernel", SkSpan<SkV4>{kernel},
                               "offsets", SkSpan<SkV4>{offsets},
                               "child", std::move(child));

    GrPaint paint;
    paint.setColorFragmentProcessor(std::move(conv));
    paint.setPorterDuffXPFactory(SkBlendMode::kSrc);

    // 'dstBounds' is actually in 'srcView' proxy space. It represents the blurred area from src
    // space that we want to capture in the new RTC at {0, 0}. Hence, we use its size as the rect to
    // draw and it directly as the local rect.
    sdc->fillRectToRect(nullptr,
                        std::move(paint),
                        GrAA::kNo,
                        SkMatrix::I(),
                        SkRect::Make(dstBounds.size()),
                        SkRect::Make(dstBounds));

    return sdc;
}

static std::unique_ptr<skgpu::ganesh::SurfaceDrawContext> convolve_gaussian(
        GrRecordingContext* rContext,
        GrSurfaceProxyView srcView,
        GrColorType srcColorType,
        SkAlphaType srcAlphaType,
        SkIRect srcBounds,
        SkIRect dstBounds,
        Direction direction,
        int radius,
        float sigma,
        SkTileMode mode,
        sk_sp<SkColorSpace> finalCS,
        SkBackingFit fit) {
    SkASSERT(radius > 0 && !skgpu::BlurIsEffectivelyIdentity(sigma));
    // Logically we're creating an infinite blur of 'srcBounds' of 'srcView' with 'mode' tiling
    // and then capturing the 'dstBounds' portion in a new RTC where the top left of 'dstBounds' is
    // at {0, 0} in the new RTC.
    //
    // Create the sdc with default SkSurfaceProps. Gaussian blurs will soon use a
    // SurfaceFillContext, at which point the SkSurfaceProps won't exist anymore.
    auto dstSDC =
            skgpu::ganesh::SurfaceDrawContext::Make(rContext,
                                                    srcColorType,
                                                    std::move(finalCS),
                                                    fit,
                                                    dstBounds.size(),
                                                    SkSurfaceProps(),
                                                    /*label=*/"SurfaceDrawContext_ConvolveGaussian",
                                                    /* sampleCnt= */ 1,
                                                    skgpu::Mipmapped::kNo,
                                                    srcView.proxy()->isProtected(),
                                                    srcView.origin());
    if (!dstSDC) {
        return nullptr;
    }
    // This represents the translation from 'dstSurfaceDrawContext' coords to 'srcView' coords.
    auto rtcToSrcOffset = dstBounds.topLeft();

    auto srcBackingBounds = SkIRect::MakeSize(srcView.proxy()->backingStoreDimensions());
    // We've implemented splitting the dst bounds up into areas that do and do not need to
    // use shader based tiling but only for some modes...
    bool canSplit = mode == SkTileMode::kDecal || mode == SkTileMode::kClamp;
    // ...but it's not worth doing the splitting if we'll get HW tiling instead of shader tiling.
    bool canHWTile =
            srcBounds.contains(srcBackingBounds) &&
            !rContext->priv().caps()->reducedShaderMode() &&  // this mode always uses shader tiling
            !(mode == SkTileMode::kDecal && !rContext->priv().caps()->clampToBorderSupport());
    if (!canSplit || canHWTile) {
        auto dstRect = SkIRect::MakeSize(dstBounds.size());
        convolve_gaussian_1d(dstSDC.get(),
                             std::move(srcView),
                             srcBounds,
                             rtcToSrcOffset,
                             dstRect,
                             srcAlphaType,
                             direction,
                             radius,
                             sigma,
                             mode);
        return dstSDC;
    }

    // 'left' and 'right' are the sub rects of 'srcBounds' where 'mode' must be enforced.
    // 'mid' is the area where we can ignore the mode because the kernel does not reach to the
    // edge of 'srcBounds'.
    SkIRect mid, left, right;
    // 'top' and 'bottom' are areas of 'dstBounds' that are entirely above/below 'srcBounds'.
    // These are areas that we can simply clear in the dst in kDecal mode. If 'srcBounds'
    // straddles the top edge of 'dstBounds' then 'top' will be inverted and we will skip
    // processing for the rect. Similar for 'bottom'. The positional/directional labels above refer
    // to the Direction::kX case and one should think of these as 'left' and 'right' for
    // Direction::kY.
    SkIRect top, bottom;
    if (Direction::kX == direction) {
        top = {dstBounds.left(), dstBounds.top(), dstBounds.right(), srcBounds.top()};
        bottom = {dstBounds.left(), srcBounds.bottom(), dstBounds.right(), dstBounds.bottom()};

        // Inset for sub-rect of 'srcBounds' where the x-dir kernel doesn't reach the edges, clipped
        // vertically to dstBounds.
        int midA = std::max(srcBounds.top(), dstBounds.top());
        int midB = std::min(srcBounds.bottom(), dstBounds.bottom());
        mid = {srcBounds.left() + radius, midA, srcBounds.right() - radius, midB};
        if (mid.isEmpty()) {
            // There is no middle where the bounds can be ignored. Make the left span the whole
            // width of dst and we will not draw mid or right.
            left = {dstBounds.left(), mid.top(), dstBounds.right(), mid.bottom()};
        } else {
            left = {dstBounds.left(), mid.top(), mid.left(), mid.bottom()};
            right = {mid.right(), mid.top(), dstBounds.right(), mid.bottom()};
        }
    } else {
        // This is the same as the x direction code if you turn your head 90 degrees CCW. Swap x and
        // y and swap top/bottom with left/right.
        top = {dstBounds.left(), dstBounds.top(), srcBounds.left(), dstBounds.bottom()};
        bottom = {srcBounds.right(), dstBounds.top(), dstBounds.right(), dstBounds.bottom()};

        int midA = std::max(srcBounds.left(), dstBounds.left());
        int midB = std::min(srcBounds.right(), dstBounds.right());
        mid = {midA, srcBounds.top() + radius, midB, srcBounds.bottom() - radius};

        if (mid.isEmpty()) {
            left = {mid.left(), dstBounds.top(), mid.right(), dstBounds.bottom()};
        } else {
            left = {mid.left(), dstBounds.top(), mid.right(), mid.top()};
            right = {mid.left(), mid.bottom(), mid.right(), dstBounds.bottom()};
        }
    }

    auto convolve = [&](SkIRect rect) {
        // Transform rect into the render target's coord system.
        rect.offset(-rtcToSrcOffset);
        convolve_gaussian_1d(dstSDC.get(),
                             srcView,
                             srcBounds,
                             rtcToSrcOffset,
                             rect,
                             srcAlphaType,
                             direction,
                             radius,
                             sigma,
                             mode);
    };
    auto clear = [&](SkIRect rect) {
        // Transform rect into the render target's coord system.
        rect.offset(-rtcToSrcOffset);
        dstSDC->clearAtLeast(rect, SK_PMColor4fTRANSPARENT);
    };

    // Doing mid separately will cause two draws to occur (left and right batch together). At
    // small sizes of mid it is worse to issue more draws than to just execute the slightly
    // more complicated shader that implements the tile mode across mid. This threshold is
    // very arbitrary right now. It is believed that a 21x44 mid on a Moto G4 is a significant
    // regression compared to doing one draw but it has not been locally evaluated or tuned.
    // The optimal cutoff is likely to vary by GPU.
    if (!mid.isEmpty() && mid.width() * mid.height() < 256 * 256) {
        left.join(mid);
        left.join(right);
        mid = SkIRect::MakeEmpty();
        right = SkIRect::MakeEmpty();
        // It's unknown whether for kDecal it'd be better to expand the draw rather than a draw and
        // up to two clears.
        if (mode == SkTileMode::kClamp) {
            left.join(top);
            left.join(bottom);
            top = SkIRect::MakeEmpty();
            bottom = SkIRect::MakeEmpty();
        }
    }

    if (!top.isEmpty()) {
        if (mode == SkTileMode::kDecal) {
            clear(top);
        } else {
            convolve(top);
        }
    }

    if (!bottom.isEmpty()) {
        if (mode == SkTileMode::kDecal) {
            clear(bottom);
        } else {
            convolve(bottom);
        }
    }

    if (mid.isEmpty()) {
        convolve(left);
    } else {
        convolve(left);
        convolve(right);
        convolve(mid);
    }
    return dstSDC;
}

// Expand the contents of 'src' to fit in 'dstSize'. At this point, we are expanding an intermediate
// image, so there's no need to account for a proxy offset from the original input.
static std::unique_ptr<skgpu::ganesh::SurfaceDrawContext> reexpand(
        GrRecordingContext* rContext,
        std::unique_ptr<skgpu::ganesh::SurfaceContext> src,
        const SkRect& srcBounds,
        SkISize dstSize,
        sk_sp<SkColorSpace> colorSpace,
        SkBackingFit fit) {
    GrSurfaceProxyView srcView = src->readSurfaceView();
    if (!srcView.asTextureProxy()) {
        return nullptr;
    }

    GrColorType srcColorType = src->colorInfo().colorType();
    SkAlphaType srcAlphaType = src->colorInfo().alphaType();

#if defined(SK_USE_PADDED_BLUR_UPSCALE)
    // The blur output completely filled the src SurfaceContext, so that is our subset boundary,
    // ensuring we don't access undefined pixels in the approx-fit backing texture.
    SkRect srcContent = SkRect::MakeIWH(src->width(), src->height());
#endif

    src.reset();  // no longer needed

    // Create the sdc with default SkSurfaceProps. Gaussian blurs will soon use a
    // SurfaceFillContext, at which point the SkSurfaceProps won't exist anymore.
    auto dstSDC = skgpu::ganesh::SurfaceDrawContext::Make(rContext,
                                                          srcColorType,
                                                          std::move(colorSpace),
                                                          fit,
                                                          dstSize,
                                                          SkSurfaceProps(),
                                                          /*label=*/"SurfaceDrawContext_Reexpand",
                                                          /* sampleCnt= */ 1,
                                                          skgpu::Mipmapped::kNo,
                                                          srcView.proxy()->isProtected(),
                                                          srcView.origin());
    if (!dstSDC) {
        return nullptr;
    }

    GrPaint paint;
    auto fp = GrTextureEffect::MakeSubset(std::move(srcView),
                                          srcAlphaType,
                                          SkMatrix::I(),
                                          GrSamplerState::Filter::kLinear,
#if defined(SK_USE_PADDED_BLUR_UPSCALE)
                                          srcContent,
#else
                                          srcBounds,
                                          srcBounds,
#endif
                                          *rContext->priv().caps());
    paint.setColorFragmentProcessor(std::move(fp));
    paint.setPorterDuffXPFactory(SkBlendMode::kSrc);

    dstSDC->fillRectToRect(
            nullptr, std::move(paint), GrAA::kNo, SkMatrix::I(), SkRect::Make(dstSize), srcBounds);

    return dstSDC;
}

static std::unique_ptr<skgpu::ganesh::SurfaceDrawContext> two_pass_gaussian(
        GrRecordingContext* rContext,
        GrSurfaceProxyView srcView,
        GrColorType srcColorType,
        SkAlphaType srcAlphaType,
        sk_sp<SkColorSpace> colorSpace,
        SkIRect srcBounds,
        SkIRect dstBounds,
        float sigmaX,
        float sigmaY,
        int radiusX,
        int radiusY,
        SkTileMode mode,
        SkBackingFit fit) {
    SkASSERT(radiusX || radiusY);
    std::unique_ptr<skgpu::ganesh::SurfaceDrawContext> dstSDC;
    if (radiusX > 0) {
        SkBackingFit xFit = radiusY > 0 ? SkBackingFit::kApprox : fit;
        // Expand the dstBounds vertically to produce necessary content for the y-pass. Then we will
        // clip these in a tile-mode dependent way to ensure the tile-mode gets implemented
        // correctly. However, if we're not going to do a y-pass then we must use the original
        // dstBounds without clipping to produce the correct output size.
        SkIRect xPassDstBounds = dstBounds;
        if (radiusY) {
            xPassDstBounds.outset(0, radiusY);
            if (mode == SkTileMode::kRepeat || mode == SkTileMode::kMirror) {
                int srcH = srcBounds.height();
                int srcTop = srcBounds.top();
                if (mode == SkTileMode::kMirror) {
                    srcTop -= srcH;
                    srcH *= 2;
                }

                float floatH = srcH;
                // First row above the dst rect where we should restart the tile mode.
                int n = sk_float_floor2int_no_saturate((xPassDstBounds.top() - srcTop) / floatH);
                int topClip = srcTop + n * srcH;

                // First row above below the dst rect where we should restart the tile mode.
                n = sk_float_ceil2int_no_saturate((xPassDstBounds.bottom() - srcBounds.bottom()) /
                                                  floatH);
                int bottomClip = srcBounds.bottom() + n * srcH;

                xPassDstBounds.fTop = std::max(xPassDstBounds.top(), topClip);
                xPassDstBounds.fBottom = std::min(xPassDstBounds.bottom(), bottomClip);
            } else {
                if (xPassDstBounds.fBottom <= srcBounds.top()) {
                    if (mode == SkTileMode::kDecal) {
                        return nullptr;
                    }
                    xPassDstBounds.fTop = srcBounds.top();
                    xPassDstBounds.fBottom = xPassDstBounds.fTop + 1;
                } else if (xPassDstBounds.fTop >= srcBounds.bottom()) {
                    if (mode == SkTileMode::kDecal) {
                        return nullptr;
                    }
                    xPassDstBounds.fBottom = srcBounds.bottom();
                    xPassDstBounds.fTop = xPassDstBounds.fBottom - 1;
                } else {
                    xPassDstBounds.fTop = std::max(xPassDstBounds.fTop, srcBounds.top());
                    xPassDstBounds.fBottom = std::min(xPassDstBounds.fBottom, srcBounds.bottom());
                }
                int leftSrcEdge = srcBounds.fLeft - radiusX;
                int rightSrcEdge = srcBounds.fRight + radiusX;
                if (mode == SkTileMode::kClamp) {
                    // In clamp the column just outside the src bounds has the same value as the
                    // column just inside, unlike decal.
                    leftSrcEdge += 1;
                    rightSrcEdge -= 1;
                }
                if (xPassDstBounds.fRight <= leftSrcEdge) {
                    if (mode == SkTileMode::kDecal) {
                        return nullptr;
                    }
                    xPassDstBounds.fLeft = xPassDstBounds.fRight - 1;
                } else {
                    xPassDstBounds.fLeft = std::max(xPassDstBounds.fLeft, leftSrcEdge);
                }
                if (xPassDstBounds.fLeft >= rightSrcEdge) {
                    if (mode == SkTileMode::kDecal) {
                        return nullptr;
                    }
                    xPassDstBounds.fRight = xPassDstBounds.fLeft + 1;
                } else {
                    xPassDstBounds.fRight = std::min(xPassDstBounds.fRight, rightSrcEdge);
                }
            }
        }
        dstSDC = convolve_gaussian(rContext,
                                   std::move(srcView),
                                   srcColorType,
                                   srcAlphaType,
                                   srcBounds,
                                   xPassDstBounds,
                                   Direction::kX,
                                   radiusX,
                                   sigmaX,
                                   mode,
                                   colorSpace,
                                   xFit);
        if (!dstSDC) {
            return nullptr;
        }
        srcView = dstSDC->readSurfaceView();
        SkIVector newDstBoundsOffset = dstBounds.topLeft() - xPassDstBounds.topLeft();
        dstBounds = SkIRect::MakeSize(dstBounds.size()).makeOffset(newDstBoundsOffset);
        srcBounds = SkIRect::MakeSize(xPassDstBounds.size());
    }

    if (!radiusY) {
        return dstSDC;
    }

    return convolve_gaussian(rContext,
                             std::move(srcView),
                             srcColorType,
                             srcAlphaType,
                             srcBounds,
                             dstBounds,
                             Direction::kY,
                             radiusY,
                             sigmaY,
                             mode,
                             std::move(colorSpace),
                             fit);
}

std::unique_ptr<skgpu::ganesh::SurfaceDrawContext> GaussianBlur(GrRecordingContext* rContext,
                                                                GrSurfaceProxyView srcView,
                                                                GrColorType srcColorType,
                                                                SkAlphaType srcAlphaType,
                                                                sk_sp<SkColorSpace> colorSpace,
                                                                SkIRect dstBounds,
                                                                SkIRect srcBounds,
                                                                float sigmaX,
                                                                float sigmaY,
                                                                SkTileMode mode,
                                                                SkBackingFit fit) {
    SkASSERT(rContext);
    TRACE_EVENT2("skia.gpu", "GaussianBlur", "sigmaX", sigmaX, "sigmaY", sigmaY);

    if (!srcView.asTextureProxy()) {
        return nullptr;
    }

    int maxRenderTargetSize = rContext->priv().caps()->maxRenderTargetSize();
    if (dstBounds.width() > maxRenderTargetSize || dstBounds.height() > maxRenderTargetSize) {
        return nullptr;
    }

    int radiusX = skgpu::BlurSigmaRadius(sigmaX);
    int radiusY = skgpu::BlurSigmaRadius(sigmaY);
    // Attempt to reduce the srcBounds in order to detect that we can set the sigmas to zero or
    // to reduce the amount of work to rescale the source if sigmas are large. TODO: Could consider
    // how to minimize the required source bounds for repeat/mirror modes.
    if (mode == SkTileMode::kClamp || mode == SkTileMode::kDecal) {
        SkIRect reach = dstBounds.makeOutset(radiusX, radiusY);
        SkIRect intersection;
        if (!intersection.intersect(reach, srcBounds)) {
            if (mode == SkTileMode::kDecal) {
                return nullptr;
            } else {
                if (reach.fLeft >= srcBounds.fRight) {
                    srcBounds.fLeft = srcBounds.fRight - 1;
                } else if (reach.fRight <= srcBounds.fLeft) {
                    srcBounds.fRight = srcBounds.fLeft + 1;
                }
                if (reach.fTop >= srcBounds.fBottom) {
                    srcBounds.fTop = srcBounds.fBottom - 1;
                } else if (reach.fBottom <= srcBounds.fTop) {
                    srcBounds.fBottom = srcBounds.fTop + 1;
                }
            }
        } else {
            srcBounds = intersection;
        }
    }

    if (mode != SkTileMode::kDecal) {
        // All non-decal tile modes are equivalent for one pixel width/height src and amount to a
        // single color value repeated at each column/row. Applying the normalized kernel to that
        // column/row yields that same color. So no blurring is necessary.
        if (srcBounds.width() == 1) {
            sigmaX = 0.f;
            radiusX = 0;
        }
        if (srcBounds.height() == 1) {
            sigmaY = 0.f;
            radiusY = 0;
        }
    }

    // If we determined that there is no blurring necessary in either direction then just do a
    // a draw that applies the tile mode.
    if (!radiusX && !radiusY) {
        // Create the sdc with default SkSurfaceProps. Gaussian blurs will soon use a
        // SurfaceFillContext, at which point the SkSurfaceProps won't exist anymore.
        auto result =
                skgpu::ganesh::SurfaceDrawContext::Make(rContext,
                                                        srcColorType,
                                                        std::move(colorSpace),
                                                        fit,
                                                        dstBounds.size(),
                                                        SkSurfaceProps(),
                                                        /*label=*/"SurfaceDrawContext_GaussianBlur",
                                                        /* sampleCnt= */ 1,
                                                        skgpu::Mipmapped::kNo,
                                                        srcView.proxy()->isProtected(),
                                                        srcView.origin());
        if (!result) {
            return nullptr;
        }
        GrSamplerState sampler(SkTileModeToWrapMode(mode), GrSamplerState::Filter::kNearest);
        auto fp = GrTextureEffect::MakeSubset(std::move(srcView),
                                              srcAlphaType,
                                              SkMatrix::I(),
                                              sampler,
                                              SkRect::Make(srcBounds),
                                              SkRect::Make(dstBounds),
                                              *rContext->priv().caps());
        result->fillRectToRectWithFP(dstBounds, SkIRect::MakeSize(dstBounds.size()), std::move(fp));
        return result;
    }

    // Any sigma higher than the limit for the 1D linear-filtered Gaussian blur is downsampled. If
    // the sigma in X and Y just so happen to fit in the 2D limit, we'll use that. The 2D limit is
    // always less than the linear blur sigma limit.
    static constexpr float kMaxSigma = skgpu::kMaxLinearBlurSigma;
    if (sigmaX <= kMaxSigma && sigmaY <= kMaxSigma) {
        // For really small blurs (certainly no wider than 5x5 on desktop GPUs) it is faster to just
        // launch a single non separable kernel vs two launches.
        const int kernelSize = skgpu::BlurKernelWidth(radiusX) * skgpu::BlurKernelWidth(radiusY);
        if (radiusX > 0 && radiusY > 0 &&
            kernelSize <= skgpu::kMaxBlurSamples &&
            !rContext->priv().caps()->reducedShaderMode()) {
            // Apply the proxy offset to src bounds and offset directly
            return convolve_gaussian_2d(rContext,
                                        std::move(srcView),
                                        srcColorType,
                                        srcBounds,
                                        dstBounds,
                                        radiusX,
                                        radiusY,
                                        sigmaX,
                                        sigmaY,
                                        mode,
                                        std::move(colorSpace),
                                        fit);
        }
        // This will automatically degenerate into a single pass of X or Y if only one of the
        // radii are non-zero.
        SkASSERT(skgpu::BlurLinearKernelWidth(radiusX) <= skgpu::kMaxBlurSamples &&
                 skgpu::BlurLinearKernelWidth(radiusY) <= skgpu::kMaxBlurSamples);
        return two_pass_gaussian(rContext,
                                 std::move(srcView),
                                 srcColorType,
                                 srcAlphaType,
                                 std::move(colorSpace),
                                 srcBounds,
                                 dstBounds,
                                 sigmaX,
                                 sigmaY,
                                 radiusX,
                                 radiusY,
                                 mode,
                                 fit);
    }

    GrColorInfo colorInfo(srcColorType, srcAlphaType, colorSpace);
    auto srcCtx = rContext->priv().makeSC(srcView, colorInfo);
    SkASSERT(srcCtx);

#if defined(SK_USE_PADDED_BLUR_UPSCALE)
    // When we are in clamp mode any artifacts in the edge pixels due to downscaling may be
    // exacerbated because of the tile mode. The particularly egregious case is when the original
    // image has transparent black around the edges and the downscaling pulls in some non-zero
    // values from the interior. Ultimately it'd be better for performance if the calling code could
    // give us extra context around the blur to account for this. We don't currently have a good way
    // to communicate this up stack. So we leave a 1 pixel border around the rescaled src bounds.
    // We populate the top 1 pixel tall row of this border by rescaling the top row of the original
    // source bounds into it. Because this is only rescaling in x (i.e. rescaling a 1 pixel high
    // row into a shorter but still 1 pixel high row) we won't read any interior values. And similar
    // for the other three borders. We'll adjust the source/dest bounds rescaled blur so that this
    // border of extra pixels is used as the edge pixels for clamp mode but the dest bounds
    // corresponds only to the pixels inside the border (the normally rescaled pixels inside this
    // border).
    // Moreover, if we clamped the rescaled size to 1 column or row then we still have a sigma
    // that is greater than kMaxSigma. By using a pad and making the src 3 wide/tall instead of
    // 1 we can recurse again and do another downscale. Since mirror and repeat modes are trivial
    // for a single col/row we only add padding based on sigma exceeding kMaxSigma for decal.
    int padX = mode == SkTileMode::kClamp || (mode == SkTileMode::kDecal && sigmaX > kMaxSigma) ? 1
                                                                                                : 0;
    int padY = mode == SkTileMode::kClamp || (mode == SkTileMode::kDecal && sigmaY > kMaxSigma) ? 1
                                                                                                : 0;
#endif

    float scaleX = sigmaX > kMaxSigma ? kMaxSigma / sigmaX : 1.f;
    float scaleY = sigmaY > kMaxSigma ? kMaxSigma / sigmaY : 1.f;
    // We round down here so that when we recalculate sigmas we know they will be below
    // kMaxSigma (but clamp to 1 do we don't have an empty texture).
    SkISize rescaledSize = {std::max(sk_float_floor2int(srcBounds.width() * scaleX), 1),
                            std::max(sk_float_floor2int(srcBounds.height() * scaleY), 1)};
    // Compute the sigmas using the actual scale factors used once we integerized the
    // rescaledSize.
    scaleX = static_cast<float>(rescaledSize.width()) / srcBounds.width();
    scaleY = static_cast<float>(rescaledSize.height()) / srcBounds.height();
    sigmaX *= scaleX;
    sigmaY *= scaleY;

#if !defined(SK_USE_PADDED_BLUR_UPSCALE)
    // Historically, padX and padY were calculated after scaling sigmaX,Y, which meant that they
    // would never be greater than kMaxSigma. This causes pixel diffs so must be guarded along with
    // the rest of the padding dst behavior.
    int padX = mode == SkTileMode::kClamp || (mode == SkTileMode::kDecal && sigmaX > kMaxSigma) ? 1
                                                                                                : 0;
    int padY = mode == SkTileMode::kClamp || (mode == SkTileMode::kDecal && sigmaY > kMaxSigma) ? 1
                                                                                                : 0;
#endif

    // Create the sdc with default SkSurfaceProps. Gaussian blurs will soon use a
    // SurfaceFillContext, at which point the SkSurfaceProps won't exist anymore.
    auto rescaledSDC = skgpu::ganesh::SurfaceDrawContext::Make(
            srcCtx->recordingContext(),
            colorInfo.colorType(),
            colorInfo.refColorSpace(),
            SkBackingFit::kApprox,
            {rescaledSize.width() + 2 * padX, rescaledSize.height() + 2 * padY},
            SkSurfaceProps(),
            /*label=*/"RescaledSurfaceDrawContext",
            /* sampleCnt= */ 1,
            skgpu::Mipmapped::kNo,
            srcCtx->asSurfaceProxy()->isProtected(),
            srcCtx->origin());
    if (!rescaledSDC) {
        return nullptr;
    }
    if ((padX || padY) && mode == SkTileMode::kDecal) {
        rescaledSDC->clear(SkPMColor4f{0, 0, 0, 0});
    }
    if (!srcCtx->rescaleInto(rescaledSDC.get(),
                             SkIRect::MakeSize(rescaledSize).makeOffset(padX, padY),
                             srcBounds,
                             SkSurface::RescaleGamma::kSrc,
                             SkSurface::RescaleMode::kRepeatedLinear)) {
        return nullptr;
    }
    if (mode == SkTileMode::kClamp) {
        SkASSERT(padX == 1 && padY == 1);
        // Rather than run a potentially multi-pass rescaler on single rows/columns we just do a
        // single bilerp draw. If we find this quality unacceptable we should think more about how
        // to rescale these with better quality but without 4 separate multi-pass downscales.
        auto cheapDownscale = [&](SkIRect dstRect, SkIRect srcRect) {
            rescaledSDC->drawTexture(nullptr,
                                     srcCtx->readSurfaceView(),
                                     srcAlphaType,
                                     GrSamplerState::Filter::kLinear,
                                     GrSamplerState::MipmapMode::kNone,
                                     SkBlendMode::kSrc,
                                     SK_PMColor4fWHITE,
                                     SkRect::Make(srcRect),
                                     SkRect::Make(dstRect),
                                     GrQuadAAFlags::kNone,
                                     SkCanvas::SrcRectConstraint::kFast_SrcRectConstraint,
                                     SkMatrix::I(),
                                     nullptr);
        };
        auto [dw, dh] = rescaledSize;
        // The are the src rows and columns from the source that we will scale into the dst padding.
        float sLCol = srcBounds.left();
        float sTRow = srcBounds.top();
        float sRCol = srcBounds.right() - 1;
        float sBRow = srcBounds.bottom() - 1;

        int sx = srcBounds.left();
        int sy = srcBounds.top();
        int sw = srcBounds.width();
        int sh = srcBounds.height();

        // Downscale the edges from the original source. These draws should batch together (and with
        // the above interior rescaling when it is a single pass).
        cheapDownscale(SkIRect::MakeXYWH(0, 1, 1, dh), SkIRect::MakeXYWH(sLCol, sy, 1, sh));
        cheapDownscale(SkIRect::MakeXYWH(1, 0, dw, 1), SkIRect::MakeXYWH(sx, sTRow, sw, 1));
        cheapDownscale(SkIRect::MakeXYWH(dw + 1, 1, 1, dh), SkIRect::MakeXYWH(sRCol, sy, 1, sh));
        cheapDownscale(SkIRect::MakeXYWH(1, dh + 1, dw, 1), SkIRect::MakeXYWH(sx, sBRow, sw, 1));

        // Copy the corners from the original source. These would batch with the edges except that
        // at time of writing we recognize these can use kNearest and downgrade the filter. So they
        // batch with each other but not the edge draws.
        cheapDownscale(SkIRect::MakeXYWH(0, 0, 1, 1), SkIRect::MakeXYWH(sLCol, sTRow, 1, 1));
        cheapDownscale(SkIRect::MakeXYWH(dw + 1, 0, 1, 1), SkIRect::MakeXYWH(sRCol, sTRow, 1, 1));
        cheapDownscale(SkIRect::MakeXYWH(dw + 1, dh + 1, 1, 1),
                       SkIRect::MakeXYWH(sRCol, sBRow, 1, 1));
        cheapDownscale(SkIRect::MakeXYWH(0, dh + 1, 1, 1), SkIRect::MakeXYWH(sLCol, sBRow, 1, 1));
    }
    srcView = rescaledSDC->readSurfaceView();
    // Drop the contexts so we don't hold the proxies longer than necessary.
    rescaledSDC.reset();
    srcCtx.reset();

    // Compute the dst bounds in the scaled down space. First move the origin to be at the top
    // left since we trimmed off everything above and to the left of the original src bounds during
    // the rescale.
    SkRect scaledDstBounds = SkRect::Make(dstBounds.makeOffset(-srcBounds.topLeft()));
    scaledDstBounds.fLeft *= scaleX;
    scaledDstBounds.fTop *= scaleY;
    scaledDstBounds.fRight *= scaleX;
    scaledDstBounds.fBottom *= scaleY;
    // Account for padding in our rescaled src, if any.
    scaledDstBounds.offset(padX, padY);
    // Turn the scaled down dst bounds into an integer pixel rect, adding 1px of padding to help
    // with boundary sampling during re-expansion when there are extreme scale factors. This is
    // particularly important when the blurs extend across Chrome raster tiles; w/o it the re-expand
    // produces visible seams: crbug.com/1500021.
#if defined(SK_USE_PADDED_BLUR_UPSCALE)
    static constexpr int kDstPadding = 1;
#else
    static constexpr int kDstPadding = 0;
#endif
    auto scaledDstBoundsI = scaledDstBounds.roundOut();
    scaledDstBoundsI.outset(kDstPadding, kDstPadding);

    SkIRect scaledSrcBounds = SkIRect::MakeSize(srcView.dimensions());
    auto sdc = GaussianBlur(rContext,
                            std::move(srcView),
                            srcColorType,
                            srcAlphaType,
                            colorSpace,
                            scaledDstBoundsI,
                            scaledSrcBounds,
                            sigmaX,
                            sigmaY,
                            mode,
                            fit);
    if (!sdc) {
        return nullptr;
    }

    SkASSERT(sdc->width() == scaledDstBoundsI.width() &&
             sdc->height() == scaledDstBoundsI.height());
    // We rounded out the integer scaled dst bounds. Select the fractional dst bounds from the
    // integer dimension blurred result when we scale back up. This also accounts for the padding
    // added to 'scaledDstBoundsI' when sampling from the blurred result.
    scaledDstBounds.offset(-scaledDstBoundsI.left(), -scaledDstBoundsI.top());
    return reexpand(rContext,
                    std::move(sdc),
                    scaledDstBounds,
                    dstBounds.size(),
                    std::move(colorSpace),
                    fit);
}

}  // namespace GrBlurUtils