//
// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
#pragma once

#include "Optimization.hpp"
#include "NetworkUtils.hpp"

#include <armnn/Logging.hpp>
#include <armnnUtils/Permute.hpp>

namespace armnn
{
namespace optimizations
{

class ConvertConstDequantisationLayersToConstLayersImpl
{
public:
    void Run(Graph& graph, InputSlot& connection) const
    {
        Layer& base = connection.GetConnectedOutputSlot()->GetOwningLayer();
        Layer& child = connection.GetOwningLayer();

        ARMNN_ASSERT(base.GetType() == LayerType::Constant);
        ARMNN_ASSERT(child.GetType() == LayerType::Dequantize);

        ReplaceConstDequantisationLayer(graph,
                                        PolymorphicDowncast<ConstantLayer*>(&base),
                                        PolymorphicDowncast<DequantizeLayer*>(&child));
    }
protected:
    ConvertConstDequantisationLayersToConstLayersImpl() = default;
    ~ConvertConstDequantisationLayersToConstLayersImpl() = default;
private:

    static void ReplaceConstDequantisationLayer(Graph&,
                                                ConstantLayer* constantLayer,
                                                DequantizeLayer* dequantizeLayer)
    {
        ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl::ReplaceConstDequantisationLayer()";
        /**
         * This optimisation finds situations where a constant set of inputs is provided to a Dequantize
         * layer. In that case we don't want the overhead of dequantizing the values on every inference;
         * instead we dequantize them once and store them in a Const layer to be used every time, as they
         * will not change.
         */
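        // A minimal before/after sketch of the rewrite (layer names illustrative):
        //   Before: ConstantLayer (Float16 or INT8) -> DequantizeLayer -> next layer
        //   After:  ConstantLayer (Float32, dequantized once) -> next layer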
        TensorInfo constantInfo = constantLayer->GetOutputSlot(0).GetTensorInfo();
        TensorInfo inputDequantizeInfo = dequantizeLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
        TensorInfo outputDequantizeInfo = dequantizeLayer->GetOutputSlot(0).GetTensorInfo();

        bool requiresPermute = false;

        auto connection = dequantizeLayer->GetOutputSlot(0).GetConnection(0);
        if (connection)
        {
            if (connection->GetOwningLayer().GetType() == LayerType::Convolution2d)
            {
                /**
                 * ArmNN does not currently support non-fixed weights or bias.
                 * The NNAPI filter is always OHWI [depth_out, filter_height, filter_width, depth_in],
                 * but ArmNN expects the filter's height and width indices to match the input's height
                 * and width indices, so we permute it to OIHW if the DataLayout is NCHW.
                 */
                ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl:: Connected to "
                                   "Convolution layer.";
                auto conv2dLayer = PolymorphicDowncast<Convolution2dLayer*>(&connection->GetOwningLayer());
                if (conv2dLayer->GetParameters().m_DataLayout == DataLayout::NCHW)
                {
                    ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl:: Connected to "
                                       "Convolution layer and requires permute on weights.";
                    requiresPermute = true;
                }
            }
        }

        ARMNN_ASSERT(constantLayer->GetNumOutputSlots() == 1);
        auto numConnections = constantLayer->GetOutputSlot(0).GetNumConnections();

        ARMNN_LOG(info) << "constantInfo datatype: " << armnn::GetDataTypeName(constantInfo.GetDataType())
                        << ", inputDequantizeInfo datatype: " << armnn::GetDataTypeName(inputDequantizeInfo.GetDataType())
                        << ", outputDequantizeInfo datatype: " << armnn::GetDataTypeName(outputDequantizeInfo.GetDataType());

        std::vector<float> newValues(outputDequantizeInfo.GetNumElements());
        if (constantInfo.GetDataType() == DataType::Float16 &&
            inputDequantizeInfo.GetDataType() == DataType::Float16 &&
            outputDequantizeInfo.GetDataType() == DataType::Float32)
        {
            ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl:: Converting FP16 -> FP32";
            armnnUtils::FloatingPointConverter::ConvertFloat16To32(constantLayer->m_LayerOutput->Map(true),
                                                                   outputDequantizeInfo.GetNumElements(),
                                                                   newValues.data());
        }
        else if (((constantInfo.GetDataType() == DataType::QAsymmS8
                   && inputDequantizeInfo.GetDataType() == DataType::QAsymmS8)
                  || (constantInfo.GetDataType() == DataType::QSymmS8
                      && inputDequantizeInfo.GetDataType() == DataType::QSymmS8)) &&
                 outputDequantizeInfo.GetDataType() == DataType::Float32)
        {
            ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl:: Converting INT8 -> FP32";
            ConvertInt8To32(constantLayer->m_LayerOutput->Map(true),
                            outputDequantizeInfo.GetNumElements(),
                            inputDequantizeInfo.GetQuantizationScale(),
                            inputDequantizeInfo.GetQuantizationOffset(),
                            newValues.data());
        }

        TensorInfo newInfo = outputDequantizeInfo;
        newInfo.SetConstant(true);
        if (requiresPermute)
        {
            ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl:: Permuting the constant data.";
            const PermutationVector OHWIToOIHW = {0, 2, 3, 1};
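            // Each entry of the PermutationVector gives the destination dimension of the
            // corresponding source dimension: O stays at 0, H moves 1 -> 2, W moves 2 -> 3
            // and I moves 3 -> 1, turning an OHWI filter into OIHW.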
            std::vector<float> permutedValues(outputDequantizeInfo.GetNumElements());
            armnnUtils::Permute(outputDequantizeInfo.GetShape(), OHWIToOIHW,
                                newValues.data(), permutedValues.data(),
                                GetDataTypeSize(outputDequantizeInfo.GetDataType()));
            ConstTensor newInput(newInfo, permutedValues);
            constantLayer->m_LayerOutput.reset(new ScopedTensorHandle(newInput));
        }
        else
        {
            ConstTensor newInput(newInfo, newValues);
            constantLayer->m_LayerOutput.reset(new ScopedTensorHandle(newInput));
        }

        // Move the connections from the Dequantize layer's output to the Constant layer.
        // The Dequantize layer will be removed if left unconnected.
        dequantizeLayer->GetOutputSlot().MoveAllConnections(constantLayer->GetOutputSlot());

        // Update the output tensor info.
        constantLayer->GetOutputSlot(0).SetTensorInfo(newInfo);
        ARMNN_ASSERT(constantLayer->GetOutputSlot(0).GetTensorInfo().IsConstant() == true);

        // Set isConstant to true in the input tensor infos of all layers that constantLayer
        // is now connected to.
        for (unsigned int i = numConnections; i < constantLayer->GetOutputSlot(0).GetNumConnections(); ++i)
        {
            auto info = constantLayer->GetOutputSlot(0).GetConnection(i)->GetOwningLayer().GetInputSlot(0)
                                      .GetConnectedOutputSlot()->GetTensorInfo();
            info.SetConstant();
            constantLayer->GetOutputSlot(0).GetConnection(i)->GetOwningLayer().GetInputSlot(0)
                          .GetConnectedOutputSlot()->SetTensorInfo(info);
        }
    }
    static void ConvertInt8To32(const void* srcInt8Buffer,
                                size_t numElements,
                                const float scale,
                                const int32_t offset,
                                float* dstFloat32Buffer)
    {
        ARMNN_ASSERT(srcInt8Buffer != nullptr);
        ARMNN_ASSERT(dstFloat32Buffer != nullptr);

        ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl:: scale: " << scale;
        ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl:: offset: " << offset;

        const auto* pInt8 = static_cast<const int8_t*>(srcInt8Buffer);

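        // Affine dequantization: real = (quantized - offset) * scale.
        // For example, with scale = 0.5f and offset = -1, the int8 value 3
        // dequantizes to (3 - (-1)) * 0.5f = 2.0f.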
        for (size_t i = 0; i < numElements; ++i)
        {
            dstFloat32Buffer[i] = static_cast<float>(pInt8[i] - offset) * scale;
        }
    }
};

using ConvertConstDequantisationLayersToConstLayers
    = OptimizeForConnection<ConstantLayer,
                            DequantizeLayer,
                            ConvertConstDequantisationLayersToConstLayersImpl>;
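
// A minimal usage sketch, assuming the standard ArmNN optimizer entry points
// (Optimizer::Pass and MakeOptimizations) used when a network is optimised:
//   Optimizer::Pass(graph, MakeOptimizations(ConvertConstDequantisationLayersToConstLayers()));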

} // namespace optimizations
} // namespace armnn