//
// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
#pragma once

#include "Optimization.hpp"
#include "NetworkUtils.hpp"

#include <armnn/Logging.hpp>
#include <armnnUtils/Permute.hpp>

namespace armnn
{
namespace optimizations
{

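// Folds a Constant layer that feeds a Dequantize layer into a single Constant layer
// holding the already dequantized FP32 values, so the dequantization cost is paid once
// at optimization time rather than on every inference. For example:
//
//     Constant(QAsymmS8) -> Dequantize -> Conv2d
//
// becomes
//
//     Constant(Float32) -> Conv2d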
class ConvertConstDequantisationLayersToConstLayersImpl
{
public:
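    // Invoked by the optimizer for every Constant -> Dequantize connection matched in
    // the graph; 'connection' is the input slot of the Dequantize layer.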
    void Run(Graph& graph, InputSlot& connection) const
    {
        Layer& base = connection.GetConnectedOutputSlot()->GetOwningLayer();
        Layer& child = connection.GetOwningLayer();

        ARMNN_ASSERT(base.GetType() == LayerType::Constant);
        ARMNN_ASSERT(child.GetType() == LayerType::Dequantize);

        ReplaceConstDequantisationLayer(graph,
                                        PolymorphicDowncast<ConstantLayer*>(&base),
                                        PolymorphicDowncast<DequantizeLayer*>(&child));
    }
protected:
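    // Instantiated only through the OptimizeForConnection wrapper defined at the bottom
    // of this file.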
    ConvertConstDequantisationLayersToConstLayersImpl() = default;
    ~ConvertConstDequantisationLayersToConstLayersImpl() = default;
private:

    static void ReplaceConstDequantisationLayer(Graph&,
                                                ConstantLayer* constantLayer,
                                                DequantizeLayer* dequantizeLayer)
    {
        ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl::ReplaceConstDequantisationLayer()";
        /**
         * This optimisation handles the case where a constant set of inputs is fed into a
         * Dequantize layer. In that case we don't want the overhead of dequantizing the
         * values on every inference; instead we dequantize them once and store them in a
         * Const layer to be reused every time, as they will not change.
         */
        TensorInfo constantInfo = constantLayer->GetOutputSlot(0).GetTensorInfo();
        TensorInfo inputDequantizeInfo = dequantizeLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
        TensorInfo outputDequantizeInfo = dequantizeLayer->GetOutputSlot(0).GetTensorInfo();

        bool requiresPermute = false;

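        // Check whether the layer consuming the dequantized output needs the constant
        // data permuted.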
        auto connection = dequantizeLayer->GetOutputSlot(0).GetConnection(0);
        if (connection)
        {
            if (connection->GetOwningLayer().GetType() == LayerType::Convolution2d)
            {
                /**
                 * ArmNN does not currently support non-fixed weights or bias.
                 * The NNAPI filter is always OHWI [depth_out, filter_height, filter_width, depth_in],
                 * but ArmNN expects the filter's height and width indices to match the input's height
                 * and width indices, so we permute it to OIHW if the DataLayout is NCHW.
                 */
                ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl:: Connected to "
                                   "Convolution layer.";
                auto conv2dLayer = PolymorphicDowncast<Convolution2dLayer*>(&connection->GetOwningLayer());
                if (conv2dLayer->GetParameters().m_DataLayout == DataLayout::NCHW)
                {
                    ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl:: Connected to "
                                       "Convolution layer and requires permute on weights.";
                    requiresPermute = true;
                }
            }
        }

        ARMNN_ASSERT(constantLayer->GetNumOutputSlots() == 1);
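        // Record how many consumers the constant layer has before the rewire; connections
        // moved over from the Dequantize layer are appended after this index.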
        auto numConnections = constantLayer->GetOutputSlot(0).GetNumConnections();

        ARMNN_LOG(info) << "constantInfo datatype: " << armnn::GetDataTypeName(constantInfo.GetDataType())
                        << " inputDequantizeInfo datatype: " << armnn::GetDataTypeName(inputDequantizeInfo.GetDataType())
                        << " outputDequantizeInfo datatype: " << armnn::GetDataTypeName(outputDequantizeInfo.GetDataType());

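        // Destination buffer for the dequantized values, sized to the Dequantize output.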
        std::vector<float> newValues(outputDequantizeInfo.GetNumElements());
        if (constantInfo.GetDataType() == DataType::Float16 &&
            inputDequantizeInfo.GetDataType() == DataType::Float16 &&
            outputDequantizeInfo.GetDataType() == DataType::Float32)
        {
            ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl:: Converting FP16 -> FP32";
            armnnUtils::FloatingPointConverter::ConvertFloat16To32(constantLayer->m_LayerOutput->Map(true),
                                                                   outputDequantizeInfo.GetNumElements(),
                                                                   newValues.data());
        }
        else if (((constantInfo.GetDataType() == DataType::QAsymmS8 &&
                   inputDequantizeInfo.GetDataType() == DataType::QAsymmS8) ||
                  (constantInfo.GetDataType() == DataType::QSymmS8 &&
                   inputDequantizeInfo.GetDataType() == DataType::QSymmS8)) &&
                 outputDequantizeInfo.GetDataType() == DataType::Float32)
        {
            ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl:: Converting INT8 -> FP32";
            ConvertInt8To32(constantLayer->m_LayerOutput->Map(true),
                            outputDequantizeInfo.GetNumElements(),
                            inputDequantizeInfo.GetQuantizationScale(),
                            inputDequantizeInfo.GetQuantizationOffset(),
                            newValues.data());
        }
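        // Note: only FP16 -> FP32 and signed INT8 -> FP32 conversions are handled; for any
        // other type combination newValues keeps its zero-initialised contents.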

        TensorInfo newInfo = outputDequantizeInfo;
        newInfo.SetConstant(true);
        if (requiresPermute)
        {
            ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl:: Permuting the constant data.";
            const PermutationVector OHWIToOIHW = {0, 2, 3, 1};
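            // Each PermutationVector entry gives the destination index of the corresponding
            // source dimension: O->0, H->2, W->3, I->1.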
            std::vector<float> permutedValues(outputDequantizeInfo.GetNumElements());
            armnnUtils::Permute(outputDequantizeInfo.GetShape(), OHWIToOIHW,
                                newValues.data(), permutedValues.data(),
                                GetDataTypeSize(outputDequantizeInfo.GetDataType()));
            ConstTensor newInput(newInfo, permutedValues);
            constantLayer->m_LayerOutput.reset(new ScopedTensorHandle(newInput));
        }
        else
        {
            ConstTensor newInput(newInfo, newValues);
            constantLayer->m_LayerOutput.reset(new ScopedTensorHandle(newInput));
        }

        // Move the connections on the Dequantize output over to the constant layer.
        // The Dequantize layer will be removed if left unconnected.
        dequantizeLayer->GetOutputSlot().MoveAllConnections(constantLayer->GetOutputSlot());

        // Update the output tensor info.
        constantLayer->GetOutputSlot(0).SetTensorInfo(newInfo);
        ARMNN_ASSERT(constantLayer->GetOutputSlot(0).GetTensorInfo().IsConstant() == true);

        // Set isConstant to true in the input tensor infos of all layers that the constant
        // layer is now connected to.
        for (unsigned int i = numConnections; i < constantLayer->GetOutputSlot(0).GetNumConnections(); ++i)
        {
            auto info = constantLayer->GetOutputSlot(0).GetConnection(i)->GetOwningLayer().GetInputSlot(0)
                    .GetConnectedOutputSlot()->GetTensorInfo();
            info.SetConstant();
            constantLayer->GetOutputSlot(0).GetConnection(i)->GetOwningLayer().GetInputSlot(0)
                    .GetConnectedOutputSlot()->SetTensorInfo(info);
        }
    }

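    // Dequantizes a buffer of signed 8-bit values using the affine scheme
    // real = (quantized - offset) * scale.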
    static void ConvertInt8To32(const void* srcInt8Buffer,
                                size_t numElements,
                                const float scale,
                                const int32_t offset,
                                float* dstFloat32Buffer)
    {
        ARMNN_ASSERT(srcInt8Buffer != nullptr);
        ARMNN_ASSERT(dstFloat32Buffer != nullptr);

        ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl:: scale: " << scale;
        ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl:: offset: " << offset;

        const auto* pInt8 = static_cast<const int8_t*>(srcInt8Buffer);

        for (size_t i = 0; i < numElements; ++i)
        {
            dstFloat32Buffer[i] = static_cast<float>(pInt8[i] - offset) * scale;
        }
    }

};

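// Matches every ConstantLayer whose output is consumed by a DequantizeLayer and runs the
// substitution above on the connection.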
using ConvertConstDequantisationLayersToConstLayers
    = OptimizeForConnection<ConstantLayer,
                            DequantizeLayer,
                            ConvertConstDequantisationLayersToConstLayersImpl>;

} // namespace optimizations
} // namespace armnn