//
// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
#pragma once

#include "Optimization.hpp"
#include "NetworkUtils.hpp"

#include <armnn/Logging.hpp>
#include <armnnUtils/Permute.hpp>

namespace armnn
{
namespace optimizations
{

class ConvertConstDequantisationLayersToConstLayersImpl
{
public:
    void Run(Graph& graph, InputSlot& connection) const
    {
        Layer& base = connection.GetConnectedOutputSlot()->GetOwningLayer();
        Layer& child = connection.GetOwningLayer();

        ARMNN_ASSERT(base.GetType() == LayerType::Constant);
        ARMNN_ASSERT(child.GetType() == LayerType::Dequantize);

        ReplaceConstDequantisationLayer(graph,
                                        PolymorphicDowncast<ConstantLayer*>(&base),
                                        PolymorphicDowncast<DequantizeLayer*>(&child));
    }
protected:
    ConvertConstDequantisationLayersToConstLayersImpl() = default;
    ~ConvertConstDequantisationLayersToConstLayersImpl() = default;
private:

    static void ReplaceConstDequantisationLayer(Graph&,
                                                ConstantLayer* constantLayer,
                                                DequantizeLayer* dequantizeLayer)
    {
        ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl::ReplaceConstDequantisationLayer()";
        /**
         * This optimisation finds situations where a constant set of inputs is provided to a Dequantize
         * layer. In that case we don't want the overhead of dequantizing the values on every inference;
         * instead we dequantize them once and store them in a Const layer to be used every time, as they
         * will not change.
         */
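        // A minimal before/after sketch of the rewrite (layer names illustrative):
        //   Before: ConstantLayer (Float16 or INT8) -> DequantizeLayer -> next layer
        //   After:  ConstantLayer (Float32, dequantized once) -> next layer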
        TensorInfo constantInfo = constantLayer->GetOutputSlot(0).GetTensorInfo();
        TensorInfo inputDequantizeInfo = dequantizeLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
        TensorInfo outputDequantizeInfo = dequantizeLayer->GetOutputSlot(0).GetTensorInfo();

        bool requiresPermute = false;

        auto connection = dequantizeLayer->GetOutputSlot(0).GetConnection(0);
        if (connection)
        {
            if (connection->GetOwningLayer().GetType() == LayerType::Convolution2d)
            {
                /**
                 * ArmNN does not currently support non-fixed weights or bias.
                 * The NNAPI filter is always OHWI [depth_out, filter_height, filter_width, depth_in],
                 * but ArmNN expects the filter's height and width indices to match the input's height
                 * and width indices, so we permute it to OIHW if the DataLayout is NCHW.
                 */
                ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl:: Connected to "
                                   "Convolution layer.";
                auto conv2dLayer = PolymorphicDowncast<Convolution2dLayer*>(&connection->GetOwningLayer());
                if (conv2dLayer->GetParameters().m_DataLayout == DataLayout::NCHW)
                {
                    ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl:: Connected to "
                                       "Convolution layer and requires permute on weights.";
                    requiresPermute = true;
                }
            }
        }

        ARMNN_ASSERT(constantLayer->GetNumOutputSlots() == 1);
        auto numConnections = constantLayer->GetOutputSlot(0).GetNumConnections();

        ARMNN_LOG(info) << "constantInfo datatype: " << armnn::GetDataTypeName(constantInfo.GetDataType())
                        << ", inputDequantizeInfo datatype: " << armnn::GetDataTypeName(inputDequantizeInfo.GetDataType())
                        << ", outputDequantizeInfo datatype: " << armnn::GetDataTypeName(outputDequantizeInfo.GetDataType());

        std::vector<float> newValues(outputDequantizeInfo.GetNumElements());
        if (constantInfo.GetDataType() == DataType::Float16 &&
            inputDequantizeInfo.GetDataType() == DataType::Float16 &&
            outputDequantizeInfo.GetDataType() == DataType::Float32)
        {
            ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl:: Converting FP16 -> FP32";
            armnnUtils::FloatingPointConverter::ConvertFloat16To32(constantLayer->m_LayerOutput->Map(true),
                                                                   outputDequantizeInfo.GetNumElements(),
                                                                   newValues.data());
        }
        else if (((constantInfo.GetDataType() == DataType::QAsymmS8
                   && inputDequantizeInfo.GetDataType() == DataType::QAsymmS8)
                  || (constantInfo.GetDataType() == DataType::QSymmS8
                      && inputDequantizeInfo.GetDataType() == DataType::QSymmS8)) &&
                 outputDequantizeInfo.GetDataType() == DataType::Float32)
        {
            ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl:: Converting INT8 -> FP32";
            ConvertInt8To32(constantLayer->m_LayerOutput->Map(true),
                            outputDequantizeInfo.GetNumElements(),
                            inputDequantizeInfo.GetQuantizationScale(),
                            inputDequantizeInfo.GetQuantizationOffset(),
                            newValues.data());
        }

        TensorInfo newInfo = outputDequantizeInfo;
        newInfo.SetConstant(true);
        if (requiresPermute)
        {
            ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl:: Permuting the constant data.";
            const PermutationVector OHWIToOIHW = {0, 2, 3, 1};
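            // Each entry of the PermutationVector gives the destination dimension of the
            // corresponding source dimension: O stays at 0, H moves 1 -> 2, W moves 2 -> 3
            // and I moves 3 -> 1, turning an OHWI filter into OIHW.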
            std::vector<float> permutedValues(outputDequantizeInfo.GetNumElements());
            armnnUtils::Permute(outputDequantizeInfo.GetShape(), OHWIToOIHW,
                                newValues.data(), permutedValues.data(),
                                GetDataTypeSize(outputDequantizeInfo.GetDataType()));
            ConstTensor newInput(newInfo, permutedValues);
            constantLayer->m_LayerOutput.reset(new ScopedTensorHandle(newInput));
        }
        else
        {
            ConstTensor newInput(newInfo, newValues);
            constantLayer->m_LayerOutput.reset(new ScopedTensorHandle(newInput));
        }

        // Move the connections from the Dequantize layer's output to the Constant layer.
        // The Dequantize layer will be removed if left unconnected.
        dequantizeLayer->GetOutputSlot().MoveAllConnections(constantLayer->GetOutputSlot());

        // Update the output tensor info.
        constantLayer->GetOutputSlot(0).SetTensorInfo(newInfo);
        ARMNN_ASSERT(constantLayer->GetOutputSlot(0).GetTensorInfo().IsConstant() == true);

        // Set isConstant to true in the input tensor infos of all layers that constantLayer
        // is now connected to.
        for (unsigned int i = numConnections; i < constantLayer->GetOutputSlot(0).GetNumConnections(); ++i)
        {
            auto info = constantLayer->GetOutputSlot(0).GetConnection(i)->GetOwningLayer().GetInputSlot(0)
                                      .GetConnectedOutputSlot()->GetTensorInfo();
            info.SetConstant();
            constantLayer->GetOutputSlot(0).GetConnection(i)->GetOwningLayer().GetInputSlot(0)
                          .GetConnectedOutputSlot()->SetTensorInfo(info);
        }
    }
    static void ConvertInt8To32(const void* srcInt8Buffer,
                                size_t numElements,
                                const float scale,
                                const int32_t offset,
                                float* dstFloat32Buffer)
    {
        ARMNN_ASSERT(srcInt8Buffer != nullptr);
        ARMNN_ASSERT(dstFloat32Buffer != nullptr);

        ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl:: scale: " << scale;
        ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl:: offset: " << offset;

        const auto* pInt8 = static_cast<const int8_t*>(srcInt8Buffer);

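        // Affine dequantization: real = (quantized - offset) * scale.
        // For example, with scale = 0.5f and offset = -1, the int8 value 3
        // dequantizes to (3 - (-1)) * 0.5f = 2.0f.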
        for (size_t i = 0; i < numElements; ++i)
        {
            dstFloat32Buffer[i] = static_cast<float>(pInt8[i] - offset) * scale;
        }
    }
};

using ConvertConstDequantisationLayersToConstLayers
    = OptimizeForConnection<ConstantLayer,
                            DequantizeLayer,
                            ConvertConstDequantisationLayersToConstLayersImpl>;
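
// A minimal usage sketch, assuming the standard ArmNN optimizer entry points
// (Optimizer::Pass and MakeOptimizations) used when a network is optimised:
//   Optimizer::Pass(graph, MakeOptimizations(ConvertConstDequantisationLayersToConstLayers()));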

} // namespace optimizations
} // namespace armnn