xref: /aosp_15_r20/external/armnn/src/backends/reference/workloads/DetectionPostProcess.cpp (revision 89c4ff92f2867872bb9e2354d150bf0c8c502810)
1 //
2 // Copyright © 2017 Arm Ltd. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5 
6 #include "DetectionPostProcess.hpp"
7 
8 #include <armnn/utility/Assert.hpp>
9 #include <armnn/utility/NumericCast.hpp>
10 
11 #include <algorithm>
12 #include <numeric>
13 
14 namespace armnn
15 {
16 
// Builds the ascending index sequence 0, 1, ..., k - 1.
//
// k : number of indices to generate.
//
// Returns a vector of size k whose element at position i holds the value i.
std::vector<unsigned int> GenerateRangeK(unsigned int k)
{
    std::vector<unsigned int> indices;
    indices.reserve(k);
    for (unsigned int value = 0; value < k; ++value)
    {
        indices.push_back(value);
    }
    return indices;
}
23 
// Partially sorts an index array so that its first k entries reference the k largest values,
// ordered from the highest value to the lowest.
//
// k          : number of leading entries to order. Clamped to numElement, because handing
//              std::partial_sort a middle iterator beyond the end of the range is undefined behaviour.
// indices    : array of numElement indices into values; reordered in place. The entries after the
//              first k are left in an unspecified order.
// values     : scores addressed through the entries of indices; never modified.
// numElement : number of entries in indices.
void TopKSort(unsigned int k, unsigned int* indices, const float* values, unsigned int numElement)
{
    const unsigned int sortedCount = std::min(k, numElement);
    if (sortedCount == 0)
    {
        // Nothing to select; leave the index array untouched.
        return;
    }

    // Descending order: an index comes first when the value it refers to is larger.
    std::partial_sort(indices, indices + sortedCount, indices + numElement,
                      [values](unsigned int lhs, unsigned int rhs) { return values[lhs] > values[rhs]; });
}
29 
// Computes the intersection-over-union ratio of two axis-aligned boxes.
//
// boxI, boxJ : pointers to four floats each, in box-corner format (ymin, xmin, ymax, xmax).
//
// Returns the overlap area divided by the area of the union. Returns 0 when either box does not
// have a strictly positive area (degenerate, inverted or NaN extents): such a box cannot overlap
// anything, and dividing by the resulting zero union would otherwise produce NaN.
float IntersectionOverUnion(const float* boxI, const float* boxJ)
{
    // Box-corner format: ymin, xmin, ymax, xmax.
    const int yMin = 0;
    const int xMin = 1;
    const int yMax = 2;
    const int xMax = 3;

    const float areaI = (boxI[yMax] - boxI[yMin]) * (boxI[xMax] - boxI[xMin]);
    const float areaJ = (boxJ[yMax] - boxJ[yMin]) * (boxJ[xMax] - boxJ[xMin]);

    // Written as a negated "greater than" so that NaN areas are rejected as well.
    if (!(areaI > 0.0f) || !(areaJ > 0.0f))
    {
        return 0.0f;
    }

    const float yMinIntersection = std::max(boxI[yMin], boxJ[yMin]);
    const float xMinIntersection = std::max(boxI[xMin], boxJ[xMin]);
    const float yMaxIntersection = std::min(boxI[yMax], boxJ[yMax]);
    const float xMaxIntersection = std::min(boxI[xMax], boxJ[xMax]);

    // Disjoint boxes yield a negative extent on at least one axis; clamp it to an empty overlap.
    const float areaIntersection = std::max(yMaxIntersection - yMinIntersection, 0.0f) *
                                   std::max(xMaxIntersection - xMinIntersection, 0.0f);
    const float areaUnion = areaI + areaJ - areaIntersection;
    return areaIntersection / areaUnion;
}
48 
NonMaxSuppression(unsigned int numBoxes,const std::vector<float> & boxCorners,const std::vector<float> & scores,float nmsScoreThreshold,unsigned int maxDetection,float nmsIouThreshold)49 std::vector<unsigned int> NonMaxSuppression(unsigned int numBoxes,
50                                             const std::vector<float>& boxCorners,
51                                             const std::vector<float>& scores,
52                                             float nmsScoreThreshold,
53                                             unsigned int maxDetection,
54                                             float nmsIouThreshold)
55 {
56     // Select boxes that have scores above a given threshold.
57     std::vector<float> scoresAboveThreshold;
58     std::vector<unsigned int> indicesAboveThreshold;
59     for (unsigned int i = 0; i < numBoxes; ++i)
60     {
61         if (scores[i] >= nmsScoreThreshold)
62         {
63             scoresAboveThreshold.push_back(scores[i]);
64             indicesAboveThreshold.push_back(i);
65         }
66     }
67 
68     // Sort the indices based on scores.
69     unsigned int numAboveThreshold = armnn::numeric_cast<unsigned int>(scoresAboveThreshold.size());
70     std::vector<unsigned int> sortedIndices = GenerateRangeK(numAboveThreshold);
71     TopKSort(numAboveThreshold, sortedIndices.data(), scoresAboveThreshold.data(), numAboveThreshold);
72 
73     // Number of output cannot be more than max detections specified in the option.
74     unsigned int numOutput = std::min(maxDetection, numAboveThreshold);
75     std::vector<unsigned int> outputIndices;
76     std::vector<bool> visited(numAboveThreshold, false);
77 
78     // Prune out the boxes with high intersection over union by keeping the box with higher score.
79     for (unsigned int i = 0; i < numAboveThreshold; ++i)
80     {
81         if (outputIndices.size() >= numOutput)
82         {
83             break;
84         }
85         if (!visited[sortedIndices[i]])
86         {
87             outputIndices.push_back(indicesAboveThreshold[sortedIndices[i]]);
88             for (unsigned int j = i + 1; j < numAboveThreshold; ++j)
89             {
90                 unsigned int iIndex = indicesAboveThreshold[sortedIndices[i]] * 4;
91                 unsigned int jIndex = indicesAboveThreshold[sortedIndices[j]] * 4;
92                 if (IntersectionOverUnion(&boxCorners[iIndex], &boxCorners[jIndex]) > nmsIouThreshold)
93                 {
94                     visited[sortedIndices[j]] = true;
95                 }
96             }
97         }
98     }
99     return outputIndices;
100 }
101 
AllocateOutputData(unsigned int numOutput,unsigned int numSelected,const std::vector<float> & boxCorners,const std::vector<unsigned int> & outputIndices,const std::vector<unsigned int> & selectedBoxes,const std::vector<unsigned int> & selectedClasses,const std::vector<float> & selectedScores,float * detectionBoxes,float * detectionScores,float * detectionClasses,float * numDetections)102 void AllocateOutputData(unsigned int numOutput,
103                         unsigned int numSelected,
104                         const std::vector<float>& boxCorners,
105                         const std::vector<unsigned int>& outputIndices,
106                         const std::vector<unsigned int>& selectedBoxes,
107                         const std::vector<unsigned int>& selectedClasses,
108                         const std::vector<float>& selectedScores,
109                         float* detectionBoxes,
110                         float* detectionScores,
111                         float* detectionClasses,
112                         float* numDetections)
113 {
114     for (unsigned int i = 0; i < numOutput; ++i)
115         {
116             unsigned int boxIndex = i * 4;
117             if (i < numSelected)
118             {
119                 unsigned int boxCornorIndex = selectedBoxes[outputIndices[i]] * 4;
120                 detectionScores[i] = selectedScores[outputIndices[i]];
121                 detectionClasses[i] = armnn::numeric_cast<float>(selectedClasses[outputIndices[i]]);
122                 detectionBoxes[boxIndex] = boxCorners[boxCornorIndex];
123                 detectionBoxes[boxIndex + 1] = boxCorners[boxCornorIndex + 1];
124                 detectionBoxes[boxIndex + 2] = boxCorners[boxCornorIndex + 2];
125                 detectionBoxes[boxIndex + 3] = boxCorners[boxCornorIndex + 3];
126             }
127             else
128             {
129                 detectionScores[i] = 0.0f;
130                 detectionClasses[i] = 0.0f;
131                 detectionBoxes[boxIndex] = 0.0f;
132                 detectionBoxes[boxIndex + 1] = 0.0f;
133                 detectionBoxes[boxIndex + 2] = 0.0f;
134                 detectionBoxes[boxIndex + 3] = 0.0f;
135             }
136         }
137         numDetections[0] = armnn::numeric_cast<float>(numSelected);
138 }
139 
DetectionPostProcess(const TensorInfo & boxEncodingsInfo,const TensorInfo & scoresInfo,const TensorInfo & anchorsInfo,const TensorInfo & detectionBoxesInfo,const TensorInfo & detectionClassesInfo,const TensorInfo & detectionScoresInfo,const TensorInfo & numDetectionsInfo,const DetectionPostProcessDescriptor & desc,Decoder<float> & boxEncodings,Decoder<float> & scores,Decoder<float> & anchors,float * detectionBoxes,float * detectionClasses,float * detectionScores,float * numDetections)140 void DetectionPostProcess(const TensorInfo& boxEncodingsInfo,
141                           const TensorInfo& scoresInfo,
142                           const TensorInfo& anchorsInfo,
143                           const TensorInfo& detectionBoxesInfo,
144                           const TensorInfo& detectionClassesInfo,
145                           const TensorInfo& detectionScoresInfo,
146                           const TensorInfo& numDetectionsInfo,
147                           const DetectionPostProcessDescriptor& desc,
148                           Decoder<float>& boxEncodings,
149                           Decoder<float>& scores,
150                           Decoder<float>& anchors,
151                           float* detectionBoxes,
152                           float* detectionClasses,
153                           float* detectionScores,
154                           float* numDetections)
155 {
156     IgnoreUnused(anchorsInfo, detectionClassesInfo, detectionScoresInfo, numDetectionsInfo);
157 
158     // Transform center-size format which is (ycenter, xcenter, height, width) to box-corner format,
159     // which represents the lower left corner and the upper right corner (ymin, xmin, ymax, xmax)
160     std::vector<float> boxCorners(boxEncodingsInfo.GetNumElements());
161 
162     const unsigned int numBoxes  = boxEncodingsInfo.GetShape()[1];
163     const unsigned int numScores = scoresInfo.GetNumElements();
164 
165     for (unsigned int i = 0; i < numBoxes; ++i)
166     {
167         // Y
168         float boxEncodingY = boxEncodings.Get();
169         float anchorY      = anchors.Get();
170 
171         ++boxEncodings;
172         ++anchors;
173 
174         // X
175         float boxEncodingX = boxEncodings.Get();
176         float anchorX      = anchors.Get();
177 
178         ++boxEncodings;
179         ++anchors;
180 
181         // H
182         float boxEncodingH = boxEncodings.Get();
183         float anchorH      = anchors.Get();
184 
185         ++boxEncodings;
186         ++anchors;
187 
188         // W
189         float boxEncodingW = boxEncodings.Get();
190         float anchorW      = anchors.Get();
191 
192         ++boxEncodings;
193         ++anchors;
194 
195         float yCentre = boxEncodingY / desc.m_ScaleY * anchorH + anchorY;
196         float xCentre = boxEncodingX / desc.m_ScaleX * anchorW + anchorX;
197 
198         float halfH = 0.5f * expf(boxEncodingH / desc.m_ScaleH) * anchorH;
199         float halfW = 0.5f * expf(boxEncodingW / desc.m_ScaleW) * anchorW;
200 
201         unsigned int indexY = i * 4;
202         unsigned int indexX = indexY + 1;
203         unsigned int indexH = indexX + 1;
204         unsigned int indexW = indexH + 1;
205 
206         // ymin
207         boxCorners[indexY] = yCentre - halfH;
208         // xmin
209         boxCorners[indexX] = xCentre - halfW;
210         // ymax
211         boxCorners[indexH] = yCentre + halfH;
212         // xmax
213         boxCorners[indexW] = xCentre + halfW;
214 
215         ARMNN_ASSERT(boxCorners[indexY] < boxCorners[indexH]);
216         ARMNN_ASSERT(boxCorners[indexX] < boxCorners[indexW]);
217     }
218 
219     unsigned int numClassesWithBg = desc.m_NumClasses + 1;
220 
221     // Decode scores
222     std::vector<float> decodedScores;
223     decodedScores.reserve(numScores);
224 
225     for (unsigned int i = 0u; i < numScores; ++i)
226     {
227         decodedScores.emplace_back(scores.Get());
228         ++scores;
229     }
230 
231     // Perform Non Max Suppression.
232     if (desc.m_UseRegularNms)
233     {
234         // Perform Regular NMS.
235         // For each class, perform NMS and select max detection numbers of the highest score across all classes.
236         std::vector<float> classScores(numBoxes);
237 
238         std::vector<unsigned int> selectedBoxesAfterNms;
239         selectedBoxesAfterNms.reserve(numBoxes);
240 
241         std::vector<float> selectedScoresAfterNms;
242         selectedBoxesAfterNms.reserve(numScores);
243 
244         std::vector<unsigned int> selectedClasses;
245 
246         for (unsigned int c = 0; c < desc.m_NumClasses; ++c)
247         {
248             // For each boxes, get scores of the boxes for the class c.
249             for (unsigned int i = 0; i < numBoxes; ++i)
250             {
251                 classScores[i] = decodedScores[i * numClassesWithBg + c + 1];
252             }
253             std::vector<unsigned int> selectedIndices = NonMaxSuppression(numBoxes,
254                                                                           boxCorners,
255                                                                           classScores,
256                                                                           desc.m_NmsScoreThreshold,
257                                                                           desc.m_DetectionsPerClass,
258                                                                           desc.m_NmsIouThreshold);
259 
260             for (unsigned int i = 0; i < selectedIndices.size(); ++i)
261             {
262                 selectedBoxesAfterNms.push_back(selectedIndices[i]);
263                 selectedScoresAfterNms.push_back(classScores[selectedIndices[i]]);
264                 selectedClasses.push_back(c);
265             }
266         }
267 
268         // Select max detection numbers of the highest score across all classes
269         unsigned int numSelected = armnn::numeric_cast<unsigned int>(selectedBoxesAfterNms.size());
270         unsigned int numOutput = std::min(desc.m_MaxDetections,  numSelected);
271 
272         // Sort the max scores among the selected indices.
273         std::vector<unsigned int> outputIndices = GenerateRangeK(numSelected);
274         TopKSort(numOutput, outputIndices.data(), selectedScoresAfterNms.data(), numSelected);
275 
276         AllocateOutputData(detectionBoxesInfo.GetShape()[1], numOutput, boxCorners, outputIndices,
277                            selectedBoxesAfterNms, selectedClasses, selectedScoresAfterNms,
278                            detectionBoxes, detectionScores, detectionClasses, numDetections);
279     }
280     else
281     {
282         // Perform Fast NMS.
283         // Select max scores of boxes and perform NMS on max scores,
284         // select max detection numbers of the highest score
285         unsigned int numClassesPerBox = std::min(desc.m_MaxClassesPerDetection, desc.m_NumClasses);
286         std::vector<float> maxScores;
287         std::vector<unsigned int>boxIndices;
288         std::vector<unsigned int>maxScoreClasses;
289 
290         for (unsigned int box = 0; box < numBoxes; ++box)
291         {
292             unsigned int scoreIndex = box * numClassesWithBg + 1;
293 
294             // Get the max scores of the box.
295             std::vector<unsigned int> maxScoreIndices = GenerateRangeK(desc.m_NumClasses);
296             TopKSort(numClassesPerBox, maxScoreIndices.data(),
297                 decodedScores.data() + scoreIndex, desc.m_NumClasses);
298 
299             for (unsigned int i = 0; i < numClassesPerBox; ++i)
300             {
301                 maxScores.push_back(decodedScores[scoreIndex + maxScoreIndices[i]]);
302                 maxScoreClasses.push_back(maxScoreIndices[i]);
303                 boxIndices.push_back(box);
304             }
305         }
306 
307         // Perform NMS on max scores
308         std::vector<unsigned int> selectedIndices = NonMaxSuppression(numBoxes, boxCorners, maxScores,
309                                                                       desc.m_NmsScoreThreshold,
310                                                                       desc.m_MaxDetections,
311                                                                       desc.m_NmsIouThreshold);
312 
313         unsigned int numSelected = armnn::numeric_cast<unsigned int>(selectedIndices.size());
314         unsigned int numOutput = std::min(desc.m_MaxDetections,  numSelected);
315 
316         AllocateOutputData(detectionBoxesInfo.GetShape()[1], numOutput, boxCorners, selectedIndices,
317                            boxIndices, maxScoreClasses, maxScores,
318                            detectionBoxes, detectionScores, detectionClasses, numDetections);
319     }
320 }
321 
322 } // namespace armnn
323