// File: external/armnn/src/backends/backendsCommon/test/DefaultAsyncExecuteTest.cpp
//
// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include <armnn/Exceptions.hpp>

#include <armnn/backends/TensorHandle.hpp>
#include <armnn/backends/Workload.hpp>

#include <doctest/doctest.h>

#include <thread>

using namespace armnn;

namespace
{

TEST_SUITE("WorkloadAsyncExecuteTests")
{

struct Workload0 : BaseWorkload<ElementwiseUnaryQueueDescriptor>
{
    Workload0(const ElementwiseUnaryQueueDescriptor& descriptor, const WorkloadInfo& info)
        : BaseWorkload(descriptor, info)
    {
    }

    Workload0() : BaseWorkload(ElementwiseUnaryQueueDescriptor(), WorkloadInfo())
    {
    }

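    // Synchronous path: multiplies each input element into the output, then copies the
    // result back into the input so the test can verify both tensors were written.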
    void Execute() const
    {
        int* inVals = static_cast<int*>(m_Data.m_Inputs[0]->Map());
        int* outVals = static_cast<int*>(m_Data.m_Outputs[0]->Map());

        for (unsigned int i = 0; i < m_Data.m_Inputs[0]->GetShape().GetNumElements(); ++i)
        {
            outVals[i] = inVals[i] * outVals[i];
            inVals[i] = outVals[i];
        }
    }

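    // Asynchronous path: overrides the default ExecuteAsync and adds instead of multiplying,
    // operating on the tensors in the caller-supplied working memory rather than on m_Data.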
    void ExecuteAsync(ExecutionData& executionData)
    {
        WorkingMemDescriptor* workingMemDescriptor = static_cast<WorkingMemDescriptor*>(executionData.m_Data);
        int* inVals = static_cast<int*>(workingMemDescriptor->m_Inputs[0]->Map());
        int* outVals = static_cast<int*>(workingMemDescriptor->m_Outputs[0]->Map());

        for (unsigned int i = 0; i < workingMemDescriptor->m_Inputs[0]->GetShape().GetNumElements(); ++i)
        {
            outVals[i] = inVals[i] + outVals[i];
            inVals[i] = outVals[i];
        }
    }

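    // Exposes m_Data so the test can inspect the tensors that Execute() wrote to.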
    QueueDescriptor* GetQueueDescriptor()
    {
        return &m_Data;
    }
};

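// Workload1 only implements Execute(); it deliberately does not override ExecuteAsync,
// so calls to ExecuteAsync exercise the default BaseWorkload implementation.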
struct Workload1 : BaseWorkload<ElementwiseUnaryQueueDescriptor>
{
    Workload1(const ElementwiseUnaryQueueDescriptor& descriptor, const WorkloadInfo& info)
        : BaseWorkload(descriptor, info)
    {
    }

    void Execute() const
    {
        int* inVals = static_cast<int*>(m_Data.m_Inputs[0]->Map());
        int* outVals = static_cast<int*>(m_Data.m_Outputs[0]->Map());

        for (unsigned int i = 0; i < m_Data.m_Inputs[0]->GetShape().GetNumElements(); ++i)
        {
            outVals[i] = inVals[i] * outVals[i];
            inVals[i] = outVals[i];
        }
    }
};

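// Checks that every element of the tensor equals expectedValue.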
void ValidateTensor(ITensorHandle* tensorHandle, int expectedValue)
{
    int* actualOutput = static_cast<int*>(tensorHandle->Map());

    bool allValuesCorrect = true;
    for (unsigned int i = 0; i < tensorHandle->GetShape().GetNumElements(); ++i)
    {
        if (actualOutput[i] != expectedValue)
        {
            allValuesCorrect = false;
        }
    }

    CHECK(allValuesCorrect);
}

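// Builds a workload of the requested type around a single input and output tensor handle.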
template<typename Workload>
std::unique_ptr<Workload> CreateWorkload(TensorInfo info, ITensorHandle* inputTensor, ITensorHandle* outputTensor)
{
    WorkloadInfo workloadInfo;
    workloadInfo.m_InputTensorInfos = std::vector<TensorInfo>{info};
    workloadInfo.m_OutputTensorInfos = std::vector<TensorInfo>{info};

    ElementwiseUnaryQueueDescriptor elementwiseUnaryQueueDescriptor;
    elementwiseUnaryQueueDescriptor.m_Inputs = std::vector<ITensorHandle*>{inputTensor};
    elementwiseUnaryQueueDescriptor.m_Outputs = std::vector<ITensorHandle*>{outputTensor};

    return std::make_unique<Workload>(elementwiseUnaryQueueDescriptor, workloadInfo);
}

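// Runs Execute() on one pair of tensors and ExecuteAsync() on another, then checks that
// each path produced its own result (multiply vs. add) without touching the other's tensors.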
TEST_CASE("TestAsyncExecute")
{
    TensorInfo info({5}, DataType::Signed32, 0.0f, 0, true);

    int inVals[5]{2, 2, 2, 2, 2};
    int outVals[5]{1, 1, 1, 1, 1};

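    // Execute multiplies:   out = 2 * 1 = 2
    // ExecuteAsync adds:    out = 2 + 1 = 3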
    int expectedExecuteVal = 2;
    int expectedExecuteAsyncVal = 3;

    ConstTensor constInputTensor(info, inVals);
    ConstTensor constOutputTensor(info, outVals);

    ScopedTensorHandle syncInput0(constInputTensor);
    ScopedTensorHandle syncOutput0(constOutputTensor);

    std::unique_ptr<Workload0> workload0 = CreateWorkload<Workload0>(info, &syncInput0, &syncOutput0);

    workload0->Execute();

    ScopedTensorHandle asyncInput0(constInputTensor);
    ScopedTensorHandle asyncOutput0(constOutputTensor);

    WorkingMemDescriptor workingMemDescriptor0;
    workingMemDescriptor0.m_Inputs = std::vector<ITensorHandle*>{&asyncInput0};
    workingMemDescriptor0.m_Outputs = std::vector<ITensorHandle*>{&asyncOutput0};

    ExecutionData executionData;
    executionData.m_Data = &workingMemDescriptor0;

    workload0->ExecuteAsync(executionData);

    // Execute and ExecuteAsync also overwrite their inputs, so check those too to make sure
    // the two paths did not interfere with each other.
    ValidateTensor(workingMemDescriptor0.m_Outputs[0], expectedExecuteAsyncVal);
    ValidateTensor(workingMemDescriptor0.m_Inputs[0], expectedExecuteAsyncVal);

    ValidateTensor(workload0->GetQueueDescriptor()->m_Outputs[0], expectedExecuteVal);
    ValidateTensor(workload0->GetQueueDescriptor()->m_Inputs[0], expectedExecuteVal);
}

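// Workload1 has no ExecuteAsync override, so this exercises the default implementation,
// which should run Execute() against the tensors supplied through ExecutionData.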
TEST_CASE("TestDefaultAsyncExecute")
{
    TensorInfo info({5}, DataType::Signed32, 0.0f, 0, true);

    std::vector<int> inVals{2, 2, 2, 2, 2};
    std::vector<int> outVals{1, 1, 1, 1, 1};
    std::vector<int> defaultVals{0, 0, 0, 0, 0};

    int expectedExecuteVal = 2;

    ConstTensor constInputTensor(info, inVals);
    ConstTensor constOutputTensor(info, outVals);
    ConstTensor defaultTensor(info, defaultVals.data());

    ScopedTensorHandle defaultInput(defaultTensor);
    ScopedTensorHandle defaultOutput(defaultTensor);

    std::unique_ptr<Workload1> workload1 = CreateWorkload<Workload1>(info, &defaultInput, &defaultOutput);

    ScopedTensorHandle asyncInput(constInputTensor);
    ScopedTensorHandle asyncOutput(constOutputTensor);

    WorkingMemDescriptor workingMemDescriptor;
    workingMemDescriptor.m_Inputs = std::vector<ITensorHandle*>{&asyncInput};
    workingMemDescriptor.m_Outputs = std::vector<ITensorHandle*>{&asyncOutput};

    ExecutionData executionData;
    executionData.m_Data = &workingMemDescriptor;

    workload1->ExecuteAsync(executionData);

    // Workload1 has no ExecuteAsync implementation, so it should fall back to the default
    // BaseWorkload::ExecuteAsync, which calls workload1's Execute() in a thread-safe manner.
    ValidateTensor(workingMemDescriptor.m_Outputs[0], expectedExecuteVal);
    ValidateTensor(workingMemDescriptor.m_Inputs[0], expectedExecuteVal);
}

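// Hammers the default ExecuteAsync from two threads at once; if the default implementation
// serialises access correctly, each thread's tensors end up with their own expected values.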
TEST_CASE("TestDefaultAsyncExecuteWithThreads")
{
    // Use a large vector so the threads have a chance to interact
    unsigned int vecSize = 1000;
    TensorInfo info({vecSize}, DataType::Signed32, 0.0f, 0, true);

    std::vector<int> inVals1(vecSize, 2);
    std::vector<int> outVals1(vecSize, 1);
    std::vector<int> inVals2(vecSize, 5);
    std::vector<int> outVals2(vecSize, -1);

    std::vector<int> defaultVals(vecSize, 0);

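    // Each thread calls ExecuteAsync twice. Thread 1 starts from (in = 2, out = 1):
    // out = 2 * 1 = 2, then 2 * 2 = 4. Thread 2 starts from (in = 5, out = -1):
    // out = 5 * -1 = -5, then -5 * -5 = 25.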
    int expectedExecuteVal1 = 4;
    int expectedExecuteVal2 = 25;

    ConstTensor constInputTensor1(info, inVals1);
    ConstTensor constOutputTensor1(info, outVals1);

    ConstTensor constInputTensor2(info, inVals2);
    ConstTensor constOutputTensor2(info, outVals2);

    ConstTensor defaultTensor(info, defaultVals.data());

    ScopedTensorHandle defaultInput(defaultTensor);
    ScopedTensorHandle defaultOutput(defaultTensor);
    std::unique_ptr<Workload1> workload = CreateWorkload<Workload1>(info, &defaultInput, &defaultOutput);

    ScopedTensorHandle asyncInput1(constInputTensor1);
    ScopedTensorHandle asyncOutput1(constOutputTensor1);

    WorkingMemDescriptor workingMemDescriptor1;
    workingMemDescriptor1.m_Inputs = std::vector<ITensorHandle*>{&asyncInput1};
    workingMemDescriptor1.m_Outputs = std::vector<ITensorHandle*>{&asyncOutput1};

    ExecutionData executionData1;
    executionData1.m_Data = &workingMemDescriptor1;

    ScopedTensorHandle asyncInput2(constInputTensor2);
    ScopedTensorHandle asyncOutput2(constOutputTensor2);

    WorkingMemDescriptor workingMemDescriptor2;
    workingMemDescriptor2.m_Inputs = std::vector<ITensorHandle*>{&asyncInput2};
    workingMemDescriptor2.m_Outputs = std::vector<ITensorHandle*>{&asyncOutput2};

    ExecutionData executionData2;
    executionData2.m_Data = &workingMemDescriptor2;

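    // Both threads share the same workload object; the default ExecuteAsync is expected to
    // serialise the calls so the two working-memory descriptors stay independent.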
    std::thread thread1([&]()
    {
        workload->ExecuteAsync(executionData1);
        workload->ExecuteAsync(executionData1);
    });

    std::thread thread2([&]()
    {
        workload->ExecuteAsync(executionData2);
        workload->ExecuteAsync(executionData2);
    });

    thread1.join();
    thread2.join();

    ValidateTensor(workingMemDescriptor1.m_Outputs[0], expectedExecuteVal1);
    ValidateTensor(workingMemDescriptor1.m_Inputs[0], expectedExecuteVal1);

    ValidateTensor(workingMemDescriptor2.m_Outputs[0], expectedExecuteVal2);
    ValidateTensor(workingMemDescriptor2.m_Inputs[0], expectedExecuteVal2);
}

}

}