//
// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include <armnn/Exceptions.hpp>

#include <armnn/backends/TensorHandle.hpp>
#include <armnn/backends/Workload.hpp>

#include <doctest/doctest.h>

#include <thread>

using namespace armnn;


namespace
{

TEST_SUITE("WorkloadAsyncExecuteTests")
{

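// Workload0 overrides both Execute() (multiplies the input into the output) and
// ExecuteAsync() (adds the input into the output), so the tests below can tell
// which path ran by inspecting the resulting tensor values.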
struct Workload0 : BaseWorkload<ElementwiseUnaryQueueDescriptor>
{
    Workload0(const ElementwiseUnaryQueueDescriptor& descriptor, const WorkloadInfo& info)
        : BaseWorkload(descriptor, info)
    {
    }

    Workload0() : BaseWorkload(ElementwiseUnaryQueueDescriptor(), WorkloadInfo())
    {
    }

    void Execute() const
    {
        int* inVals = static_cast<int*>(m_Data.m_Inputs[0][0].Map());
        int* outVals = static_cast<int*>(m_Data.m_Outputs[0][0].Map());

        for (unsigned int i = 0;
             i < m_Data.m_Inputs[0][0].GetShape().GetNumElements();
             ++i)
        {
            outVals[i] = inVals[i] * outVals[i];
            inVals[i] = outVals[i];
        }
    }

    void ExecuteAsync(ExecutionData& executionData)
    {
        WorkingMemDescriptor* workingMemDescriptor = static_cast<WorkingMemDescriptor*>(executionData.m_Data);
        int* inVals = static_cast<int*>(workingMemDescriptor->m_Inputs[0][0].Map());
        int* outVals = static_cast<int*>(workingMemDescriptor->m_Outputs[0][0].Map());

        for (unsigned int i = 0;
             i < workingMemDescriptor->m_Inputs[0][0].GetShape().GetNumElements();
             ++i)
        {
            outVals[i] = inVals[i] + outVals[i];
            inVals[i] = outVals[i];
        }
    }

    QueueDescriptor* GetQueueDescriptor()
    {
        return &m_Data;
    }
};

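// Workload1 only overrides Execute(); it deliberately provides no ExecuteAsync(),
// so calls fall through to the default BaseWorkload implementation.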
struct Workload1 : BaseWorkload<ElementwiseUnaryQueueDescriptor>
{
    Workload1(const ElementwiseUnaryQueueDescriptor& descriptor, const WorkloadInfo& info)
        : BaseWorkload(descriptor, info)
    {
    }

    void Execute() const
    {
        int* inVals = static_cast<int*>(m_Data.m_Inputs[0][0].Map());
        int* outVals = static_cast<int*>(m_Data.m_Outputs[0][0].Map());

        for (unsigned int i = 0;
             i < m_Data.m_Inputs[0][0].GetShape().GetNumElements();
             ++i)
        {
            outVals[i] = inVals[i] * outVals[i];
            inVals[i] = outVals[i];
        }
    }
};

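// Checks that every element of the tensor behind the handle equals expectedValue.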
void ValidateTensor(ITensorHandle* tensorHandle, int expectedValue)
{
    int* actualOutput = static_cast<int*>(tensorHandle->Map());

    bool allValuesCorrect = true;
    for (unsigned int i = 0;
         i < tensorHandle->GetShape().GetNumElements();
         ++i)
    {
        if (actualOutput[i] != expectedValue)
        {
            allValuesCorrect = false;
        }
    }

    CHECK(allValuesCorrect);
}

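// Builds a workload of the requested type with a single input and output tensor,
// wiring up the queue descriptor and workload info from the given TensorInfo.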
template<typename Workload>
std::unique_ptr<Workload> CreateWorkload(TensorInfo info, ITensorHandle* inputTensor, ITensorHandle* outputTensor)
{
    WorkloadInfo workloadInfo;
    workloadInfo.m_InputTensorInfos = std::vector<TensorInfo>{info};
    workloadInfo.m_OutputTensorInfos = std::vector<TensorInfo>{info};

    ElementwiseUnaryQueueDescriptor elementwiseUnaryQueueDescriptor;
    elementwiseUnaryQueueDescriptor.m_Inputs = std::vector<ITensorHandle*>{inputTensor};
    elementwiseUnaryQueueDescriptor.m_Outputs = std::vector<ITensorHandle*>{outputTensor};

    return std::make_unique<Workload>(elementwiseUnaryQueueDescriptor, workloadInfo);
}

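// Runs Execute() on one pair of tensors and ExecuteAsync() on another, then checks
// that each pair holds only the result of its own path (multiply vs add).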
TEST_CASE("TestAsyncExecute")
{
    TensorInfo info({5}, DataType::Signed32, 0.0f, 0, true);

    int inVals[5]{2, 2, 2, 2, 2};
    int outVals[5]{1, 1, 1, 1, 1};

    int expectedExecuteval = 2;
    int expectedExecuteAsyncval = 3;

    ConstTensor constInputTensor(info, inVals);
    ConstTensor constOutputTensor(info, outVals);

    ScopedTensorHandle syncInput0(constInputTensor);
    ScopedTensorHandle syncOutput0(constOutputTensor);

    std::unique_ptr<Workload0> workload0 = CreateWorkload<Workload0>(info, &syncInput0, &syncOutput0);

    workload0.get()->Execute();

    ScopedTensorHandle asyncInput0(constInputTensor);
    ScopedTensorHandle asyncOutput0(constOutputTensor);

    WorkingMemDescriptor workingMemDescriptor0;
    workingMemDescriptor0.m_Inputs = std::vector<ITensorHandle*>{&asyncInput0};
    workingMemDescriptor0.m_Outputs = std::vector<ITensorHandle*>{&asyncOutput0};

    ExecutionData executionData;
    executionData.m_Data = &workingMemDescriptor0;

    workload0.get()->ExecuteAsync(executionData);

    // Inputs are also changed by the Execute/ExecuteAsync calls to make sure there is no interference with them
    ValidateTensor(workingMemDescriptor0.m_Outputs[0], expectedExecuteAsyncval);
    ValidateTensor(workingMemDescriptor0.m_Inputs[0], expectedExecuteAsyncval);

    ValidateTensor(&workload0.get()->GetQueueDescriptor()->m_Outputs[0][0], expectedExecuteval);
    ValidateTensor(&workload0.get()->GetQueueDescriptor()->m_Inputs[0][0], expectedExecuteval);
}

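// Workload1 has no ExecuteAsync() of its own, so this exercises the default
// BaseWorkload::ExecuteAsync path, which should forward to Execute().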
TEST_CASE("TestDefaultAsyncExecute")
{
    TensorInfo info({5}, DataType::Signed32, 0.0f, 0, true);

    std::vector<int> inVals{2, 2, 2, 2, 2};
    std::vector<int> outVals{1, 1, 1, 1, 1};
    std::vector<int> defaultVals{0, 0, 0, 0, 0};

    int expectedExecuteval = 2;

    ConstTensor constInputTensor(info, inVals);
    ConstTensor constOutputTensor(info, outVals);
    ConstTensor defaultTensor(info, defaultVals.data());

    ScopedTensorHandle defaultInput = ScopedTensorHandle(defaultTensor);
    ScopedTensorHandle defaultOutput = ScopedTensorHandle(defaultTensor);

    std::unique_ptr<Workload1> workload1 = CreateWorkload<Workload1>(info, &defaultInput, &defaultOutput);

    ScopedTensorHandle asyncInput(constInputTensor);
    ScopedTensorHandle asyncOutput(constOutputTensor);

    WorkingMemDescriptor workingMemDescriptor;
    workingMemDescriptor.m_Inputs = std::vector<ITensorHandle*>{&asyncInput};
    workingMemDescriptor.m_Outputs = std::vector<ITensorHandle*>{&asyncOutput};

    ExecutionData executionData;
    executionData.m_Data = &workingMemDescriptor;

    workload1.get()->ExecuteAsync(executionData);

    // workload1 has no ExecuteAsync implementation and so should use the default workload ExecuteAsync
    // implementation, which will call workload1.Execute() in a thread-safe manner
    ValidateTensor(workingMemDescriptor.m_Outputs[0], expectedExecuteval);
    ValidateTensor(workingMemDescriptor.m_Inputs[0], expectedExecuteval);
}

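// Calls the default ExecuteAsync path from two threads with separate working memory
// descriptors and checks that the two sets of results do not interfere with each other.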
TEST_CASE("TestDefaultAsyncExecuteWithThreads")
{
    // Use a large vector so the threads have a chance to interact
    unsigned int vecSize = 1000;
    TensorInfo info({vecSize}, DataType::Signed32, 0.0f, 0, true);

    std::vector<int> inVals1(vecSize, 2);
    std::vector<int> outVals1(vecSize, 1);
    std::vector<int> inVals2(vecSize, 5);
    std::vector<int> outVals2(vecSize, -1);

    std::vector<int> defaultVals(vecSize, 0);

    int expectedExecuteval1 = 4;
    int expectedExecuteval2 = 25;
    ConstTensor constInputTensor1(info, inVals1);
    ConstTensor constOutputTensor1(info, outVals1);

    ConstTensor constInputTensor2(info, inVals2);
    ConstTensor constOutputTensor2(info, outVals2);

    ConstTensor defaultTensor(info, defaultVals.data());

    ScopedTensorHandle defaultInput = ScopedTensorHandle(defaultTensor);
    ScopedTensorHandle defaultOutput = ScopedTensorHandle(defaultTensor);
    std::unique_ptr<Workload1> workload = CreateWorkload<Workload1>(info, &defaultInput, &defaultOutput);

    ScopedTensorHandle asyncInput1(constInputTensor1);
    ScopedTensorHandle asyncOutput1(constOutputTensor1);

    WorkingMemDescriptor workingMemDescriptor1;
    workingMemDescriptor1.m_Inputs = std::vector<ITensorHandle*>{&asyncInput1};
    workingMemDescriptor1.m_Outputs = std::vector<ITensorHandle*>{&asyncOutput1};

    ExecutionData executionData1;
    executionData1.m_Data = &workingMemDescriptor1;

    ScopedTensorHandle asyncInput2(constInputTensor2);
    ScopedTensorHandle asyncOutput2(constOutputTensor2);

    WorkingMemDescriptor workingMemDescriptor2;
    workingMemDescriptor2.m_Inputs = std::vector<ITensorHandle*>{&asyncInput2};
    workingMemDescriptor2.m_Outputs = std::vector<ITensorHandle*>{&asyncOutput2};

    ExecutionData executionData2;
    executionData2.m_Data = &workingMemDescriptor2;

    std::thread thread1 = std::thread([&]()
    {
        workload.get()->ExecuteAsync(executionData1);
        workload.get()->ExecuteAsync(executionData1);
    });

    std::thread thread2 = std::thread([&]()
    {
        workload.get()->ExecuteAsync(executionData2);
        workload.get()->ExecuteAsync(executionData2);
    });

    thread1.join();
    thread2.join();

    ValidateTensor(workingMemDescriptor1.m_Outputs[0], expectedExecuteval1);
    ValidateTensor(workingMemDescriptor1.m_Inputs[0], expectedExecuteval1);

    ValidateTensor(workingMemDescriptor2.m_Outputs[0], expectedExecuteval2);
    ValidateTensor(workingMemDescriptor2.m_Inputs[0], expectedExecuteval2);
}

}

}