//
// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
#include <Graph.hpp>
#include <Network.hpp>

#include <neon/NeonTensorHandle.hpp>
#include <neon/NeonTensorHandleFactory.hpp>

#include <armnn/utility/NumericCast.hpp>
#include <armnn/utility/PolymorphicDowncast.hpp>

#include <GraphUtils.hpp>
#include <arm_compute/runtime/Allocator.h>
#include <CommonTestUtils.hpp>

#include <doctest/doctest.h>
#include <armnn/utility/Assert.hpp>

TEST_SUITE("NeonTensorHandleTests")
{
using namespace armnn;

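// GetCapabilities should report that no padding is required anywhere along an
// Input -> Softmax -> Output chain.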
TEST_CASE("NeonTensorHandleGetCapabilitiesNoPadding")
{
    std::shared_ptr<NeonMemoryManager> memoryManager = std::make_shared<NeonMemoryManager>();
    NeonTensorHandleFactory handleFactory(memoryManager);

    INetworkPtr network(INetwork::Create());

    // Add the layers
    IConnectableLayer* input = network->AddInputLayer(0);
    SoftmaxDescriptor descriptor;
    descriptor.m_Beta = 1.0f;
    IConnectableLayer* softmax = network->AddSoftmaxLayer(descriptor);
    IConnectableLayer* output = network->AddOutputLayer(2);

    // Establish connections
    input->GetOutputSlot(0).Connect(softmax->GetInputSlot(0));
    softmax->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    // No padding required for input
    std::vector<Capability> capabilities = handleFactory.GetCapabilities(input,
                                                                         softmax,
                                                                         CapabilityClass::PaddingRequired);
    CHECK(capabilities.empty());

    // No padding required for Softmax
    capabilities = handleFactory.GetCapabilities(softmax, output, CapabilityClass::PaddingRequired);
    CHECK(capabilities.empty());

    // No padding required for output
    capabilities = handleFactory.GetCapabilities(output, nullptr, CapabilityClass::PaddingRequired);
    CHECK(capabilities.empty());
}

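// GetCapabilities should report that padding is required for the Pooling2d layer,
// but not for the surrounding Input and Output layers.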
TEST_CASE("NeonTensorHandleGetCapabilitiesPadding")
{
    std::shared_ptr<NeonMemoryManager> memoryManager = std::make_shared<NeonMemoryManager>();
    NeonTensorHandleFactory handleFactory(memoryManager);

    INetworkPtr network(INetwork::Create());

    // Add the layers
    IConnectableLayer* input = network->AddInputLayer(0);
    Pooling2dDescriptor descriptor;
    IConnectableLayer* pooling = network->AddPooling2dLayer(descriptor);
    IConnectableLayer* output = network->AddOutputLayer(2);

    // Establish connections
    input->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
    pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    // No padding required for input
    std::vector<Capability> capabilities = handleFactory.GetCapabilities(input,
                                                                         pooling,
                                                                         CapabilityClass::PaddingRequired);
    CHECK(capabilities.empty());

    // No padding required for output
    capabilities = handleFactory.GetCapabilities(output, nullptr, CapabilityClass::PaddingRequired);
    CHECK(capabilities.empty());

    // Padding required for Pooling2d
    capabilities = handleFactory.GetCapabilities(pooling, output, CapabilityClass::PaddingRequired);
    CHECK(capabilities.size() == 1);
    CHECK((capabilities[0].m_CapabilityClass == CapabilityClass::PaddingRequired));
    CHECK(capabilities[0].m_Value);
}

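// When concatenating along the X or Y axis and no padding is required, the Neon
// backend is expected to feed the Concat layer with sub-tensors of its output.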
TEST_CASE("ConcatOnXorYSubTensorsNoPaddingRequiredTest")
{
    armnn::INetworkPtr net(armnn::INetwork::Create());

    // Set up tensor infos
    const armnn::TensorInfo inputInfo = armnn::TensorInfo({2, 3, 2, 2}, armnn::DataType::Float32);
    const armnn::TensorInfo intermediateInfo = armnn::TensorInfo({2, 3, 2, 2}, armnn::DataType::Float32);
    const armnn::TensorInfo outputInfo = armnn::TensorInfo({2, 3, 4, 2}, armnn::DataType::Float32);

    armnn::ElementwiseUnaryDescriptor descriptor(armnn::UnaryOperation::Abs);

    // Create the network
    armnn::IConnectableLayer* const input0Layer = net->AddInputLayer(0, "input_0");
    input0Layer->GetOutputSlot(0).SetTensorInfo(inputInfo);
    armnn::IConnectableLayer* elementwiseUnaryLayer0 = net->AddElementwiseUnaryLayer(descriptor, "elementwiseUnary_0");
    elementwiseUnaryLayer0->GetOutputSlot(0).SetTensorInfo(intermediateInfo);
    input0Layer->GetOutputSlot(0).Connect(elementwiseUnaryLayer0->GetInputSlot(0));

    armnn::IConnectableLayer* const input1Layer = net->AddInputLayer(1, "input_1");
    input1Layer->GetOutputSlot(0).SetTensorInfo(inputInfo);
    armnn::IConnectableLayer* elementwiseUnaryLayer1 = net->AddElementwiseUnaryLayer(descriptor, "elementwiseUnary_1");
    elementwiseUnaryLayer1->GetOutputSlot(0).SetTensorInfo(intermediateInfo);
    input1Layer->GetOutputSlot(0).Connect(elementwiseUnaryLayer1->GetInputSlot(0));

    std::array<armnn::TensorShape, 2> concatInputShapes = { intermediateInfo.GetShape(), intermediateInfo.GetShape() };
    armnn::IConnectableLayer* const concatLayer = net->AddConcatLayer(armnn::CreateDescriptorForConcatenation(
        concatInputShapes.begin(), concatInputShapes.end(), 2), "concatenation");
    concatLayer->GetOutputSlot(0).SetTensorInfo(outputInfo);
    elementwiseUnaryLayer0->GetOutputSlot(0).Connect(concatLayer->GetInputSlot(0));
    elementwiseUnaryLayer1->GetOutputSlot(0).Connect(concatLayer->GetInputSlot(1));

    armnn::IConnectableLayer* const outputLayer = net->AddOutputLayer(0, "output");
    concatLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0));

    armnn::IRuntime::CreationOptions options;
    armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));

    std::vector<armnn::BackendId> backends = { armnn::Compute::CpuAcc };
    armnn::IOptimizedNetworkPtr optimizedNet = armnn::Optimize(*net, backends, runtime->GetDeviceSpec());

    const armnn::Graph& theGraph = GetGraphForTesting(optimizedNet.get());

    // Load graph into runtime
    armnn::NetworkId networkIdentifier;
    runtime->LoadNetwork(networkIdentifier, std::move(optimizedNet));

    // Now check how many sub-tensors the Concat layer is consuming.
    auto TraceSubTensorHandleAncestry = [](armnn::ITensorHandle* const subTensorHandle)
    {
        if (subTensorHandle && subTensorHandle->GetParent())
        {
            return true;
        }
        return false;
    };

    for (auto&& layer : theGraph)
    {
        if (layer->GetType() == armnn::LayerType::Concat)
        {
            unsigned int numberOfSubTensors = 0;
            for (unsigned int i = 0; i < layer->GetNumInputSlots(); ++i)
            {
                const armnn::OutputSlot* slot = layer->GetInputSlot(i).GetConnectedOutputSlot();
                if (TraceSubTensorHandleAncestry(slot->GetOutputHandler().GetData()))
                {
                    ++numberOfSubTensors;
                }
            }
            // sub-tensors should be supported in this configuration
            ARMNN_ASSERT(numberOfSubTensors > 0);
        }
    }
}

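// When the Concat inputs are produced by padded Pooling2d layers, sub-tensors
// cannot be used and each Concat input must be a standalone tensor.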
TEST_CASE("ConcatOnXorYPaddingRequiredTest")
{
    armnn::INetworkPtr net(armnn::INetwork::Create());

    // Set up tensor infos
    const armnn::TensorInfo inputInfo = armnn::TensorInfo({2, 3, 2, 2}, armnn::DataType::Float32);
    const armnn::TensorInfo intermediateInfo = armnn::TensorInfo({2, 3, 2, 2}, armnn::DataType::Float32);
    const armnn::TensorInfo outputInfo = armnn::TensorInfo({2, 3, 4, 2}, armnn::DataType::Float32);

    armnn::Pooling2dDescriptor descriptor;
    descriptor.m_PoolType = armnn::PoolingAlgorithm::Average;
    descriptor.m_PoolWidth = descriptor.m_PoolHeight = 3;
    descriptor.m_StrideX = descriptor.m_StrideY = 1;
    descriptor.m_PadLeft = 1;
    descriptor.m_PadRight = 1;
    descriptor.m_PadTop = 1;
    descriptor.m_PadBottom = 1;
    descriptor.m_PaddingMethod = armnn::PaddingMethod::IgnoreValue;

    // Create the network
    armnn::IConnectableLayer* const input0Layer = net->AddInputLayer(0, "input_0");
    input0Layer->GetOutputSlot(0).SetTensorInfo(inputInfo);
    armnn::IConnectableLayer* pooling2dLayer0 = net->AddPooling2dLayer(descriptor, "pooling2d_0");
    pooling2dLayer0->GetOutputSlot(0).SetTensorInfo(intermediateInfo);
    input0Layer->GetOutputSlot(0).Connect(pooling2dLayer0->GetInputSlot(0));

    armnn::IConnectableLayer* const input1Layer = net->AddInputLayer(1, "input_1");
    input1Layer->GetOutputSlot(0).SetTensorInfo(inputInfo);
    armnn::IConnectableLayer* pooling2dLayer1 = net->AddPooling2dLayer(descriptor, "pooling2d_1");
    pooling2dLayer1->GetOutputSlot(0).SetTensorInfo(intermediateInfo);
    input1Layer->GetOutputSlot(0).Connect(pooling2dLayer1->GetInputSlot(0));

    std::array<armnn::TensorShape, 2> concatInputShapes = { intermediateInfo.GetShape(), intermediateInfo.GetShape() };
    armnn::IConnectableLayer* const concatLayer = net->AddConcatLayer(armnn::CreateDescriptorForConcatenation(
        concatInputShapes.begin(), concatInputShapes.end(), 2), "concatenation");
    concatLayer->GetOutputSlot(0).SetTensorInfo(outputInfo);
    pooling2dLayer0->GetOutputSlot(0).Connect(concatLayer->GetInputSlot(0));
    pooling2dLayer1->GetOutputSlot(0).Connect(concatLayer->GetInputSlot(1));

    armnn::IConnectableLayer* const outputLayer = net->AddOutputLayer(0, "output");
    concatLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0));

    armnn::IRuntime::CreationOptions options;
    armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));

    std::vector<armnn::BackendId> backends = { armnn::Compute::CpuAcc };
    armnn::IOptimizedNetworkPtr optimizedNet = armnn::Optimize(*net, backends, runtime->GetDeviceSpec());

    const armnn::Graph& theGraph = GetGraphForTesting(optimizedNet.get());

    // Load graph into runtime
    armnn::NetworkId networkIdentifier;
    runtime->LoadNetwork(networkIdentifier, std::move(optimizedNet));

    // Now check how many sub-tensors the Concat layer is consuming.
    auto TraceSubTensorHandleAncestry = [](armnn::ITensorHandle* const subTensorHandle)
    {
        if (subTensorHandle && subTensorHandle->GetParent())
        {
            return true;
        }
        return false;
    };

    unsigned int numberOfSubTensors = 0;
    for (auto&& layer : theGraph)
    {
        if (layer->GetType() == armnn::LayerType::Concat)
        {
            for (unsigned int i = 0; i < layer->GetNumInputSlots(); ++i)
            {
                const armnn::OutputSlot* slot = layer->GetInputSlot(i).GetConnectedOutputSlot();
                if (TraceSubTensorHandleAncestry(slot->GetOutputHandler().GetData()))
                {
                    ++numberOfSubTensors;
                }
            }
        }
    }
    // sub-tensors should not be supported in this configuration
    ARMNN_ASSERT(numberOfSubTensors == 0);
}

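// When splitting along the X or Y axis and the layers consuming the splitter
// outputs require no padding, those outputs are expected to be sub-tensors of
// the splitter input.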
TEST_CASE("SplitterOnXorYNoPaddingRequiredTest")
{
    using namespace armnn;

    unsigned int splitAxis = 2;
    unsigned int numSplit = 2;

    const TensorShape& inputShape = { 2, 3, 4, 2 };
    const armnn::TensorInfo intermediateInfo = armnn::TensorInfo({ 2, 3, 2, 2 }, armnn::DataType::Float32);
    const std::vector<TensorShape> outputShapes{{ 2, 3, 2, 2 },
                                                { 2, 3, 2, 2 }};
    const float qScale = 1.0f;
    const int32_t qOffset = 0;

    // Creates structures for input & output.
    std::vector<float> inputData{
            1, 2,
            3, 4,
            5, 6,
            7, 8,
            9, 10,
            11, 12,
            13, 14,
            15, 16,
            17, 18,
            19, 20,
            21, 22,
            23, 24,
            25, 26,
            27, 28,
            29, 30,
            31, 32,
            33, 34,
            35, 36,
            37, 38,
            39, 40,
            41, 42,
            43, 44,
            45, 46,
            47, 48
    };

    std::vector<float> expectedOutput0{
            1, 2,
            3, 4,
            9, 10,
            11, 12,
            17, 18,
            19, 20,
            25, 26,
            27, 28,
            33, 34,
            35, 36,
            41, 42,
            43, 44
    };

    std::vector<float> expectedOutput1{
            5, 6,
            7, 8,
            13, 14,
            15, 16,
            21, 22,
            23, 24,
            29, 30,
            31, 32,
            37, 38,
            39, 40,
            45, 46,
            47, 48
    };

    // Builds up the structure of the network.
    INetworkPtr net(INetwork::Create());

    TensorInfo inputTensorInfo(inputShape, armnn::DataType::Float32, qScale, qOffset);

    armnn::ElementwiseUnaryDescriptor descriptor(armnn::UnaryOperation::Abs);

    // Splitter
    std::vector<unsigned int> splitterDimSizes(inputShape.GetNumDimensions());

    // Add current input shape to splitterDimSizes
    for (unsigned int i = 0; i < inputShape.GetNumDimensions(); ++i)
    {
        splitterDimSizes[i] = inputTensorInfo.GetShape()[i];
    }

    if (splitterDimSizes[splitAxis] % numSplit != 0)
    {
        throw ParseException("Number of splits must evenly divide the dimension");
    }

    splitterDimSizes[splitAxis] /= numSplit;

    SplitterDescriptor splitDesc(numSplit, inputShape.GetNumDimensions());

    for (unsigned int g = 0; g < numSplit; ++g)
    {
        // Set the size of the views.
        for (unsigned int dimIdx = 0; dimIdx < splitterDimSizes.size(); ++dimIdx)
        {
            splitDesc.SetViewSize(g, dimIdx, splitterDimSizes[dimIdx]);
        }
        splitDesc.SetViewOriginCoord(g, splitAxis, splitterDimSizes[splitAxis] * g);
    }
    IConnectableLayer* input = net->AddInputLayer(0, "input");
    IConnectableLayer* elementWiseUnary0 = net->AddElementwiseUnaryLayer(descriptor, "elementwiseunary_0");
    IConnectableLayer* elementWiseUnary1 = net->AddElementwiseUnaryLayer(descriptor, "elementwiseunary_1");
    IConnectableLayer* splitter = net->AddSplitterLayer(splitDesc, "splitter");

    // Connections
    Connect(input, splitter, inputTensorInfo, 0, 0);
    Connect(splitter, elementWiseUnary0, intermediateInfo, 0, 0);
    Connect(splitter, elementWiseUnary1, intermediateInfo, 1, 0);

    std::vector<IConnectableLayer*> elementwiseUnaryLayers{elementWiseUnary0, elementWiseUnary1};

    for (unsigned int i = 0; i < outputShapes.size(); ++i)
    {
        TensorInfo outputTensorInfo(outputShapes[i], armnn::DataType::Float32, qScale, qOffset);
        IConnectableLayer* output = net->AddOutputLayer(armnn::numeric_cast<LayerBindingId>(i));
        Connect(elementwiseUnaryLayers[i], output, outputTensorInfo, 0, 0);
    }

    std::map<int, std::vector<float>> inputTensorData = {{ 0, inputData }};
    std::map<int, std::vector<float>> expectedOutputData = {{ 0, expectedOutput0 }, { 1, expectedOutput1 }};

    armnn::IRuntime::CreationOptions options;
    armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));

    std::vector<armnn::BackendId> backends = { armnn::Compute::CpuAcc };
    armnn::IOptimizedNetworkPtr optimizedNet = armnn::Optimize(*net, backends, runtime->GetDeviceSpec());

    const armnn::Graph& theGraph = GetGraphForTesting(optimizedNet.get());

    // Load graph into runtime
    armnn::NetworkId networkIdentifier;
    runtime->LoadNetwork(networkIdentifier, std::move(optimizedNet));

    // Now check how many sub-tensors the elementwise unary layers are consuming.
    auto TraceSubTensorHandleAncestry = [](armnn::ITensorHandle* const subTensorHandle)
    {
        if (subTensorHandle && subTensorHandle->GetParent())
        {
            return true;
        }
        return false;
    };

    for (auto&& layer : theGraph)
    {
        if (layer->GetType() == armnn::LayerType::ElementwiseUnary)
        {
            unsigned int numberOfSubTensors = 0;
            for (unsigned int i = 0; i < layer->GetNumInputSlots(); ++i)
            {
                const armnn::OutputSlot* slot = layer->GetInputSlot(i).GetConnectedOutputSlot();
                if (TraceSubTensorHandleAncestry(slot->GetOutputHandler().GetData()))
                {
                    ++numberOfSubTensors;
                }
            }
            // sub-tensors should be supported in this configuration
            ARMNN_ASSERT(numberOfSubTensors > 0);
        }
    }

    InputTensors inputTensors;
    inputTensors.reserve(inputTensorData.size());
    for (auto&& it : inputTensorData)
    {
        TensorInfo inputTensorInfo = runtime->GetInputTensorInfo(networkIdentifier, it.first);
        inputTensorInfo.SetConstant(true);
        inputTensors.push_back({it.first,
                                ConstTensor(inputTensorInfo, it.second.data())});
    }
    OutputTensors outputTensors;
    outputTensors.reserve(expectedOutputData.size());
    std::map<int, std::vector<float>> outputStorage;
    for (auto&& it : expectedOutputData)
    {
        std::vector<float> out(it.second.size());
        outputStorage.emplace(it.first, out);
        outputTensors.push_back({it.first,
                                 Tensor(runtime->GetOutputTensorInfo(networkIdentifier, it.first),
                                        outputStorage.at(it.first).data())});
    }

    // Does the inference.
    runtime->EnqueueWorkload(networkIdentifier, inputTensors, outputTensors);

    // Checks the results.
    float tolerance = 0.000001f;
    for (auto&& it : expectedOutputData)
    {
        std::vector<float> out = outputStorage.at(it.first);
        for (unsigned int i = 0; i < out.size(); ++i)
        {
            CHECK_MESSAGE(Compare<armnn::DataType::Float32>(it.second[i], out[i], tolerance) == true,
                    "Actual output: " << out[i] << ". Expected output: " << it.second[i]);
        }
    }
}

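// When the splitter outputs feed padded Pooling2d layers, sub-tensors cannot be
// used and each splitter output must be a standalone tensor.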
TEST_CASE("SplitterOnXorYPaddingRequiredTest")
{
    using namespace armnn;

    unsigned int splitAxis = 2;
    unsigned int numSplit = 2;

    const TensorShape& inputShape = { 1, 1, 4, 4 };
    const armnn::TensorInfo intermediateInfo = armnn::TensorInfo({ 1, 1, 2, 4 }, armnn::DataType::Float32);
    const std::vector<TensorShape> outputShapes{{ 1, 1, 2, 4 },
                                                { 1, 1, 2, 4 }};

    const float qScale = 1.0f;
    const int32_t qOffset = 0;

    // Creates structures for input & output.
    std::vector<float> inputData{
        9.0f,   27.0f,  18.0f,  36.0f,
        18.0f,   9.0f,  18.0f,   9.0f,
        27.0f,  18.0f,   9.0f,  27.0f,
        9.0f,   27.0f,   9.0f,  18.0f,
    };

    std::vector<float> expectedOutput0{
         7.0f,  11.0f,  13.0f, 9.0f,
         7.0f,  11.0f,  13.0f, 9.0f
    };

    std::vector<float> expectedOutput1{
        9.0f,  11.0f,  12.0f, 7.0f,
        9.0f,  11.0f,  12.0f, 7.0f
    };

    // Builds up the structure of the network.
    INetworkPtr net(INetwork::Create());

    TensorInfo inputTensorInfo(inputShape, armnn::DataType::Float32, qScale, qOffset);

    // Pooling
    armnn::Pooling2dDescriptor descriptor;
    descriptor.m_PoolType = armnn::PoolingAlgorithm::Average;
    descriptor.m_PoolWidth = descriptor.m_PoolHeight = 3;
    descriptor.m_StrideX = descriptor.m_StrideY = 1;
    descriptor.m_PadLeft = 1;
    descriptor.m_PadRight = 1;
    descriptor.m_PadTop = 1;
    descriptor.m_PadBottom = 1;
    descriptor.m_PaddingMethod = armnn::PaddingMethod::IgnoreValue;

    // Splitter
    std::vector<unsigned int> splitterDimSizes(inputShape.GetNumDimensions());

    // Add current input shape to splitterDimSizes
    for (unsigned int i = 0; i < inputShape.GetNumDimensions(); ++i)
    {
        splitterDimSizes[i] = inputTensorInfo.GetShape()[i];
    }

    if (splitterDimSizes[splitAxis] % numSplit != 0)
    {
        throw ParseException("Number of splits must evenly divide the dimension");
    }

    splitterDimSizes[splitAxis] /= numSplit;

    SplitterDescriptor splitDesc(numSplit, inputShape.GetNumDimensions());

    for (unsigned int g = 0; g < numSplit; ++g)
    {
        // Set the size of the views.
        for (unsigned int dimIdx = 0; dimIdx < splitterDimSizes.size(); ++dimIdx)
        {
            splitDesc.SetViewSize(g, dimIdx, splitterDimSizes[dimIdx]);
        }
        splitDesc.SetViewOriginCoord(g, splitAxis, splitterDimSizes[splitAxis] * g);
    }

    IConnectableLayer* input = net->AddInputLayer(0, "input");
    IConnectableLayer* pooling2d0 = net->AddPooling2dLayer(descriptor, "pooling2d_0");
    IConnectableLayer* pooling2d1 = net->AddPooling2dLayer(descriptor, "pooling2d_1");
    IConnectableLayer* splitter = net->AddSplitterLayer(splitDesc, "splitter");

    // Connections
    Connect(input, splitter, inputTensorInfo, 0, 0);
    Connect(splitter, pooling2d0, intermediateInfo, 0, 0);
    Connect(splitter, pooling2d1, intermediateInfo, 1, 0);

    std::vector<IConnectableLayer*> pooling2dLayers{pooling2d0, pooling2d1};

    for (unsigned int i = 0; i < outputShapes.size(); ++i)
    {
        TensorInfo outputTensorInfo(outputShapes[i], armnn::DataType::Float32, qScale, qOffset);
        IConnectableLayer* output = net->AddOutputLayer(armnn::numeric_cast<LayerBindingId>(i));
        Connect(pooling2dLayers[i], output, outputTensorInfo, 0, 0);
    }

    std::map<int, std::vector<float>> inputTensorData = {{ 0, inputData }};
    std::map<int, std::vector<float>> expectedOutputData = {{ 0, expectedOutput0 }, { 1, expectedOutput1 }};

    armnn::IRuntime::CreationOptions options;
    armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));

    std::vector<armnn::BackendId> backends = { armnn::Compute::CpuAcc };
    armnn::IOptimizedNetworkPtr optimizedNet = armnn::Optimize(*net, backends, runtime->GetDeviceSpec());

    const armnn::Graph& theGraph = GetGraphForTesting(optimizedNet.get());

    // Load graph into runtime
    armnn::NetworkId networkIdentifier;
    runtime->LoadNetwork(networkIdentifier, std::move(optimizedNet));

    // Now check how many sub-tensors the pooling layers are consuming.
    auto TraceSubTensorHandleAncestry = [](armnn::ITensorHandle* const subTensorHandle)
    {
        if (subTensorHandle && subTensorHandle->GetParent())
        {
            return true;
        }
        return false;
    };

    for (auto&& layer : theGraph)
    {
        if (layer->GetType() == armnn::LayerType::Pooling2d)
        {
            unsigned int numberOfSubTensors = 0;
            for (unsigned int i = 0; i < layer->GetNumInputSlots(); ++i)
            {
                const armnn::OutputSlot* slot = layer->GetInputSlot(i).GetConnectedOutputSlot();
                if (TraceSubTensorHandleAncestry(slot->GetOutputHandler().GetData()))
                {
                    ++numberOfSubTensors;
                }
            }
            // sub-tensors should not be supported in this configuration
            ARMNN_ASSERT(numberOfSubTensors == 0);
        }
    }

    InputTensors inputTensors;
    inputTensors.reserve(inputTensorData.size());
    for (auto&& it : inputTensorData)
    {
        TensorInfo inputTensorInfo = runtime->GetInputTensorInfo(networkIdentifier, it.first);
        inputTensorInfo.SetConstant(true);
        inputTensors.push_back({it.first,
                                ConstTensor(inputTensorInfo, it.second.data())});
    }
    OutputTensors outputTensors;
    outputTensors.reserve(expectedOutputData.size());
    std::map<int, std::vector<float>> outputStorage;
    for (auto&& it : expectedOutputData)
    {
        std::vector<float> out(it.second.size());
        outputStorage.emplace(it.first, out);
        outputTensors.push_back({it.first,
                                 Tensor(runtime->GetOutputTensorInfo(networkIdentifier, it.first),
                                        outputStorage.at(it.first).data())});
    }

    // Does the inference.
    runtime->EnqueueWorkload(networkIdentifier, inputTensors, outputTensors);

    // Checks the results.
    float tolerance = 0.000001f;
    for (auto&& it : expectedOutputData)
    {
        std::vector<float> out = outputStorage.at(it.first);
        for (unsigned int i = 0; i < out.size(); ++i)
        {
            CHECK_MESSAGE(Compare<armnn::DataType::Float32>(it.second[i], out[i], tolerance) == true,
                    "Actual output: " << out[i] << ". Expected output: " << it.second[i]);
        }
    }
}

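// A memory-managed tensor handle should give a valid, writable buffer when mapped
// inside an Acquire/Release cycle, and should refuse to import external memory.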
TEST_CASE("NeonTensorHandleFactoryMemoryManaged")
{
    std::shared_ptr<NeonMemoryManager> memoryManager = std::make_shared<NeonMemoryManager>(
        std::make_unique<arm_compute::Allocator>(),
        BaseMemoryManager::MemoryAffinity::Offset);
    NeonTensorHandleFactory handleFactory(memoryManager);
    TensorInfo info({ 1, 1, 2, 1 }, DataType::Float32);

    // create TensorHandle with memory managed
    auto handle = handleFactory.CreateTensorHandle(info, true);
    handle->Manage();
    handle->Allocate();

    memoryManager->Acquire();
    {
        float* buffer = reinterpret_cast<float*>(handle->Map());
        CHECK(buffer != nullptr); // Yields a valid pointer
        buffer[0] = 1.5f;
        buffer[1] = 2.5f;
        CHECK(buffer[0] == 1.5f); // Memory is writable and readable
        CHECK(buffer[1] == 2.5f); // Memory is writable and readable
    }
    memoryManager->Release();

    memoryManager->Acquire();
    {
        float* buffer = reinterpret_cast<float*>(handle->Map());
        CHECK(buffer != nullptr); // Yields a valid pointer
        buffer[0] = 3.5f;
        buffer[1] = 4.5f;
        CHECK(buffer[0] == 3.5f); // Memory is writable and readable
        CHECK(buffer[1] == 4.5f); // Memory is writable and readable
    }
    memoryManager->Release();

    float testPtr[2] = { 2.5f, 5.5f };
    // Cannot import as import is disabled
    CHECK_THROWS_AS(handle->Import(static_cast<void*>(testPtr), MemorySource::Malloc), MemoryImportException);
}

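// An importable (non memory-managed) tensor handle should allocate no buffer up
// front; after Import, Map should return the imported user pointer.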
TEST_CASE("NeonTensorHandleFactoryImport")
{
    std::shared_ptr<NeonMemoryManager> memoryManager = std::make_shared<NeonMemoryManager>(
        std::make_unique<arm_compute::Allocator>(),
        BaseMemoryManager::MemoryAffinity::Offset);
    NeonTensorHandleFactory handleFactory(memoryManager);
    TensorInfo info({ 1, 1, 2, 1 }, DataType::Float32);

    // create TensorHandle without memory managed
    auto handle = handleFactory.CreateTensorHandle(info, false);
    handle->Manage();
    handle->Allocate();
    memoryManager->Acquire();

    // No buffer allocated when import is enabled
    CHECK((PolymorphicDowncast<NeonTensorHandle*>(handle.get()))->GetTensor().buffer() == nullptr);

    float testPtr[2] = { 2.5f, 5.5f };
    // Correctly import
    CHECK(handle->Import(static_cast<void*>(testPtr), MemorySource::Malloc));
    float* buffer = reinterpret_cast<float*>(handle->Map());
    CHECK(buffer != nullptr); // Yields a valid pointer after import
    CHECK(buffer == testPtr); // buffer is pointing to testPtr
    // Memory is writable and readable with correct value
    CHECK(buffer[0] == 2.5f);
    CHECK(buffer[1] == 5.5f);
    buffer[0] = 3.5f;
    buffer[1] = 10.0f;
    CHECK(buffer[0] == 3.5f);
    CHECK(buffer[1] == 10.0f);
    memoryManager->Release();
}

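// CanBeImported should accept a suitably aligned buffer and reject a misaligned one.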
TEST_CASE("NeonTensorHandleCanBeImported")
{
    std::shared_ptr<NeonMemoryManager> memoryManager = std::make_shared<NeonMemoryManager>(
        std::make_unique<arm_compute::Allocator>(),
        BaseMemoryManager::MemoryAffinity::Offset);
    NeonTensorHandleFactory handleFactory(memoryManager);
    TensorInfo info({ 1, 1, 2, 1 }, DataType::Float32);

    // create TensorHandle (Memory Managed status is irrelevant)
    auto handle = handleFactory.CreateTensorHandle(info, false);

    // Create an aligned buffer
    float alignedBuffer[2] = { 2.5f, 5.5f };
    // Check aligned buffers return true
    CHECK(handle->CanBeImported(&alignedBuffer, MemorySource::Malloc) == true);

    // Create a misaligned buffer from the aligned one
    float* misalignedBuffer = reinterpret_cast<float*>(reinterpret_cast<char*>(alignedBuffer) + 1);
    // Check misaligned buffers return false
    CHECK(handle->CanBeImported(static_cast<void*>(misalignedBuffer), MemorySource::Malloc) == false);
}

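// The factory should advertise support for in-place computation.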
TEST_CASE("NeonTensorHandleSupportsInPlaceComputation")
{
    std::shared_ptr<NeonMemoryManager> memoryManager = std::make_shared<NeonMemoryManager>();
    NeonTensorHandleFactory handleFactory(memoryManager);

    // NeonTensorHandleFactory supports InPlaceComputation
    ARMNN_ASSERT(handleFactory.SupportsInPlaceComputation());
}

}