//
// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
#include <Graph.hpp>
#include <Network.hpp>

#include <neon/NeonTensorHandle.hpp>
#include <neon/NeonTensorHandleFactory.hpp>

#include <armnn/utility/NumericCast.hpp>
#include <armnn/utility/PolymorphicDowncast.hpp>

#include <GraphUtils.hpp>
#include <arm_compute/runtime/Allocator.h>
#include <CommonTestUtils.hpp>

#include <doctest/doctest.h>
#include <armnn/utility/Assert.hpp>

TEST_SUITE("NeonTensorHandleTests")
{
using namespace armnn;

TEST_CASE("NeonTensorHandleGetCapabilitiesNoPadding")
{
    std::shared_ptr<NeonMemoryManager> memoryManager = std::make_shared<NeonMemoryManager>();
    NeonTensorHandleFactory handleFactory(memoryManager);

    INetworkPtr network(INetwork::Create());

    // Add the layers
    IConnectableLayer* input = network->AddInputLayer(0);
    SoftmaxDescriptor descriptor;
    descriptor.m_Beta = 1.0f;
    IConnectableLayer* softmax = network->AddSoftmaxLayer(descriptor);
    IConnectableLayer* output = network->AddOutputLayer(2);

    // Establish connections
    input->GetOutputSlot(0).Connect(softmax->GetInputSlot(0));
    softmax->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    // No padding required for input
    std::vector<Capability> capabilities = handleFactory.GetCapabilities(input,
                                                                         softmax,
                                                                         CapabilityClass::PaddingRequired);
    CHECK(capabilities.empty());

    // No padding required for Softmax
    capabilities = handleFactory.GetCapabilities(softmax, output, CapabilityClass::PaddingRequired);
    CHECK(capabilities.empty());

    // No padding required for output
    capabilities = handleFactory.GetCapabilities(output, nullptr, CapabilityClass::PaddingRequired);
    CHECK(capabilities.empty());
}

TEST_CASE("NeonTensorHandleGetCapabilitiesPadding")
{
    std::shared_ptr<NeonMemoryManager> memoryManager = std::make_shared<NeonMemoryManager>();
    NeonTensorHandleFactory handleFactory(memoryManager);

    INetworkPtr network(INetwork::Create());

    // Add the layers
    IConnectableLayer* input = network->AddInputLayer(0);
    Pooling2dDescriptor descriptor;
    IConnectableLayer* pooling = network->AddPooling2dLayer(descriptor);
    IConnectableLayer* output = network->AddOutputLayer(2);

    // Establish connections
    input->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
    pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    // No padding required for input
    std::vector<Capability> capabilities = handleFactory.GetCapabilities(input,
                                                                         pooling,
                                                                         CapabilityClass::PaddingRequired);
    CHECK(capabilities.empty());

    // No padding required for output
    capabilities = handleFactory.GetCapabilities(output, nullptr, CapabilityClass::PaddingRequired);
    CHECK(capabilities.empty());

    // Padding required for Pooling2d
    capabilities = handleFactory.GetCapabilities(pooling, output, CapabilityClass::PaddingRequired);
    CHECK(capabilities.size() == 1);
    CHECK((capabilities[0].m_CapabilityClass == CapabilityClass::PaddingRequired));
    CHECK(capabilities[0].m_Value);
}

TEST_CASE("ConcatOnXorYSubTensorsNoPaddingRequiredTest")
{
    armnn::INetworkPtr net(armnn::INetwork::Create());

    // Set up tensor infos
    const armnn::TensorInfo inputInfo = armnn::TensorInfo({2, 3, 2, 2}, armnn::DataType::Float32);
    const armnn::TensorInfo intermediateInfo = armnn::TensorInfo({2, 3, 2, 2}, armnn::DataType::Float32);
    const armnn::TensorInfo outputInfo = armnn::TensorInfo({2, 3, 4, 2}, armnn::DataType::Float32);

    armnn::ElementwiseUnaryDescriptor descriptor(armnn::UnaryOperation::Abs);

    // Create the network
    armnn::IConnectableLayer* const input0Layer = net->AddInputLayer(0, "input_0");
    input0Layer->GetOutputSlot(0).SetTensorInfo(inputInfo);
    armnn::IConnectableLayer* elementwiseUnaryLayer0 = net->AddElementwiseUnaryLayer(descriptor, "elementwiseUnary_0");
    elementwiseUnaryLayer0->GetOutputSlot(0).SetTensorInfo(intermediateInfo);
    input0Layer->GetOutputSlot(0).Connect(elementwiseUnaryLayer0->GetInputSlot(0));

    armnn::IConnectableLayer* const input1Layer = net->AddInputLayer(1, "input_1");
    input1Layer->GetOutputSlot(0).SetTensorInfo(inputInfo);
    armnn::IConnectableLayer* elementwiseUnaryLayer1 = net->AddElementwiseUnaryLayer(descriptor, "elementwiseUnary_1");
    elementwiseUnaryLayer1->GetOutputSlot(0).SetTensorInfo(intermediateInfo);
    input1Layer->GetOutputSlot(0).Connect(elementwiseUnaryLayer1->GetInputSlot(0));

    std::array<armnn::TensorShape, 2> concatInputShapes = { intermediateInfo.GetShape(), intermediateInfo.GetShape() };
    armnn::IConnectableLayer* const concatLayer = net->AddConcatLayer(armnn::CreateDescriptorForConcatenation(
        concatInputShapes.begin(), concatInputShapes.end(), 2), "concatenation");
    concatLayer->GetOutputSlot(0).SetTensorInfo(outputInfo);
    elementwiseUnaryLayer0->GetOutputSlot(0).Connect(concatLayer->GetInputSlot(0));
    elementwiseUnaryLayer1->GetOutputSlot(0).Connect(concatLayer->GetInputSlot(1));

    armnn::IConnectableLayer* const outputLayer = net->AddOutputLayer(0, "output");
    concatLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0));

    armnn::IRuntime::CreationOptions options;
    armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));

    std::vector<armnn::BackendId> backends = { armnn::Compute::CpuAcc };
    armnn::IOptimizedNetworkPtr optimizedNet = armnn::Optimize(*net, backends, runtime->GetDeviceSpec());

    const armnn::Graph& theGraph = GetGraphForTesting(optimizedNet.get());

    // Load graph into runtime
    armnn::NetworkId networkIdentifier;
    runtime->LoadNetwork(networkIdentifier, std::move(optimizedNet));

    // Now check how many sub-tensors the concat layer is using.
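    // A handle backed by a sub-tensor reports its parent via ITensorHandle::GetParent(); the helper below
    // uses that to detect whether an input was fulfilled with a sub-tensor view rather than a full tensor.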
    auto TraceSubTensorHandleAncestry = [](armnn::ITensorHandle* const subTensorHandle)
    {
        if (subTensorHandle && subTensorHandle->GetParent())
        {
            return true;
        }
        return false;
    };

    for (auto&& layer : theGraph)
    {
        if (layer->GetType() == armnn::LayerType::Concat)
        {
            unsigned int numberOfSubTensors = 0;
            for (unsigned int i = 0; i < layer->GetNumInputSlots(); ++i)
            {
                const armnn::OutputSlot* slot = layer->GetInputSlot(i).GetConnectedOutputSlot();
                if (TraceSubTensorHandleAncestry(slot->GetOutputHandler().GetData()))
                {
                    ++numberOfSubTensors;
                }
            }
            // sub-tensors should be supported in this configuration
            ARMNN_ASSERT(numberOfSubTensors > 0);
        }
    }
}

TEST_CASE("ConcatonXorYPaddingRequiredTest")
{
    armnn::INetworkPtr net(armnn::INetwork::Create());

    // Set up tensor infos
    const armnn::TensorInfo inputInfo = armnn::TensorInfo({2, 3, 2, 2}, armnn::DataType::Float32);
    const armnn::TensorInfo intermediateInfo = armnn::TensorInfo({2, 3, 2, 2}, armnn::DataType::Float32);
    const armnn::TensorInfo outputInfo = armnn::TensorInfo({2, 3, 4, 2}, armnn::DataType::Float32);

    armnn::Pooling2dDescriptor descriptor;
    descriptor.m_PoolType = armnn::PoolingAlgorithm::Average;
    descriptor.m_PoolWidth = descriptor.m_PoolHeight = 3;
    descriptor.m_StrideX = descriptor.m_StrideY = 1;
    descriptor.m_PadLeft = 1;
    descriptor.m_PadRight = 1;
    descriptor.m_PadTop = 1;
    descriptor.m_PadBottom = 1;
    descriptor.m_PaddingMethod = armnn::PaddingMethod::IgnoreValue;

    // Create the network
    armnn::IConnectableLayer* const input0Layer = net->AddInputLayer(0, "input_0");
    input0Layer->GetOutputSlot(0).SetTensorInfo(inputInfo);
    armnn::IConnectableLayer* pooling2dLayer0 = net->AddPooling2dLayer(descriptor, "pooling2d_0");
    pooling2dLayer0->GetOutputSlot(0).SetTensorInfo(intermediateInfo);
    input0Layer->GetOutputSlot(0).Connect(pooling2dLayer0->GetInputSlot(0));

    armnn::IConnectableLayer* const input1Layer = net->AddInputLayer(1, "input_1");
    input1Layer->GetOutputSlot(0).SetTensorInfo(inputInfo);
    armnn::IConnectableLayer* pooling2dLayer1 = net->AddPooling2dLayer(descriptor, "pooling2d_1");
    pooling2dLayer1->GetOutputSlot(0).SetTensorInfo(intermediateInfo);
    input1Layer->GetOutputSlot(0).Connect(pooling2dLayer1->GetInputSlot(0));

    std::array<armnn::TensorShape, 2> concatInputShapes = { intermediateInfo.GetShape(), intermediateInfo.GetShape() };
    armnn::IConnectableLayer* const concatLayer = net->AddConcatLayer(armnn::CreateDescriptorForConcatenation(
        concatInputShapes.begin(), concatInputShapes.end(), 2), "concatenation");
    concatLayer->GetOutputSlot(0).SetTensorInfo(outputInfo);
    pooling2dLayer0->GetOutputSlot(0).Connect(concatLayer->GetInputSlot(0));
    pooling2dLayer1->GetOutputSlot(0).Connect(concatLayer->GetInputSlot(1));

    armnn::IConnectableLayer* const outputLayer = net->AddOutputLayer(0, "output");
    concatLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0));

    armnn::IRuntime::CreationOptions options;
    armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));

    std::vector<armnn::BackendId> backends = { armnn::Compute::CpuAcc };
    armnn::IOptimizedNetworkPtr optimizedNet = armnn::Optimize(*net, backends, runtime->GetDeviceSpec());

    const armnn::Graph& theGraph = GetGraphForTesting(optimizedNet.get());

    // Load graph into runtime
    armnn::NetworkId networkIdentifier;
    runtime->LoadNetwork(networkIdentifier, std::move(optimizedNet));

    // Now check how many sub-tensors the concat layer is using.
    auto TraceSubTensorHandleAncestry = [](armnn::ITensorHandle* const subTensorHandle)
    {
        if (subTensorHandle && subTensorHandle->GetParent())
        {
            return true;
        }
        return false;
    };

    unsigned int numberOfSubTensors = 0;
    for (auto&& layer : theGraph)
    {
        if (layer->GetType() == armnn::LayerType::Concat)
        {
            for (unsigned int i = 0; i < layer->GetNumInputSlots(); ++i)
            {
                const armnn::OutputSlot* slot = layer->GetInputSlot(i).GetConnectedOutputSlot();
                if (TraceSubTensorHandleAncestry(slot->GetOutputHandler().GetData()))
                {
                    ++numberOfSubTensors;
                }
            }
        }
    }
    // Sub-tensors should not be supported in this configuration, as the preceding Pooling2d layers require padding
    ARMNN_ASSERT(numberOfSubTensors == 0);
}

TEST_CASE("SplitteronXorYNoPaddingRequiredTest")
{
    using namespace armnn;

    unsigned int splitAxis = 2;
    unsigned int numSplit = 2;

    const TensorShape& inputShape = { 2, 3, 4, 2 };
    const armnn::TensorInfo intermediateInfo = armnn::TensorInfo({ 2, 3, 2, 2 }, armnn::DataType::Float32);
    const std::vector<TensorShape> outputShapes{{ 2, 3, 2, 2 },
                                                { 2, 3, 2, 2 }};
    const float qScale = 1.0f;
    const int32_t qOffset = 0;

    // Creates structures for input & output.
    std::vector<float> inputData{
        1, 2,
        3, 4,
        5, 6,
        7, 8,
        9, 10,
        11, 12,
        13, 14,
        15, 16,
        17, 18,
        19, 20,
        21, 22,
        23, 24,
        25, 26,
        27, 28,
        29, 30,
        31, 32,
        33, 34,
        35, 36,
        37, 38,
        39, 40,
        41, 42,
        43, 44,
        45, 46,
        47, 48
    };

    std::vector<float> expectedOutput0{
        1, 2,
        3, 4,
        9, 10,
        11, 12,
        17, 18,
        19, 20,
        25, 26,
        27, 28,
        33, 34,
        35, 36,
        41, 42,
        43, 44
    };

    std::vector<float> expectedOutput1{
        5, 6,
        7, 8,
        13, 14,
        15, 16,
        21, 22,
        23, 24,
        29, 30,
        31, 32,
        37, 38,
        39, 40,
        45, 46,
        47, 48
    };

    // Builds up the structure of the network.
    INetworkPtr net(INetwork::Create());

    TensorInfo inputTensorInfo(inputShape, armnn::DataType::Float32, qScale, qOffset);

    armnn::ElementwiseUnaryDescriptor descriptor(armnn::UnaryOperation::Abs);

    // Splitter
    std::vector<unsigned int> splitterDimSizes(inputShape.GetNumDimensions());

    // Add current input shape to splitterDimSizes
    for (unsigned int i = 0; i < inputShape.GetNumDimensions(); ++i)
    {
        splitterDimSizes[i] = inputTensorInfo.GetShape()[i];
    }

    if (splitterDimSizes[splitAxis] % numSplit != 0)
    {
        throw ParseException("Number of splits must evenly divide the dimension");
    }

    splitterDimSizes[splitAxis] /= numSplit;

    SplitterDescriptor splitDesc(numSplit, inputShape.GetNumDimensions());

    for (unsigned int g = 0; g < numSplit; ++g)
    {
        // Set the size of the views.
        for (unsigned int dimIdx = 0; dimIdx < splitterDimSizes.size(); ++dimIdx)
        {
            splitDesc.SetViewSize(g, dimIdx, splitterDimSizes[dimIdx]);
        }
        splitDesc.SetViewOriginCoord(g, splitAxis, splitterDimSizes[splitAxis] * g);
    }

    IConnectableLayer* input = net->AddInputLayer(0, "input");
    IConnectableLayer* elementWiseUnary0 = net->AddElementwiseUnaryLayer(descriptor, "elementwiseunary_0");
    IConnectableLayer* elementWiseUnary1 = net->AddElementwiseUnaryLayer(descriptor, "elementwiseunary_1");
    IConnectableLayer* splitter = net->AddSplitterLayer(splitDesc, "splitter");

    // Connections
    Connect(input, splitter, inputTensorInfo, 0, 0);
    Connect(splitter, elementWiseUnary0, intermediateInfo, 0, 0);
    Connect(splitter, elementWiseUnary1, intermediateInfo, 1, 0);

    std::vector<IConnectableLayer*> elementwiseUnaryLayers{elementWiseUnary0, elementWiseUnary1};

    for (unsigned int i = 0; i < outputShapes.size(); ++i)
    {
        TensorInfo outputTensorInfo(outputShapes[i], armnn::DataType::Float32, qScale, qOffset);
        IConnectableLayer* output = net->AddOutputLayer(armnn::numeric_cast<LayerBindingId>(i));
        Connect(elementwiseUnaryLayers[i], output, outputTensorInfo, 0, 0);
    }

    std::map<int, std::vector<float>> inputTensorData = {{ 0, inputData }};
    std::map<int, std::vector<float>> expectedOutputData = {{ 0, expectedOutput0 }, { 1, expectedOutput1 }};

    armnn::IRuntime::CreationOptions options;
    armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));

    std::vector<armnn::BackendId> backends = { armnn::Compute::CpuAcc };
    armnn::IOptimizedNetworkPtr optimizedNet = armnn::Optimize(*net, backends, runtime->GetDeviceSpec());

    const armnn::Graph& theGraph = GetGraphForTesting(optimizedNet.get());

    // Load graph into runtime
    armnn::NetworkId networkIdentifier;
    runtime->LoadNetwork(networkIdentifier, std::move(optimizedNet));

    // Now check how many sub-tensors the ElementwiseUnary layers are using.
    auto TraceSubTensorHandleAncestry = [](armnn::ITensorHandle* const subTensorHandle)
    {
        if (subTensorHandle && subTensorHandle->GetParent())
        {
            return true;
        }
        return false;
    };

    for (auto&& layer : theGraph)
    {
        if (layer->GetType() == armnn::LayerType::ElementwiseUnary)
        {
            unsigned int numberOfSubTensors = 0;
            for (unsigned int i = 0; i < layer->GetNumInputSlots(); ++i)
            {
                const armnn::OutputSlot* slot = layer->GetInputSlot(i).GetConnectedOutputSlot();
                if (TraceSubTensorHandleAncestry(slot->GetOutputHandler().GetData()))
                {
                    ++numberOfSubTensors;
                }
            }
            // sub-tensors should be supported in this configuration
            ARMNN_ASSERT(numberOfSubTensors > 0);
        }
    }

    InputTensors inputTensors;
    inputTensors.reserve(inputTensorData.size());
    for (auto&& it : inputTensorData)
    {
        TensorInfo inputTensorInfo = runtime->GetInputTensorInfo(networkIdentifier, it.first);
        inputTensorInfo.SetConstant(true);
        inputTensors.push_back({it.first,
                                ConstTensor(inputTensorInfo, it.second.data())});
    }
    OutputTensors outputTensors;
    outputTensors.reserve(expectedOutputData.size());
    std::map<int, std::vector<float>> outputStorage;
    for (auto&& it : expectedOutputData)
    {
        std::vector<float> out(it.second.size());
        outputStorage.emplace(it.first, out);
        outputTensors.push_back({it.first,
                                 Tensor(runtime->GetOutputTensorInfo(networkIdentifier, it.first),
                                        outputStorage.at(it.first).data())});
    }

    // Does the inference.
    runtime->EnqueueWorkload(networkIdentifier, inputTensors, outputTensors);

    // Checks the results.
    float tolerance = 0.000001f;
    for (auto&& it : expectedOutputData)
    {
        std::vector<float> out = outputStorage.at(it.first);
        for (unsigned int i = 0; i < out.size(); ++i)
        {
            CHECK_MESSAGE(Compare<armnn::DataType::Float32>(it.second[i], out[i], tolerance) == true,
                          "Actual output: " << out[i] << ". Expected output:" << it.second[i]);
        }
    }
}

TEST_CASE("SplitteronXorYPaddingRequiredTest")
{
    using namespace armnn;

    unsigned int splitAxis = 2;
    unsigned int numSplit = 2;

    const TensorShape& inputShape = { 1, 1, 4, 4 };
    const armnn::TensorInfo intermediateInfo = armnn::TensorInfo({ 1, 1, 2, 4 }, armnn::DataType::Float32);
    const std::vector<TensorShape> outputShapes{{ 1, 1, 2, 4 },
                                                { 1, 1, 2, 4 }};

    const float qScale = 1.0f;
    const int32_t qOffset = 0;

    // Creates structures for input & output.
    std::vector<float> inputData{
        9.0f, 27.0f, 18.0f, 36.0f,
        18.0f, 9.0f, 18.0f, 9.0f,
        27.0f, 18.0f, 9.0f, 27.0f,
        9.0f, 27.0f, 9.0f, 18.0f,
    };

    std::vector<float> expectedOutput0{
        7.0f, 11.0f, 13.0f, 9.0f,
        7.0f, 11.0f, 13.0f, 9.0f
    };

    std::vector<float> expectedOutput1{
        9.0f, 11.0f, 12.0f, 7.0f,
        9.0f, 11.0f, 12.0f, 7.0f
    };

    // Builds up the structure of the network.
    INetworkPtr net(INetwork::Create());

    TensorInfo inputTensorInfo(inputShape, armnn::DataType::Float32, qScale, qOffset);

    // Pooling
    armnn::Pooling2dDescriptor descriptor;
    descriptor.m_PoolType = armnn::PoolingAlgorithm::Average;
    descriptor.m_PoolWidth = descriptor.m_PoolHeight = 3;
    descriptor.m_StrideX = descriptor.m_StrideY = 1;
    descriptor.m_PadLeft = 1;
    descriptor.m_PadRight = 1;
    descriptor.m_PadTop = 1;
    descriptor.m_PadBottom = 1;
    descriptor.m_PaddingMethod = armnn::PaddingMethod::IgnoreValue;

    // Splitter
    std::vector<unsigned int> splitterDimSizes(inputShape.GetNumDimensions());

    // Add current input shape to splitterDimSizes
    for (unsigned int i = 0; i < inputShape.GetNumDimensions(); ++i)
    {
        splitterDimSizes[i] = inputTensorInfo.GetShape()[i];
    }

    if (splitterDimSizes[splitAxis] % numSplit != 0)
    {
        throw ParseException("Number of splits must evenly divide the dimension");
    }

    splitterDimSizes[splitAxis] /= numSplit;

    SplitterDescriptor splitDesc(numSplit, inputShape.GetNumDimensions());

    for (unsigned int g = 0; g < numSplit; ++g)
    {
        // Set the size of the views.
        for (unsigned int dimIdx = 0; dimIdx < splitterDimSizes.size(); ++dimIdx)
        {
            splitDesc.SetViewSize(g, dimIdx, splitterDimSizes[dimIdx]);
        }
        splitDesc.SetViewOriginCoord(g, splitAxis, splitterDimSizes[splitAxis] * g);
    }

    IConnectableLayer* input = net->AddInputLayer(0, "input");
    IConnectableLayer* pooling2d0 = net->AddPooling2dLayer(descriptor, "pooling2d_0");
    IConnectableLayer* pooling2d1 = net->AddPooling2dLayer(descriptor, "pooling2d_1");
    IConnectableLayer* splitter = net->AddSplitterLayer(splitDesc, "splitter");

    // Connections
    Connect(input, splitter, inputTensorInfo, 0, 0);
    Connect(splitter, pooling2d0, intermediateInfo, 0, 0);
    Connect(splitter, pooling2d1, intermediateInfo, 1, 0);

    std::vector<IConnectableLayer*> pooling2dLayers{pooling2d0, pooling2d1};

    for (unsigned int i = 0; i < outputShapes.size(); ++i)
    {
        TensorInfo outputTensorInfo(outputShapes[i], armnn::DataType::Float32, qScale, qOffset);
        IConnectableLayer* output = net->AddOutputLayer(armnn::numeric_cast<LayerBindingId>(i));
        Connect(pooling2dLayers[i], output, outputTensorInfo, 0, 0);
    }

    std::map<int, std::vector<float>> inputTensorData = {{ 0, inputData }};
    std::map<int, std::vector<float>> expectedOutputData = {{ 0, expectedOutput0 }, { 1, expectedOutput1 }};

    armnn::IRuntime::CreationOptions options;
    armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));

    std::vector<armnn::BackendId> backends = { armnn::Compute::CpuAcc };
    armnn::IOptimizedNetworkPtr optimizedNet = armnn::Optimize(*net, backends, runtime->GetDeviceSpec());

    const armnn::Graph& theGraph = GetGraphForTesting(optimizedNet.get());

    // Load graph into runtime
    armnn::NetworkId networkIdentifier;
    runtime->LoadNetwork(networkIdentifier, std::move(optimizedNet));

    // Now check how many sub-tensors the Pooling2d layers are using.
    auto TraceSubTensorHandleAncestry = [](armnn::ITensorHandle* const subTensorHandle)
    {
        if (subTensorHandle && subTensorHandle->GetParent())
        {
            return true;
        }
        return false;
    };

    for (auto&& layer : theGraph)
    {
        if (layer->GetType() == armnn::LayerType::Pooling2d)
        {
            unsigned int numberOfSubTensors = 0;
            for (unsigned int i = 0; i < layer->GetNumInputSlots(); ++i)
            {
                const armnn::OutputSlot* slot = layer->GetInputSlot(i).GetConnectedOutputSlot();
                if (TraceSubTensorHandleAncestry(slot->GetOutputHandler().GetData()))
                {
                    ++numberOfSubTensors;
                }
            }
            // Sub-tensors should not be supported in this configuration, as the Pooling2d layers require padding
            ARMNN_ASSERT(numberOfSubTensors == 0);
        }
    }

    InputTensors inputTensors;
    inputTensors.reserve(inputTensorData.size());
    for (auto&& it : inputTensorData)
    {
        TensorInfo inputTensorInfo = runtime->GetInputTensorInfo(networkIdentifier, it.first);
        inputTensorInfo.SetConstant(true);
        inputTensors.push_back({it.first,
                                ConstTensor(inputTensorInfo, it.second.data())});
    }
    OutputTensors outputTensors;
    outputTensors.reserve(expectedOutputData.size());
    std::map<int, std::vector<float>> outputStorage;
    for (auto&& it : expectedOutputData)
    {
        std::vector<float> out(it.second.size());
        outputStorage.emplace(it.first, out);
        outputTensors.push_back({it.first,
                                 Tensor(runtime->GetOutputTensorInfo(networkIdentifier, it.first),
                                        outputStorage.at(it.first).data())});
    }

    // Does the inference.
    runtime->EnqueueWorkload(networkIdentifier, inputTensors, outputTensors);

    // Checks the results.
    float tolerance = 0.000001f;
    for (auto&& it : expectedOutputData)
    {
        std::vector<float> out = outputStorage.at(it.first);
        for (unsigned int i = 0; i < out.size(); ++i)
        {
            CHECK_MESSAGE(Compare<armnn::DataType::Float32>(it.second[i], out[i], tolerance) == true,
                          "Actual output: " << out[i] << ". Expected output:" << it.second[i]);
        }
    }
}

TEST_CASE("NeonTensorHandleFactoryMemoryManaged")
{
    std::shared_ptr<NeonMemoryManager> memoryManager = std::make_shared<NeonMemoryManager>(
        std::make_unique<arm_compute::Allocator>(),
        BaseMemoryManager::MemoryAffinity::Offset);
    NeonTensorHandleFactory handleFactory(memoryManager);
    TensorInfo info({ 1, 1, 2, 1 }, DataType::Float32);

    // create TensorHandle with memory managed
    auto handle = handleFactory.CreateTensorHandle(info, true);
    handle->Manage();
    handle->Allocate();

    memoryManager->Acquire();
    {
        float* buffer = reinterpret_cast<float*>(handle->Map());
        CHECK(buffer != nullptr); // Yields a valid pointer
        buffer[0] = 1.5f;
        buffer[1] = 2.5f;
        CHECK(buffer[0] == 1.5f); // Memory is writable and readable
        CHECK(buffer[1] == 2.5f); // Memory is writable and readable
    }
    memoryManager->Release();

    memoryManager->Acquire();
    {
        float* buffer = reinterpret_cast<float*>(handle->Map());
        CHECK(buffer != nullptr); // Yields a valid pointer
        buffer[0] = 3.5f;
        buffer[1] = 4.5f;
        CHECK(buffer[0] == 3.5f); // Memory is writable and readable
        CHECK(buffer[1] == 4.5f); // Memory is writable and readable
    }
    memoryManager->Release();

    float testPtr[2] = { 2.5f, 5.5f };
    // Cannot import as import is disabled
    CHECK_THROWS_AS(handle->Import(static_cast<void*>(testPtr), MemorySource::Malloc), MemoryImportException);
}

TEST_CASE("NeonTensorHandleFactoryImport")
{
    std::shared_ptr<NeonMemoryManager> memoryManager = std::make_shared<NeonMemoryManager>(
        std::make_unique<arm_compute::Allocator>(),
        BaseMemoryManager::MemoryAffinity::Offset);
    NeonTensorHandleFactory handleFactory(memoryManager);
    TensorInfo info({ 1, 1, 2, 1 }, DataType::Float32);

    // create TensorHandle without memory managed
    auto handle = handleFactory.CreateTensorHandle(info, false);
    handle->Manage();
    handle->Allocate();
    memoryManager->Acquire();

    // No buffer allocated when import is enabled
    CHECK((PolymorphicDowncast<NeonTensorHandle*>(handle.get()))->GetTensor().buffer() == nullptr);

    float testPtr[2] = { 2.5f, 5.5f };
    // Correctly import
    CHECK(handle->Import(static_cast<void*>(testPtr), MemorySource::Malloc));
    float* buffer = reinterpret_cast<float*>(handle->Map());
    CHECK(buffer != nullptr); // Yields a valid pointer after import
    CHECK(buffer == testPtr); // buffer is pointing to testPtr
    // Memory is writable and readable with correct value
    CHECK(buffer[0] == 2.5f);
    CHECK(buffer[1] == 5.5f);
    buffer[0] = 3.5f;
    buffer[1] = 10.0f;
    CHECK(buffer[0] == 3.5f);
    CHECK(buffer[1] == 10.0f);
    memoryManager->Release();
}

TEST_CASE("NeonTensorHandleCanBeImported")
{
    std::shared_ptr<NeonMemoryManager> memoryManager = std::make_shared<NeonMemoryManager>(
        std::make_unique<arm_compute::Allocator>(),
        BaseMemoryManager::MemoryAffinity::Offset);
    NeonTensorHandleFactory handleFactory(memoryManager);
    TensorInfo info({ 1, 1, 2, 1 }, DataType::Float32);

    // create TensorHandle (Memory Managed status is irrelevant)
    auto handle = handleFactory.CreateTensorHandle(info, false);

    // Create an aligned buffer
    float alignedBuffer[2] = { 2.5f, 5.5f };
    // Check aligned buffers return true
    CHECK(handle->CanBeImported(&alignedBuffer, MemorySource::Malloc) == true);

    // Create a misaligned buffer from the aligned one
    float* misalignedBuffer = reinterpret_cast<float*>(reinterpret_cast<char*>(alignedBuffer) + 1);
    // Check misaligned buffers return false
    CHECK(handle->CanBeImported(static_cast<void*>(misalignedBuffer), MemorySource::Malloc) == false);
}

TEST_CASE("NeonTensorHandleSupportsInPlaceComputation")
{
    std::shared_ptr<NeonMemoryManager> memoryManager = std::make_shared<NeonMemoryManager>();
    NeonTensorHandleFactory handleFactory(memoryManager);

    // NeonTensorHandleFactory supports InPlaceComputation
    ARMNN_ASSERT(handleFactory.SupportsInPlaceComputation());
}

}