// xref: /aosp_15_r20/external/armnn/samples/AsyncExecutionSample.cpp (revision 89c4ff92f2867872bb9e2354d150bf0c8c502810)
//
// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
#include <armnn/INetwork.hpp>
#include <armnn/IRuntime.hpp>
#include <armnn/Utils.hpp>
#include <armnn/Descriptors.hpp>

#include <iostream>
#include <thread>

/// A simple example of using the ArmNN SDK API to run a network multiple times with different inputs in an asynchronous
/// manner.
///
/// Background info: The usual runtime->EnqueueWorkload, which is used to trigger the execution of a network, is not
///                  thread safe. Each workload has memory assigned to it which would be overwritten by each thread.
///                  Before we added support for this you had to load a network multiple times to execute it at the
///                  same time. Every time a network is loaded, it takes up memory on your device. Making the
///                  execution thread safe helps to reduce the memory footprint for concurrent executions significantly.
///                  This example shows you how to execute a model concurrently (multiple threads) while still only
///                  loading it once.
///
/// As in most of our simple samples, the network in this example will ask the user for a single input number for each
/// execution of the network.
/// The network consists of a single fully connected layer with a single neuron. The neuron's weight is set to 1.0f
/// to produce an output number that is the same as the input.
main()28 int main()
29 {
30     using namespace armnn;
31 
32     // The first part of this code is very similar to the SimpleSample.cpp you should check it out for comparison
33     // The interesting part starts when the graph is loaded into the runtime
34 
35     std::vector<float> inputs;
36     float number1;
37     std::cout << "Please enter a number for the first iteration: " << std::endl;
38     std::cin >> number1;
39     float number2;
40     std::cout << "Please enter a number for the second iteration: " << std::endl;
41     std::cin >> number2;
42 
43     // Turn on logging to standard output
44     // This is useful in this sample so that users can learn more about what is going on
45     ConfigureLogging(true, false, LogSeverity::Warning);
46 
47     // Construct ArmNN network
48     NetworkId networkIdentifier;
49     INetworkPtr myNetwork = INetwork::Create();
50 
51     float weightsData[] = {1.0f}; // Identity
52     TensorInfo weightsInfo(TensorShape({1, 1}), DataType::Float32, 0.0f, 0, true);
53     weightsInfo.SetConstant();
54     ConstTensor weights(weightsInfo, weightsData);
55 
56     // Constant layer that now holds weights data for FullyConnected
57     IConnectableLayer* const constantWeightsLayer = myNetwork->AddConstantLayer(weights, "const weights");
58 
59     FullyConnectedDescriptor fullyConnectedDesc;
60     IConnectableLayer* const fullyConnectedLayer = myNetwork->AddFullyConnectedLayer(fullyConnectedDesc,
61                                                                                      "fully connected");
62     IConnectableLayer* InputLayer  = myNetwork->AddInputLayer(0);
63     IConnectableLayer* OutputLayer = myNetwork->AddOutputLayer(0);
64 
65     InputLayer->GetOutputSlot(0).Connect(fullyConnectedLayer->GetInputSlot(0));
66     constantWeightsLayer->GetOutputSlot(0).Connect(fullyConnectedLayer->GetInputSlot(1));
67     fullyConnectedLayer->GetOutputSlot(0).Connect(OutputLayer->GetInputSlot(0));
68 
69     // Create ArmNN runtime
70     IRuntime::CreationOptions options; // default options
71     IRuntimePtr run = IRuntime::Create(options);
72 
73     //Set the tensors in the network.
74     TensorInfo inputTensorInfo(TensorShape({1, 1}), DataType::Float32);
75     InputLayer->GetOutputSlot(0).SetTensorInfo(inputTensorInfo);
76 
77     TensorInfo outputTensorInfo(TensorShape({1, 1}), DataType::Float32);
78     fullyConnectedLayer->GetOutputSlot(0).SetTensorInfo(outputTensorInfo);
79     constantWeightsLayer->GetOutputSlot(0).SetTensorInfo(weightsInfo);
80 
81     // Optimise ArmNN network
82     IOptimizedNetworkPtr optNet = Optimize(*myNetwork, {Compute::CpuRef}, run->GetDeviceSpec());
83     if (!optNet)
84     {
85         // This shouldn't happen for this simple sample, with reference backend.
86         // But in general usage Optimize could fail if the hardware at runtime cannot
87         // support the model that has been provided.
88         std::cerr << "Error: Failed to optimise the input network." << std::endl;
89         return 1;
90     }
91 
92     // Load graph into runtime.
93     std::string errmsg; // To hold an eventual error message if loading the network fails
94     // Add network properties to enable async execution. The MemorySource::Undefined variables indicate
95     // that neither inputs nor outputs will be imported. Importing will be covered in another example.
96     armnn::INetworkProperties networkProperties(true, MemorySource::Undefined, MemorySource::Undefined);
97     run->LoadNetwork(networkIdentifier,
98                      std::move(optNet),
99                      errmsg,
100                      networkProperties);
101 
102     // Creates structures for inputs and outputs. A vector of float for each execution.
103     std::vector<std::vector<float>> inputData{{number1}, {number2}};
104     std::vector<std::vector<float>> outputData;
105     outputData.resize(2, std::vector<float>(1));
106 
107     inputTensorInfo = run->GetInputTensorInfo(networkIdentifier, 0);
108     inputTensorInfo.SetConstant(true);
109     std::vector<InputTensors> inputTensors
110     {
111         {{0, armnn::ConstTensor(inputTensorInfo, inputData[0].data())}},
112         {{0, armnn::ConstTensor(inputTensorInfo, inputData[1].data())}}
113     };
114     std::vector<OutputTensors> outputTensors
115     {
116         {{0, armnn::Tensor(run->GetOutputTensorInfo(networkIdentifier, 0), outputData[0].data())}},
117         {{0, armnn::Tensor(run->GetOutputTensorInfo(networkIdentifier, 0), outputData[1].data())}}
118     };
119 
120     // Lambda function to execute the network. We use it as thread function.
121     auto execute = [&](unsigned int executionIndex)
122     {
123         auto memHandle = run->CreateWorkingMemHandle(networkIdentifier);
124         run->Execute(*memHandle, inputTensors[executionIndex], outputTensors[executionIndex]);
125     };
126 
127     // Prepare some threads and let each execute the network with a different input
128     std::vector<std::thread> threads;
129     for (unsigned int i = 0; i < inputTensors.size(); ++i)
130     {
131         threads.emplace_back(std::thread(execute, i));
132     }
133 
134     // Wait for the threads to finish
135     for (std::thread& t : threads)
136     {
137         if(t.joinable())
138         {
139             t.join();
140         }
141     }
142 
143     std::cout << "Your numbers were " << outputData[0][0] << " and " << outputData[1][0] << std::endl;
144     return 0;
145 
146 }
147