/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/arena_planner.h"

#include <stddef.h>

#include <algorithm>
#include <cstdint>
#include <limits>
#include <memory>
#include <utility>
#include <vector>

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/graph_info.h"
#include "tensorflow/lite/simple_memory_arena.h"

namespace tflite {
namespace {

constexpr int32_t kNodeNotAssigned = std::numeric_limits<int32_t>::max();

}  // namespace

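// For orientation, a minimal usage sketch (not part of this file; the real
// call sites live in the interpreter/subgraph code, and `graph_info` /
// `last_node_index` are stand-ins for whatever the caller provides):
//
//   std::unique_ptr<GraphInfo> graph_info = ...;  // adapter over a subgraph
//   ArenaPlanner planner(context, std::move(graph_info),
//                        /*preserve_all_tensors=*/false,
//                        /*tensor_alignment=*/kDefaultTensorAlignment,
//                        /*subgraph_index=*/0);
//   TF_LITE_ENSURE_STATUS(planner.PlanAllocations());
//   TF_LITE_ENSURE_STATUS(planner.ExecuteAllocations(0, last_node_index));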
ArenaPlanner::ArenaPlanner(TfLiteContext* context,
                           std::unique_ptr<GraphInfo> graph_info,
                           bool preserve_all_tensors, int tensor_alignment,
                           int subgraph_index)
    : context_(context),
      graph_info_(std::move(graph_info)),
      arena_(kDefaultArenaAlignment, subgraph_index),
      persistent_arena_(kDefaultArenaAlignment, subgraph_index),
      preserve_all_tensors_(preserve_all_tensors),
      tensor_alignment_(tensor_alignment) {}

ArenaPlanner::~ArenaPlanner() {
  arena_.ReleaseBuffer();
  persistent_arena_.ReleaseBuffer();
}

std::intptr_t ArenaPlanner::BasePointer(TfLiteAllocationType type) {
  if (type == kTfLiteArenaRwPersistent) {
    return persistent_arena_.BasePointer();
  }
  if (type == kTfLiteArenaRw) {
    return arena_.BasePointer();
  }
  return 0;
}

TfLiteStatus ArenaPlanner::ResetAllocations() {
  TF_LITE_ENSURE_STATUS(arena_.ClearPlan());
  TF_LITE_ENSURE_STATUS(persistent_arena_.ClearPlan());
  allocs_.clear();
  allocs_.resize(graph_info_->num_tensors());
  return kTfLiteOk;
}

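// Releases the arena allocation of every kTfLiteArenaRw tensor whose first use
// comes after `node`, and clears its data pointer, so the tail of the plan can
// be recalculated.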
TfLiteStatus ArenaPlanner::ResetAllocationsAfter(int node) {
  for (int i = 0; i < static_cast<int>(allocs_.size()); ++i) {
    if (allocs_[i].first_node > node && allocs_[i].size > 0) {
      TfLiteTensor& tensor = *graph_info_->tensor(i);
      if (tensor.allocation_type == kTfLiteArenaRw) {
        TF_LITE_ENSURE_STATUS(arena_.Deallocate(context_, allocs_[i]));
        allocs_[i].reset();
        tensor.data.raw = nullptr;
      }
    }
  }

  return kTfLiteOk;
}

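// Builds the allocation plan: for every tensor, records the node at which it
// must be allocated (alloc_node_) and, unless preserve_all_tensors_ is set,
// the node after which it may be deallocated (dealloc_node_). Graph inputs,
// outputs and variable tensors get an extra reference so they are never
// reclaimed.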
TfLiteStatus ArenaPlanner::PlanAllocations() {
  // Invalidate any existing data.
  TF_LITE_ENSURE_STATUS(ResetAllocations());
  // Mark every tensor as not yet allocated or deallocated by any node.
  alloc_node_.assign(graph_info_->num_tensors(), kNodeNotAssigned);
  dealloc_node_.assign(graph_info_->num_tensors(), kNodeNotAssigned);

  // Keeps track of references to each tensor.
  std::vector<int> refcounts(graph_info_->num_tensors(), 0);

  auto allocate = [this](int node, int tensor) -> TfLiteStatus {
    if (alloc_node_[tensor] != kNodeNotAssigned) {
      // Tensor has already been allocated.
      return kTfLiteOk;
    }
    TF_LITE_ENSURE(context_, dealloc_node_[tensor] == kNodeNotAssigned);
    alloc_node_[tensor] = node;
    return kTfLiteOk;
  };

  auto deallocate = [this](int node, int tensor) -> TfLiteStatus {
    if (alloc_node_[tensor] == kNodeNotAssigned) {
      // The tensor was never allocated, so there is nothing to deallocate.
      // This happens with constant tensors.
      return kTfLiteOk;
    }
    TF_LITE_ENSURE(context_, dealloc_node_[tensor] == kNodeNotAssigned);
    dealloc_node_[tensor] = node;
    return kTfLiteOk;
  };

  // We must make sure the output tensors are never overwritten. We do that by
  // artificially adding one to their ref-counts so they are never selected
  // for deallocation.
  for (int tensor_index : graph_info_->outputs()) {
    refcounts[tensor_index]++;
  }

  // Variable tensors should also never be overwritten and need to stay alive
  // for the entire duration of inference.
  for (int tensor_index : graph_info_->variables()) {
    // Increase the reference count for variable tensors by one, so they are
    // never deallocated.
    refcounts[tensor_index]++;
    // `variables` is a subgraph-level list; its entries should never be
    // kTfLiteOptionalTensor.
    TF_LITE_ENSURE(context_, tensor_index != kTfLiteOptionalTensor);
    // Variable tensors are allocated at the very beginning of the graph.
    TF_LITE_ENSURE_STATUS(allocate(0, tensor_index));
  }

  // Queue all graph inputs for allocation and make sure they are never
  // overwritten.
  for (int tensor_index : graph_info_->inputs()) {
    if (tensor_index != kTfLiteOptionalTensor) {
      refcounts[tensor_index]++;
      TF_LITE_ENSURE_STATUS(allocate(0, tensor_index));
    }
  }

  // Count references to node input tensors.
  for (size_t i = 0; i < graph_info_->num_execution_nodes(); ++i) {
    const TfLiteNode& node = graph_info_->node(i);
    TfLiteIntArray* node_inputs = node.inputs;
    for (int j = 0; j < node_inputs->size; ++j) {
      int tensor_index = node_inputs->data[j];
      if (tensor_index != kTfLiteOptionalTensor) {
        refcounts[tensor_index]++;
      }
    }
  }

  // Go through the graph in execution order.
  for (size_t i = 0; i < graph_info_->num_execution_nodes(); ++i) {
    const TfLiteNode& node = graph_info_->node(i);

    // First queue output tensors for allocation.
    TfLiteIntArray* node_outputs = node.outputs;
    for (int j = 0; j < node_outputs->size; ++j) {
      int tensor_index = node_outputs->data[j];
      TF_LITE_ENSURE_STATUS(allocate(i, tensor_index));
    }

    // Then update the ref-counts of the node's inputs, and if necessary queue
    // them for deallocation.
    if (!preserve_all_tensors_) {
      TfLiteIntArray* node_inputs = node.inputs;
      for (int j = 0; j < node_inputs->size; ++j) {
        int tensor_index = node_inputs->data[j];
        if (tensor_index != kTfLiteOptionalTensor) {
          refcounts[tensor_index]--;
          if (refcounts[tensor_index] == 0) {
            TF_LITE_ENSURE_STATUS(deallocate(i, tensor_index));
          }
        }
      }
    }
  }

  // Note that graph outputs will never be scheduled for deallocation. We
  // could do that here for completeness, but it won't have any effect.
  return kTfLiteOk;
}

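// Assigns arena offsets for every tensor allocated between first_node and
// last_node (including any temporaries those nodes declare), commits the
// arenas, and resolves the affected tensors' data pointers against the
// committed buffers.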
TfLiteStatus ArenaPlanner::ExecuteAllocations(int first_node, int last_node) {
  // Grow the size of `allocs_` if necessary. This allows allocating temporary
  // tensors in an op's `prepare` function.
  TF_LITE_ENSURE(context_, graph_info_->num_tensors() >= allocs_.size());
  alloc_node_.resize(graph_info_->num_tensors(), kNodeNotAssigned);
  dealloc_node_.resize(graph_info_->num_tensors(), kNodeNotAssigned);
  allocs_.resize(graph_info_->num_tensors());
  // Set allocation and deallocation for temporary tensors.
  for (size_t i = first_node; i <= static_cast<size_t>(last_node) &&
                              i < graph_info_->num_execution_nodes();
       ++i) {
    const TfLiteNode& node = graph_info_->node(i);
    TfLiteIntArray* node_temporaries = node.temporaries;
    for (int j = 0; j < node_temporaries->size; ++j) {
      int tensor_index = node_temporaries->data[j];
      alloc_node_[tensor_index] = i;
      if (!preserve_all_tensors_) {
        dealloc_node_[tensor_index] = i;
      }
    }
  }

  TF_LITE_ENSURE_STATUS(CalculateAllocations(first_node, last_node));
  TF_LITE_ENSURE_STATUS(Commit());

  for (int i = 0; i < static_cast<int>(graph_info_->num_tensors()); ++i) {
    TF_LITE_ENSURE_STATUS(ResolveTensorAllocation(i));
  }

  return kTfLiteOk;
}

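// Frees the buffer backing the non-persistent (kTfLiteArenaRw) arena and
// invalidates the data pointers of the tensors that lived in it; persistent
// tensors are left untouched.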
TfLiteStatus ArenaPlanner::ReleaseNonPersistentMemory() {
  // Clear non-persistent arena's buffer.
  TF_LITE_ENSURE_STATUS(arena_.ReleaseBuffer());
  // Set data pointers for all non-persistent tensors to nullptr.
  for (int i = 0; i < static_cast<int>(graph_info_->num_tensors()); ++i) {
    TfLiteTensor& tensor = *graph_info_->tensor(i);
    if (tensor.allocation_type == kTfLiteArenaRw) {
      tensor.data.raw = nullptr;
    }
  }
  return kTfLiteOk;
}

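// Re-allocates the non-persistent arena's buffer and points the kTfLiteArenaRw
// tensors back into it; the counterpart of ReleaseNonPersistentMemory(). A
// minimal sketch of the intended release/acquire cycle (normally driven by the
// interpreter; `planner` is a stand-in for however the object is held):
//
//   TF_LITE_ENSURE_STATUS(planner->ReleaseNonPersistentMemory());
//   // ... the freed memory is available to the rest of the application ...
//   TF_LITE_ENSURE_STATUS(planner->AcquireNonPersistentMemory());
//   // kTfLiteArenaRw tensor data pointers are valid again.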
TfLiteStatus ArenaPlanner::AcquireNonPersistentMemory() {
  // First commit arena_ to allocate underlying buffer.
  TF_LITE_ENSURE_STATUS(arena_.Commit(context_));
  // Resolve allocations for all tensors not on the persistent arena.
  for (int i = 0; i < static_cast<int>(graph_info_->num_tensors()); ++i) {
    TfLiteTensor& tensor = *graph_info_->tensor(i);
    if (tensor.allocation_type == kTfLiteArenaRw) {
      TF_LITE_ENSURE_STATUS(ResolveTensorAllocation(i));
    }
  }
  return kTfLiteOk;
}

bool ArenaPlanner::HasNonPersistentMemory() {
  return arena_.GetBufferSize() != 0;
}

void ArenaPlanner::DumpDebugInfo(const std::vector<int>& execution_plan) const {
  arena_.DumpDebugInfo("kTfLiteArenaRw Dump:", execution_plan);
  persistent_arena_.DumpDebugInfo("kTfLiteArenaRwPersistent Dump:",
                                  execution_plan);
}

void ArenaPlanner::GetAllocInfo(size_t* arena_size,
                                size_t* arena_persist_size) const {
  *arena_size = arena_.GetBufferSize();
  *arena_persist_size = persistent_arena_.GetBufferSize();
}

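// Commits both arenas, making sure their underlying buffers are large enough
// for the allocation plans computed so far.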
TfLiteStatus ArenaPlanner::Commit() {
  TF_LITE_ENSURE_STATUS(arena_.Commit(context_));
  TF_LITE_ENSURE_STATUS(persistent_arena_.Commit(context_));
  return kTfLiteOk;
}

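// Returns the indices of the tensors first allocated between first_node and
// last_node, ordered by the greedy heuristic used for offset calculation:
// tensors that stay alive for the whole inference come first (sorted by
// index), followed by the remaining tensors in decreasing order of size, with
// ties broken by allocation time.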
std::vector<int32_t> ArenaPlanner::CreateTensorAllocationVector(int first_node,
                                                                int last_node) {
  auto tensor_compare = [this](int idx1, int idx2) {
    // Tensors that stay alive for the whole model inference are allocated at
    // the beginning of the memory slice. Their relative order does not
    // actually matter, so they are simply sorted by index.
    if (this->alloc_node_[idx1] == 0 &&
        this->dealloc_node_[idx1] == kNodeNotAssigned) {
      if (this->alloc_node_[idx2] == 0 &&
          this->dealloc_node_[idx2] == kNodeNotAssigned) {
        return idx1 < idx2;
      }
      return true;
    }
    if (this->alloc_node_[idx2] == 0 &&
        this->dealloc_node_[idx2] == kNodeNotAssigned) {
      return false;
    }

    // All other tensors are sorted in non-increasing order of their size.
    auto size1 = this->graph_info_->tensor(idx1)->bytes;
    auto size2 = this->graph_info_->tensor(idx2)->bytes;
    if (size1 != size2) {
      return size1 > size2;
    }
    // Tensors with equal size are sorted in order of their allocation time.
    return this->alloc_node_[idx1] < this->alloc_node_[idx2];
  };

  std::vector<int32_t> tensor_order;
  for (int i = 0; i < static_cast<int>(graph_info_->num_tensors()); ++i) {
    if (alloc_node_[i] >= first_node && alloc_node_[i] <= last_node) {
      tensor_order.push_back(i);
    }
  }
  // Sort the tensors into the order in which their allocation offsets will be
  // calculated.
  std::sort(tensor_order.begin(), tensor_order.end(), tensor_compare);

  return tensor_order;
}

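// Computes arena offsets for all tensors allocated in [first_node, last_node]:
// kTfLiteArenaRw tensors go into the non-persistent arena with their computed
// lifetimes, while kTfLiteArenaRwPersistent tensors go into the persistent
// arena with an unbounded lifetime.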
TfLiteStatus ArenaPlanner::CalculateAllocations(int first_node, int last_node) {
  // Indices of tensors in the order in which their allocation offsets will be
  // calculated.
  const std::vector<int32_t> tensor_order =
      CreateTensorAllocationVector(first_node, last_node);

  // Deallocate any tensor in the range that was already allocated.
  for (const auto& tensor_index : tensor_order) {
    TfLiteTensor& tensor = *graph_info_->tensor(tensor_index);
    if (tensor.allocation_type == kTfLiteArenaRw &&
        allocs_[tensor_index].size != 0) {
      TF_LITE_ENSURE_STATUS(arena_.Deallocate(context_, allocs_[tensor_index]));
    }
  }

  // Compute an arena offset for each tensor, in the order established above.
  for (const auto& tensor_index : tensor_order) {
    TfLiteTensor& tensor = *graph_info_->tensor(tensor_index);
    if (tensor.allocation_type == kTfLiteArenaRw) {
      TF_LITE_ENSURE_STATUS(
          arena_.Allocate(context_, tensor_alignment_, tensor.bytes,
                          tensor_index, alloc_node_[tensor_index],
                          dealloc_node_[tensor_index], &allocs_[tensor_index]));
    }
    // Check allocs_[].size to avoid reallocating persistent tensors.
    if (tensor.allocation_type == kTfLiteArenaRwPersistent &&
        allocs_[tensor_index].size == 0) {
      TF_LITE_ENSURE_STATUS(persistent_arena_.Allocate(
          context_, tensor_alignment_, tensor.bytes, tensor_index,
          /*first_node=*/alloc_node_[tensor_index],
          /*last_node=*/std::numeric_limits<int32_t>::max(),
          &allocs_[tensor_index]));
    }
  }
  return kTfLiteOk;
}

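// Turns a tensor's arena allocation (an offset into one of the arenas) into an
// actual data pointer, once the owning arena has been committed.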
TfLiteStatus ArenaPlanner::ResolveTensorAllocation(int tensor_index) {
  TfLiteTensor& tensor = *graph_info_->tensor(tensor_index);
  if (tensor.allocation_type == kTfLiteArenaRw) {
    // Skip resolution if the size of the tensor is zero, leaving the data
    // pointer as nullptr.
    if (allocs_[tensor_index].size != 0) {
      TF_LITE_ENSURE_STATUS(arena_.ResolveAlloc(context_, allocs_[tensor_index],
                                                &tensor.data.raw));
    }
  }
  if (tensor.allocation_type == kTfLiteArenaRwPersistent) {
    TF_LITE_ENSURE_STATUS(persistent_arena_.ResolveAlloc(
        context_, allocs_[tensor_index], &tensor.data.raw));
  }
  return kTfLiteOk;
}

}  // namespace tflite