xref: /aosp_15_r20/external/tensorflow/tensorflow/lite/toco/allocate_transient_arrays.cc (revision b6fb3261f9314811a0f4371741dbb8839866f948)
1 /* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 #include <algorithm>
16 #include <memory>
17 #include <set>
18 #include <string>
19 #include <unordered_map>
20 #include <utility>
21 #include <vector>
22 
23 #include "tensorflow/lite/toco/allocate_transient_arrays.h"
24 #include "tensorflow/lite/toco/model.h"
25 #include "tensorflow/lite/toco/model_flags.pb.h"
26 #include "tensorflow/lite/toco/tooling_util.h"
27 #include "tensorflow/core/platform/logging.h"
28 
29 namespace toco {
30 namespace {
31 
32 // The life span of an array.
33 struct ArrayLifespan {
34   // If true, the array is persistent state (as in a RNN). In that case,
35   // its allocation is permanent and the first_op, last_op members are
36   // unused. (The term 'transient' is a misnomer and we should think in
37   // terms of 'workspace' instead).
38   bool persistent = false;
39   // Index of the first op addressing that array. The array must be allocated
40   // just before executing this op.
41   std::size_t first_op = 0;
42   // Index of the last op addressing that array. We want to deallocate the array
43   // immediately after executing this op.
44   std::size_t last_op = 0;
45 };
46 
StartsAt(const ArrayLifespan & lifespan,std::size_t op_index)47 bool StartsAt(const ArrayLifespan& lifespan, std::size_t op_index) {
48   return !lifespan.persistent && lifespan.first_op == op_index;
49 }
50 
EndsAt(const ArrayLifespan & lifespan,std::size_t op_index)51 bool EndsAt(const ArrayLifespan& lifespan, std::size_t op_index) {
52   return !lifespan.persistent && lifespan.last_op == op_index;
53 }
54 
55 // Helper function for ComputeArrayLifespans: updates one ArrayLifespan for
56 // one array for one op.
UpdateArrayLifespan(const std::string & array_name,std::size_t op_index,std::unordered_map<std::string,ArrayLifespan> * array_lifespans)57 void UpdateArrayLifespan(
58     const std::string& array_name, std::size_t op_index,
59     std::unordered_map<std::string, ArrayLifespan>* array_lifespans) {
60   if (array_lifespans->count(array_name)) {
61     auto& lifespan = array_lifespans->at(array_name);
62     if (!lifespan.persistent) {
63       lifespan.first_op = std::min(lifespan.first_op, op_index);
64       lifespan.last_op = std::max(lifespan.last_op, op_index);
65     }
66   } else {
67     ArrayLifespan lifespan;
68     lifespan.first_op = op_index;
69     lifespan.last_op = op_index;
70     (*array_lifespans)[array_name] = lifespan;
71   }
72 }
73 
74 // Computes the ArrayLifespan for each array.
ComputeArrayLifespans(const Model & model,std::unordered_map<std::string,ArrayLifespan> * array_lifespans)75 void ComputeArrayLifespans(
76     const Model& model,
77     std::unordered_map<std::string, ArrayLifespan>* array_lifespans) {
78   CHECK(array_lifespans->empty());
79   for (const auto& rnn_state : model.flags.rnn_states()) {
80     ArrayLifespan lifespan;
81     lifespan.persistent = true;
82     (*array_lifespans)[rnn_state.state_array()] = lifespan;
83   }
84   for (std::size_t op_index = 0; op_index < model.operators.size();
85        op_index++) {
86     const auto& op = model.operators[op_index];
87     for (const auto& input : op->inputs) {
88       UpdateArrayLifespan(input, op_index, array_lifespans);
89     }
90     for (const auto& output : op->outputs) {
91       UpdateArrayLifespan(output, op_index, array_lifespans);
92     }
93   }
94 }
95 
operator ==(const Alloc & a,const Alloc & b)96 inline bool operator==(const Alloc& a, const Alloc& b) {
97   CHECK(a.start != b.start || a.end == b.end);
98   return a.start == b.start;
99 }
100 
101 // Helper to keep track of total allocation size and of currently live
102 // allocations, and containing the core allocation routine.
103 class Allocator {
104  public:
Allocator()105   Allocator() : total_size_(0) {}
106 
107   // Core allocation routine.
Allocate(std::size_t size,Alloc * result)108   void Allocate(std::size_t size, Alloc* result) {
109     if (size == 0) {
110       // zero-sized arrays get a dummy alloc of (0, 0) that does not
111       // need to be kept in the books (no need to insert that into
112       // live_allocs_).
113       // Note: zero-sized arrays shouldn't exist, but handling that case
114       // here allows such pathological cases to get a cleaner error message
115       // later instead of generating spurious allocator failures.
116       result->start = 0;
117       result->end = 0;
118       return;
119     }
120     // Naive algorithm: pick the first gap between live allocations,
121     // that is wide enough for the new array.
122     std::size_t pos = 0;
123     for (const auto& a : live_allocs_) {
124       if (a.start >= pos + size) {
125         result->start = pos;
126         result->end = pos + size;
127         live_allocs_.insert(*result);
128         return;
129       }
130       pos = a.end;
131     }
132     // No sufficiently wide gap was found before an existing live allocation,
133     // so we allocate the new array at the end of the allocation space.
134     // We may then have to grow total_size_.
135     total_size_ = std::max(total_size_, pos + size);
136     result->start = pos;
137     result->end = pos + size;
138     live_allocs_.insert(*result);
139   }
140 
Deallocate(const Alloc & a)141   void Deallocate(const Alloc& a) {
142     // Special-case dummy allocs for zero-sized arrays.
143     if (a.start == 0 && a.end == 0) {
144       // Nothing needs to be done, these aren't kept in the books.
145       return;
146     }
147     auto iter = live_allocs_.lower_bound(a);
148     CHECK(iter != live_allocs_.end());
149     CHECK(*iter == a);
150     live_allocs_.erase(iter);
151   }
152 
total_size() const153   std::size_t total_size() const { return total_size_; }
154 
155  private:
156   std::size_t total_size_;
157   std::set<Alloc> live_allocs_;
158 };
159 
160 // Returns the required transient allocation size (in bytes) for a given array,
161 // or 0 if it's not a transient array.
TransientArraySize(const Model & model,const std::string & array_name,std::size_t transient_data_alignment)162 std::size_t TransientArraySize(const Model& model,
163                                const std::string& array_name,
164                                std::size_t transient_data_alignment) {
165   if (!IsAllocatableTransientArray(model, array_name)) {
166     return 0;
167   }
168   const auto& array = &model.GetArray(array_name);
169   CHECK(array->has_shape())
170       << "Array '" << array_name << "' doesn't have a shape";
171   if (array->data_type == ArrayDataType::kNone) {
172     // Catch a typical issue at the moment with RNN states
173     for (const auto& rnn_state : model.flags.rnn_states()) {
174       if (rnn_state.state_array() == array_name) {
175         LOG(FATAL)
176             << "A RNN state array, " << array_name << ", still does not "
177             << "have a known data type after all graph transformations have "
178             << "run.";
179       }
180     }
181     LOG(FATAL) << "An array, " << array_name << ", still does not "
182                << "have a known data type after all graph transformations have "
183                << "run.";
184   }
185   const std::size_t elem_size = ElementSize(array->data_type);
186   const std::size_t raw_size =
187       elem_size * RequiredBufferSizeForShape(array->shape());
188   const std::size_t rounded_size =
189       RoundUpToNextMultipleOf(raw_size, transient_data_alignment);
190   return rounded_size;
191 }
192 
193 // Allocates an array: call this for every array just before the first
194 // op where it is used.
AllocateTransientArray(const Model & model,const std::string & array_name,Allocator * allocator,std::size_t transient_data_alignment)195 void AllocateTransientArray(const Model& model, const std::string& array_name,
196                             Allocator* allocator,
197                             std::size_t transient_data_alignment) {
198   if (!IsAllocatableTransientArray(model, array_name)) {
199     return;
200   }
201   const std::size_t size =
202       TransientArraySize(model, array_name, transient_data_alignment);
203   const auto& array = &model.GetArray(array_name);
204   CHECK(!array->alloc);
205   allocator->Allocate(size, &array->GetOrCreateAlloc());
206 }
207 
208 // Deallocates an array: call this for every array just after the last
209 // op where it is used.
DeallocateTransientArray(const Model & model,const std::string & array_name,Allocator * allocator)210 void DeallocateTransientArray(const Model& model, const std::string& array_name,
211                               Allocator* allocator) {
212   if (!IsAllocatableTransientArray(model, array_name)) {
213     return;
214   }
215   const auto& array = &model.GetArray(array_name);
216   CHECK(!!array->alloc);
217   allocator->Deallocate(*array->alloc);
218 }
219 
PushBackIfNotFound(const std::string & s,std::vector<std::string> * v)220 void PushBackIfNotFound(const std::string& s, std::vector<std::string>* v) {
221   if (std::find(v->begin(), v->end(), s) == v->end()) {
222     v->push_back(s);
223   }
224 }
225 
226 }  // namespace
227 
AllocateTransientArrays(Model * model,std::size_t transient_data_alignment)228 void AllocateTransientArrays(Model* model,
229                              std::size_t transient_data_alignment) {
230   // Precompute the lifespans for all arrays.
231   std::unordered_map<std::string, ArrayLifespan> array_lifespans;
232   ComputeArrayLifespans(*model, &array_lifespans);
233 
234   // In case of variable batch, our convention will be to compute the
235   // allocations for batch==1, then let the inference code multiply all
236   // the offsets by the actual runtime batch size. Conveniently,
237   // the variable_batch and batch flags are mutually exclusive, and the default
238   // value of batch is 1, so we have nothing special to do here. Let us
239   // just guard this assumption with a CHECK:
240   bool batchless_input_shapes = true;
241   for (const auto& input_array : model->flags.input_arrays()) {
242     if (!input_array.has_shape() || input_array.shape().dims().empty() ||
243         input_array.shape().dims(0) != 1) {
244       batchless_input_shapes = false;
245       break;
246     }
247   }
248   CHECK(!model->flags.variable_batch() || batchless_input_shapes);
249 
250   Allocator allocator;
251 
252   // Construct a sorted map of array names, so that other layout engines can
253   // match exactly.
254   std::map<std::string, const Array*> ordered_arrays_map;
255   for (const auto& pair : model->GetArrayMap()) {
256     ordered_arrays_map[pair.first] = pair.second.get();
257   }
258 
259   // Allocate persistent arrays (like RNN states). For them, 'transient'
260   // is a misnormer, should read 'workspace'.
261   for (const auto& array_pair : ordered_arrays_map) {
262     const std::string& array_name = array_pair.first;
263     auto it = array_lifespans.find(array_name);
264     if (it != array_lifespans.end() && it->second.persistent) {
265       AllocateTransientArray(*model, array_name, &allocator,
266                              transient_data_alignment);
267     }
268   }
269 
270   for (std::size_t op_index = 0; op_index < model->operators.size();
271        op_index++) {
272     const auto& op = model->operators[op_index];
273     // Allocate those arrays whose lifespan starts exactly here.
274     std::vector<std::string> arrays_to_allocate;
275     for (const auto& input : op->inputs) {
276       if (StartsAt(array_lifespans[input], op_index)) {
277         PushBackIfNotFound(input, &arrays_to_allocate);
278       }
279     }
280     for (const auto& output : op->outputs) {
281       if (StartsAt(array_lifespans[output], op_index)) {
282         PushBackIfNotFound(output, &arrays_to_allocate);
283       }
284     }
285     for (const std::string& array : arrays_to_allocate) {
286       AllocateTransientArray(*model, array, &allocator,
287                              transient_data_alignment);
288     }
289 
290     // Deallocate those arrays whose lifespan ends exactly here.
291     std::vector<std::string> arrays_to_deallocate;
292     for (const auto& input : op->inputs) {
293       if (EndsAt(array_lifespans[input], op_index)) {
294         PushBackIfNotFound(input, &arrays_to_deallocate);
295       }
296     }
297     for (const auto& output : op->outputs) {
298       if (EndsAt(array_lifespans[output], op_index)) {
299         PushBackIfNotFound(output, &arrays_to_deallocate);
300       }
301     }
302     for (const std::string& array : arrays_to_deallocate) {
303       DeallocateTransientArray(*model, array, &allocator);
304     }
305   }
306 
307   // Just out of curiosity (not used in the actual allocation process)
308   // evaluate the optimal total allocated size.
309   // First, compute the size of persistent arrays.
310   std::size_t optimal_transient_alloc_size = 0;
311   std::size_t persistent_alloc_size = 0;
312   for (const auto& array_pair : ordered_arrays_map) {
313     const std::string& array_name = array_pair.first;
314     auto it = array_lifespans.find(array_name);
315     if (it != array_lifespans.end() && it->second.persistent) {
316       persistent_alloc_size +=
317           TransientArraySize(*model, array_name, transient_data_alignment);
318     }
319   }
320   for (const auto& op : model->operators) {
321     // for each operator, compute the sum of the sizes of the array that must
322     // be live during the execution of this operator, plus the size of
323     // persistent arrays that must be live at all times.
324     std::vector<std::string> non_persistent_edges;
325     for (const auto& input : op->inputs) {
326       if (!array_lifespans[input].persistent) {
327         PushBackIfNotFound(input, &non_persistent_edges);
328       }
329     }
330     for (const auto& output : op->outputs) {
331       if (!array_lifespans[output].persistent) {
332         PushBackIfNotFound(output, &non_persistent_edges);
333       }
334     }
335     std::size_t size = persistent_alloc_size;
336     for (const std::string& edge : non_persistent_edges) {
337       size += TransientArraySize(*model, edge, transient_data_alignment);
338     }
339     // The optimal total size is the maximum of all operator-specific sizes.
340     optimal_transient_alloc_size = std::max(optimal_transient_alloc_size, size);
341   }
342 
343   model->transient_data_size = allocator.total_size();
344   model->transient_data_alignment = transient_data_alignment;
345   CHECK_GE(model->transient_data_size, optimal_transient_alloc_size);
346   LOG(INFO) << "Total transient array allocated size: "
347             << model->transient_data_size << " bytes, "
348             << "theoretical optimal value: " << optimal_transient_alloc_size
349             << " bytes.";
350   CheckInvariants(*model);
351 }
352 }  // namespace toco
353