/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/delegates/gpu/common/memory_management/greedy_by_size_assignment.h"

#include <algorithm>
#include <cstddef>
#include <iterator>
#include <vector>

#include "absl/status/status.h"
#include "tensorflow/lite/delegates/gpu/common/memory_management/internal.h"
#include "tensorflow/lite/delegates/gpu/common/memory_management/types.h"
#include "tensorflow/lite/delegates/gpu/common/util.h"

namespace tflite {
namespace gpu {
namespace {

struct SizeDistPriorityInfo {
  // - The tensor with the leftmost position in the positional maximums vector
  // has higher priority;
  // - If two tensors have equal position, the one whose usage interval has the
  // smallest positive distance (best_dist) to one of the already assigned
  // tensors has higher priority;
  // - If two tensors have equal position and best_dist, the one with the
  // greater tensor_size has higher priority.
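  // Illustrative example (hypothetical values): with positional maximums
  // {32, 16, 8}, a tensor of size 20 gets position 0 while a tensor of size 10
  // gets position 1, so the size-20 tensor is prioritized first; ties on
  // position are broken by the smaller best_dist, and remaining ties by the
  // larger tensor_size.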
  bool operator>(const SizeDistPriorityInfo& other) const {
    return position < other.position ||
           (position == other.position &&
            (best_dist < other.best_dist || (best_dist == other.best_dist &&
                                             tensor_size > other.tensor_size)));
  }

  // Recalculate the best distance and the best object, based on the distances
  // precalculated in vector dist.
  void RecalcBestDist() {
    best_dist = kNotAssigned;
    for (size_t obj_id = 0; obj_id < dist.size(); ++obj_id) {
      if (dist[obj_id] < best_dist) {
        best_dist = dist[obj_id];
        best_object = obj_id;
      }
    }
  }

  size_t position;
  size_t tensor_size;
  std::vector<size_t> dist;
  size_t best_dist;
  size_t best_object;
  size_t tensor_usage_id;
};

}  // namespace

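// Greedy-by-size offset assignment. A brief summary of the implementation
// below (kept next to the code for orientation):
// - Tensors are processed in the order produced by CompareBySize (larger
// tensors first);
// - For each tensor we walk the already placed tensors in order of their
// offsets, skipping those whose usage intervals don't intersect the current
// one, and look for the smallest gap between neighboring allocations that
// still fits the current tensor;
// - If such a gap exists, the tensor is placed at its start; otherwise it is
// placed right after the rightmost already placed tensor whose usage interval
// intersects the current one. Gap boundaries are computed with AlignByN, so
// assigned offsets end up aligned to base_addr_align_bytes.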
absl::Status GreedyBySizeAssignment(
    const std::vector<TensorUsageRecord<size_t>>& usage_records,
    size_t base_addr_align_bytes, OffsetsAssignment* assignment) {
  const size_t num_tensors = usage_records.size();
  assignment->offsets.resize(num_tensors);
  assignment->total_size = 0;

  // Ordered records are to be sorted by the size of the corresponding tensor.
  std::vector<TensorUsageWithIndex<size_t>> ordered_records;
  for (size_t i = 0; i < num_tensors; ++i) {
    ordered_records.emplace_back(&usage_records[i], i);
  }
  std::stable_sort(ordered_records.begin(), ordered_records.end(),
                   CompareBySize);

  // Vector of ids of already allocated tensors, ordered by offset.
  std::vector<size_t> ordered_allocs;

  for (const auto& rec_with_idx : ordered_records) {
    const TensorUsageRecord<size_t>* rec = rec_with_idx.usage_record;
    size_t best_diff = kNotAssigned;
    size_t best_offset = kNotAssigned;
    size_t prev_offset = 0;
    for (const auto& allocated_id : ordered_allocs) {
      if (usage_records[allocated_id].last_task < rec->first_task ||
          usage_records[allocated_id].first_task > rec->last_task) {
        // The usage interval of tensor allocated_id doesn't intersect with the
        // current tensor's usage interval, so we skip it.
        continue;
      }
      size_t cur_offset = assignment->offsets[allocated_id];
      if (cur_offset >= prev_offset) {
        size_t diff = cur_offset - prev_offset;
        // Check whether the current tensor fits into the gap located directly
        // to the left of tensor allocated_id's offset, and whether this gap is
        // the smallest of the previously considered suitable gaps.
        if (diff >= rec->tensor_size && diff < best_diff) {
          best_diff = diff;
          best_offset = prev_offset;
        }
      }
      prev_offset = std::max(
          prev_offset,
          AlignByN(cur_offset + usage_records[allocated_id].tensor_size,
                   base_addr_align_bytes));
    }
    // prev_offset should be no more than the total size plus the additional
    // alignment introduced by AlignByN. The per-object alignment added is at
    // most (base_addr_align_bytes - 1).
    if (assignment->total_size +
            ordered_allocs.size() * (base_addr_align_bytes - 1) <
        prev_offset) {
      return absl::InternalError("Total size is wrong.");
    }

    // If no suitable gap was found, allocate the current tensor after the
    // rightmost tensor whose usage interval intersects with the current one.
    if (best_offset == kNotAssigned) {
      best_offset = prev_offset;
    }

    // Assign best_offset to the current tensor and find the correct place to
    // insert information about it into ordered_allocs to preserve the order.
    auto it = ordered_allocs.begin();
    while (it != ordered_allocs.end() &&
           assignment->offsets[*it] <= best_offset) {
      ++it;
    }
    ordered_allocs.insert(it, rec_with_idx.idx);
    assignment->offsets[rec_with_idx.idx] = best_offset;
    assignment->total_size =
        std::max(assignment->total_size, best_offset + rec->tensor_size);
  }
  return absl::OkStatus();
}

// Assigns given tensors to shared objects, using the following greedy
// algorithm:
// - We have the tensor usage records of all intermediate tensors as an input.
// Each record consists of the tensor size and the first and last tasks that
// use it. Let's call [first_task..last_task] a tensor usage interval;
// - The distance between two usage intervals is the absolute difference
// between the closest tasks in their intervals. If two usage intervals don't
// intersect, then the distance between them is positive (see the illustration
// after this comment block);
// - Calculate the positional maximums vector, i.e. the vector of lower bounds
// on the size of each shared object;
// - For each tensor find the rightmost positional maximum that is greater than
// or equal to the current tensor's size (call it position);
// - Iterate through all tensors, each time picking the not-yet-assigned tensor
// with the highest SizeDistPriority (described above);
// - For every such tensor, assign it to the object that already has a tensor
// whose usage interval has the smallest existing positive distance to the
// current tensor's usage interval (this distance and the object id are already
// precalculated in its SizeDistPriority record). The size of the chosen object
// can possibly increase;
// - If there are several such objects, use the largest one;
// - If there are no suitable shared objects, assign the current tensor to a
// new object with size equal to the current tensor's size;
// - Modify the SizeDistPriority records of tensors that haven't been assigned
// yet, to reflect distance changes after that assignment.
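// Illustration (hypothetical values): usage intervals [0..2] and [5..7] have
// distance 3, while [0..3] and [2..5] intersect, so no positive distance is
// defined for them. If a tensor A with interval [0..2] is already assigned to
// object X and a tensor B with interval [6..8] to object Y, then an unassigned
// tensor C with interval [3..4] has distance 1 to A and distance 2 to B, so C
// is assigned to object X (growing X to C's size if needed).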
absl::Status GreedyBySizeDistPriorityAssignment(
    const std::vector<TensorUsageRecord<size_t>>& usage_records,
    ObjectsAssignment<size_t>* assignment) {
  std::vector<size_t> positional_max =
      CalculatePositionalMaximums(usage_records);

  size_t num_records = usage_records.size();
  std::vector<SizeDistPriorityInfo> priority_info(num_records);
  for (size_t rec_id = 0; rec_id < usage_records.size(); ++rec_id) {
    priority_info[rec_id].tensor_usage_id = rec_id;
    priority_info[rec_id].tensor_size = usage_records[rec_id].tensor_size;

    // No objects have been created yet.
    priority_info[rec_id].best_dist = kNotAssigned;
    priority_info[rec_id].best_object = kNotAssigned;

    // Find the rightmost positional maximum that is greater than or equal to
    // the current tensor's size.
    size_t pos = 0;
    while (pos < positional_max.size() &&
           positional_max[pos] >= priority_info[rec_id].tensor_size) {
      ++pos;
    }
    if (pos == 0) {
      return absl::InternalError("Variable pos must be positive.");
    }
    priority_info[rec_id].position = pos - 1;
  }

  assignment->object_sizes.clear();
  assignment->object_ids.assign(num_records, kNotAssigned);
  for (size_t it = 0; it < num_records; ++it) {
    size_t best_info_id = kNotAssigned;
    for (size_t info_id = 0; info_id < num_records; ++info_id) {
      if (assignment->object_ids[priority_info[info_id].tensor_usage_id] !=
          kNotAssigned) {
        // Tensor already assigned.
        continue;
      }
      if (best_info_id == kNotAssigned ||
          priority_info[info_id] > priority_info[best_info_id]) {
        best_info_id = info_id;
      }
    }
    if (best_info_id == kNotAssigned) {
      // During each iteration we assign exactly one of the tensors, so some
      // not yet assigned tensors must exist.
      return absl::InternalError("Invalid value for variable best_info_id.");
    }

    size_t best_rec_id = priority_info[best_info_id].tensor_usage_id;
    size_t best_obj_id = priority_info[best_info_id].best_object;
    bool new_object = false;
    if (priority_info[best_info_id].best_dist == kNotAssigned) {
      // No suitable shared object, so we create a new one.
      new_object = true;
      best_obj_id = assignment->object_sizes.size();
      assignment->object_ids[best_rec_id] = best_obj_id;
      assignment->object_sizes.push_back(
          usage_records[best_rec_id].tensor_size);
    } else {
      // Assign tensor best_rec_id to the already existing object best_obj_id.
      assignment->object_ids[best_rec_id] = best_obj_id;
      assignment->object_sizes[best_obj_id] =
          std::max(assignment->object_sizes[best_obj_id],
                   usage_records[best_rec_id].tensor_size);
    }

    // Modify the SizeDistPriority records of tensors that haven't been
    // assigned yet, to reflect distance changes after that assignment.
    for (size_t info_id = 0; info_id < num_records; ++info_id) {
      // SizeDistPriority record info_id contains the priority of tensor
      // rec_id.
      size_t rec_id = priority_info[info_id].tensor_usage_id;

      if (assignment->object_ids[rec_id] != kNotAssigned) {
        // Tensor rec_id is already assigned.
        continue;
      }
      if (!new_object &&
          priority_info[info_id].dist[best_obj_id] == kNotAssigned) {
        // Tensor rec_id intersects with some of the tensors that are assigned
        // to object best_obj_id.
        continue;
      }

      size_t dist = kNotAssigned;
      if (usage_records[rec_id].last_task <
          usage_records[best_rec_id].first_task) {
        dist = usage_records[best_rec_id].first_task -
               usage_records[rec_id].last_task;
      } else if (usage_records[best_rec_id].last_task <
                 usage_records[rec_id].first_task) {
        dist = usage_records[rec_id].first_task -
               usage_records[best_rec_id].last_task;
      }

      if (new_object) {
        // best_rec_id is the only tensor assigned to the new object.
        priority_info[info_id].dist.push_back(dist);
      } else if (dist == kNotAssigned) {
        // The usage intervals of tensors rec_id and best_rec_id intersect, so
        // rec_id can't be assigned to best_obj_id anymore.
        priority_info[info_id].dist[best_obj_id] = kNotAssigned;
        if (priority_info[info_id].best_object == best_obj_id) {
          // best_obj_id was the best shared object for tensor rec_id, but now
          // it's not suitable anymore, so we need some recalculation.
          priority_info[info_id].RecalcBestDist();
        }
      } else {
        // Update the distance, because it has probably changed.
        priority_info[info_id].dist[best_obj_id] =
            std::min(priority_info[info_id].dist[best_obj_id], dist);
      }
      if (dist < priority_info[info_id].best_dist) {
        // Update the best distance and the best object for tensor rec_id.
        priority_info[info_id].best_dist = dist;
        priority_info[info_id].best_object = best_obj_id;
      }
    }
  }
  return absl::OkStatus();
}

}  // namespace gpu
}  // namespace tflite