1 // Copyright 2024 The Android Open Source Project
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "DeviceOpTracker.h"
16 
17 #include <algorithm>
18 #include <type_traits>
19 
20 #include "host-common/GfxstreamFatalError.h"
21 #include "host-common/logging.h"
22 
23 namespace gfxstream {
24 namespace vk {
25 namespace {
26 
27 using emugl::ABORT_REASON_OTHER;
28 using emugl::FatalError;
29 
// Queue length above which a warning is logged about pending work piling up.
constexpr size_t kSizeLoggingThreshold = 20;
// Age a pending poll function must exceed before it counts as "old" for the
// purposes of the pending-waitables warning.
constexpr std::chrono::seconds kSizeLoggingTimeThreshold{1};

// Age after which pending garbage is dropped from the queue without waiting
// for its waitable any longer.
constexpr std::chrono::seconds kAutoDeleteTimeThreshold{5};

// Type-dependent `false`, so that a static_assert in the discarded branch of
// an `if constexpr` chain only fires when that branch is actually instantiated.
template <typename T>
inline constexpr bool always_false_v = false;
37 
38 }  // namespace
39 
DeviceOpTracker(VkDevice device,VulkanDispatch * deviceDispatch)40 DeviceOpTracker::DeviceOpTracker(VkDevice device, VulkanDispatch* deviceDispatch)
41     : mDevice(device), mDeviceDispatch(deviceDispatch) {}
42 
AddPendingGarbage(DeviceOpWaitable waitable,VkFence fence)43 void DeviceOpTracker::AddPendingGarbage(DeviceOpWaitable waitable, VkFence fence) {
44     std::lock_guard<std::mutex> lock(mPendingGarbageMutex);
45 
46     mPendingGarbage.push_back(PendingGarbage{
47         .waitable = std::move(waitable),
48         .obj = fence,
49         .timepoint = std::chrono::system_clock::now(),
50     });
51 
52     if (mPendingGarbage.size() > kSizeLoggingThreshold) {
53         WARN("VkDevice:%p has %d pending garbage objects.", mDevice, mPendingGarbage.size());
54     }
55 }
56 
AddPendingGarbage(DeviceOpWaitable waitable,VkSemaphore semaphore)57 void DeviceOpTracker::AddPendingGarbage(DeviceOpWaitable waitable, VkSemaphore semaphore) {
58     std::lock_guard<std::mutex> lock(mPendingGarbageMutex);
59 
60     mPendingGarbage.push_back(PendingGarbage{
61         .waitable = std::move(waitable),
62         .obj = semaphore,
63         .timepoint = std::chrono::system_clock::now(),
64     });
65 
66     if (mPendingGarbage.size() > kSizeLoggingThreshold) {
67         WARN("VkDevice:%p has %d pending garbage objects.", mDevice, mPendingGarbage.size());
68     }
69 }
70 
Poll()71 void DeviceOpTracker::Poll() {
72     std::lock_guard<std::mutex> lock(mPollFunctionsMutex);
73     mPollFunctions.erase(std::remove_if(mPollFunctions.begin(), mPollFunctions.end(),
74                                         [](const PollFunction& pollingFunc) {
75                                             DeviceOpStatus status = pollingFunc.func();
76                                             return status != DeviceOpStatus::kPending;
77                                         }),
78                          mPollFunctions.end());
79 
80     if (mPollFunctions.size() > kSizeLoggingThreshold) {
81         // Only report old-enough objects to avoid reporting lots of pending waitables
82         // when many requests have been done in a small amount of time.
83         const auto now = std::chrono::system_clock::now();
84         const auto old = now - kSizeLoggingTimeThreshold;
85         int numOldFuncs = std::count_if(
86             mPollFunctions.begin(), mPollFunctions.end(), [old](const PollFunction& pollingFunc) {
87                 return (pollingFunc.timepoint < old);
88             });
89         if (numOldFuncs > kSizeLoggingThreshold) {
90             WARN("VkDevice:%p has %d pending waitables, %d taking more than %d milliseconds.",
91                  mDevice, mPollFunctions.size(), numOldFuncs,
92                  std::chrono::duration_cast<std::chrono::milliseconds>(kSizeLoggingTimeThreshold));
93         }
94     }
95 }
96 
PollAndProcessGarbage()97 void DeviceOpTracker::PollAndProcessGarbage() {
98     Poll();
99 
100     const auto now = std::chrono::system_clock::now();
101     const auto old = now - kAutoDeleteTimeThreshold;
102     {
103         std::lock_guard<std::mutex> lock(mPendingGarbageMutex);
104 
105         // Assuming that pending garbage is added to the queue in the roughly the order
106         // they are used, encountering an unsignaled/pending waitable likely means that
107         // all pending garbage after is also still pending. This might not necessarily
108         // always be the case but it is a simple heuristic to try to minimize the amount
109         // of work performed here as it is expected that this function will be called
110         // while processing other guest vulkan functions.
111         auto firstPendingIt = std::find_if(mPendingGarbage.begin(), mPendingGarbage.end(),
112                                            [old](const PendingGarbage& pendingGarbage) {
113                                                if (pendingGarbage.timepoint < old) {
114                                                    return /*still pending=*/false;
115                                                }
116                                                return !IsDone(pendingGarbage.waitable);
117                                            });
118 
119         for (auto it = mPendingGarbage.begin(); it != firstPendingIt; it++) {
120             PendingGarbage& pendingGarbage = *it;
121 
122             if (pendingGarbage.timepoint < old) {
123                 const auto difference = std::chrono::duration_cast<std::chrono::milliseconds>(
124                     pendingGarbage.timepoint - now);
125                 WARN("VkDevice:%p had a waitable pending for %d milliseconds. Leaking object.",
126                      mDevice, difference.count());
127                 continue;
128             }
129 
130             std::visit(
131                 [this](auto&& arg) {
132                     using T = std::decay_t<decltype(arg)>;
133                     if constexpr (std::is_same_v<T, VkFence>) {
134                         mDeviceDispatch->vkDestroyFence(mDevice, arg, nullptr);
135                     } else if constexpr (std::is_same_v<T, VkSemaphore>) {
136                         mDeviceDispatch->vkDestroySemaphore(mDevice, arg, nullptr);
137                     } else {
138                         static_assert(always_false_v<T>, "non-exhaustive visitor!");
139                     }
140                 },
141                 pendingGarbage.obj);
142         }
143 
144         mPendingGarbage.erase(mPendingGarbage.begin(), firstPendingIt);
145 
146         if (mPendingGarbage.size() > kSizeLoggingThreshold) {
147             WARN("VkDevice:%p has %d pending garbage objects.", mDevice, mPendingGarbage.size());
148         }
149     }
150 }
151 
OnDestroyDevice()152 void DeviceOpTracker::OnDestroyDevice() {
153     mDeviceDispatch->vkDeviceWaitIdle(mDevice);
154 
155     PollAndProcessGarbage();
156 
157     {
158         std::lock_guard<std::mutex> lock(mPendingGarbageMutex);
159         if (!mPendingGarbage.empty()) {
160             WARN("VkDevice:%p has %d leaking garbage objects on destruction.", mDevice,
161                  mPendingGarbage.size());
162         }
163     }
164 }
165 
AddPendingDeviceOp(std::function<DeviceOpStatus ()> pollFunction)166 void DeviceOpTracker::AddPendingDeviceOp(std::function<DeviceOpStatus()> pollFunction) {
167     std::lock_guard<std::mutex> lock(mPollFunctionsMutex);
168     mPollFunctions.push_back(PollFunction{
169         .func = std::move(pollFunction),
170         .timepoint = std::chrono::system_clock::now(),
171     });
172 }
173 
DeviceOpBuilder(DeviceOpTracker & tracker)174 DeviceOpBuilder::DeviceOpBuilder(DeviceOpTracker& tracker) : mTracker(tracker) {}
175 
~DeviceOpBuilder()176 DeviceOpBuilder::~DeviceOpBuilder() {
177     if (!mSubmittedFence) {
178         GFXSTREAM_ABORT(FatalError(ABORT_REASON_OTHER))
179             << "Invalid usage: failed to call OnQueueSubmittedWithFence().";
180     }
181 }
182 
CreateFenceForOp()183 VkFence DeviceOpBuilder::CreateFenceForOp() {
184     const VkFenceCreateInfo fenceCreateInfo = {
185         .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
186         .pNext = nullptr,
187         .flags = 0,
188     };
189     VkFence fence = VK_NULL_HANDLE;
190     VkResult result = mTracker.mDeviceDispatch->vkCreateFence(mTracker.mDevice, &fenceCreateInfo,
191                                                               nullptr, &fence);
192 
193     mCreatedFence = fence;
194     if (result != VK_SUCCESS) {
195         ERR("DeviceOpBuilder failed to create VkFence!");
196         return VK_NULL_HANDLE;
197     }
198     return fence;
199 }
200 
OnQueueSubmittedWithFence(VkFence fence)201 DeviceOpWaitable DeviceOpBuilder::OnQueueSubmittedWithFence(VkFence fence) {
202     if (mCreatedFence.has_value() && fence != mCreatedFence) {
203         GFXSTREAM_ABORT(FatalError(ABORT_REASON_OTHER))
204             << "Invalid usage: failed to call OnQueueSubmittedWithFence() with the fence "
205             << "requested from CreateFenceForOp.";
206     }
207     mSubmittedFence = fence;
208 
209     const bool destroyFenceOnCompletion = mCreatedFence.has_value();
210 
211     std::shared_ptr<std::promise<void>> promise = std::make_shared<std::promise<void>>();
212     DeviceOpWaitable future = promise->get_future().share();
213 
214     mTracker.AddPendingDeviceOp([device = mTracker.mDevice,
215                                  deviceDispatch = mTracker.mDeviceDispatch, fence,
216                                  promise = std::move(promise), destroyFenceOnCompletion] {
217         if (fence == VK_NULL_HANDLE) {
218             return DeviceOpStatus::kDone;
219         }
220 
221         VkResult result = deviceDispatch->vkGetFenceStatus(device, fence);
222         if (result == VK_NOT_READY) {
223             return DeviceOpStatus::kPending;
224         }
225 
226         if (destroyFenceOnCompletion) {
227             deviceDispatch->vkDestroyFence(device, fence, nullptr);
228         }
229         promise->set_value();
230 
231         return result == VK_SUCCESS ? DeviceOpStatus::kDone : DeviceOpStatus::kFailure;
232     });
233 
234     return future;
235 }
236 
237 }  // namespace vk
238 }  // namespace gfxstream
239