/*
 * Copyright 2021 Google LLC
 * SPDX-License-Identifier: MIT
 */

#include "vkr_ring.h"

#include <stdio.h>
#include <time.h>

#include "vrend_iov.h"

#include "vkr_context.h"

enum vkr_ring_status_flag {
   VKR_RING_STATUS_IDLE = 1u << 0,
};

/* callers must make sure they do not seek to end-of-resource or beyond */
static const struct iovec *
seek_resource(const struct vkr_resource_attachment *att,
              int base_iov_index,
              size_t offset,
              int *out_iov_index,
              size_t *out_iov_offset)
{
   const struct iovec *iov = &att->iov[base_iov_index];
   assert(iov - att->iov < att->iov_count);
   while (offset >= iov->iov_len) {
      offset -= iov->iov_len;
      iov++;
      assert(iov - att->iov < att->iov_count);
   }

   *out_iov_index = iov - att->iov;
   *out_iov_offset = offset;

   return iov;
}

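/* return a host pointer into the attached resource at the given offset */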
static void *
get_resource_pointer(const struct vkr_resource_attachment *att,
                     int base_iov_index,
                     size_t offset)
{
   const struct iovec *iov =
      seek_resource(att, base_iov_index, offset, &base_iov_index, &offset);
   return (uint8_t *)iov->iov_base + offset;
}

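/* locate the extra region within the attachment and record where it begins */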
static void
vkr_ring_init_extra(struct vkr_ring *ring, const struct vkr_ring_layout *layout)
{
   struct vkr_ring_extra *extra = &ring->extra;

   seek_resource(layout->attachment, 0, layout->extra.begin, &extra->base_iov_index,
                 &extra->base_iov_offset);

   extra->region = vkr_region_make_relative(&layout->extra);
}

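/* set up the command buffer region; its size must be a power of two so that
 * buffer offsets can be derived with a simple mask
 */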
static void
vkr_ring_init_buffer(struct vkr_ring *ring, const struct vkr_ring_layout *layout)
{
   struct vkr_ring_buffer *buf = &ring->buffer;

   const struct iovec *base_iov =
      seek_resource(layout->attachment, 0, layout->buffer.begin, &buf->base_iov_index,
                    &buf->base_iov_offset);

   buf->size = vkr_region_size(&layout->buffer);
   assert(util_is_power_of_two_nonzero(buf->size));
   buf->mask = buf->size - 1;

   buf->cur = 0;
   buf->cur_iov = base_iov;
   buf->cur_iov_index = buf->base_iov_index;
   buf->cur_iov_offset = buf->base_iov_offset;
}

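/* resolve the shared head, tail, and status control words */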
static bool
vkr_ring_init_control(struct vkr_ring *ring, const struct vkr_ring_layout *layout)
{
   struct vkr_ring_control *ctrl = &ring->control;

   ctrl->head = get_resource_pointer(layout->attachment, 0, layout->head.begin);
   ctrl->tail = get_resource_pointer(layout->attachment, 0, layout->tail.begin);
   ctrl->status = get_resource_pointer(layout->attachment, 0, layout->status.begin);

   /* we will manage head and status, and we expect them to be 0 initially */
   if (*ctrl->head || *ctrl->status)
      return false;

   return true;
}

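/* Head and tail form a release-acquire pairing with the driver.  A rough
 * sketch of the driver side, shown here only for illustration:
 *
 *    write the commands into the buffer;
 *    atomic_store_explicit(tail, new_tail, memory_order_release);
 *    ...
 *    cur_head = atomic_load_explicit(head, memory_order_acquire);
 *
 * The two helpers below perform the matching acquire load of the tail and
 * release store of the head on the renderer side.
 */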
static void
vkr_ring_store_head(struct vkr_ring *ring)
{
   /* the driver is expected to load the head with memory_order_acquire,
    * forming a release-acquire ordering
    */
   atomic_store_explicit(ring->control.head, ring->buffer.cur, memory_order_release);
}

static uint32_t
vkr_ring_load_tail(const struct vkr_ring *ring)
{
   /* the driver is expected to store the tail with memory_order_release,
    * forming a release-acquire ordering
    */
   return atomic_load_explicit(ring->control.tail, memory_order_acquire);
}

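/* update the shared status word; the driver is expected to check it to tell
 * whether the ring thread has gone idle and needs an explicit notify
 */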
static void
vkr_ring_store_status(struct vkr_ring *ring, uint32_t status)
{
   atomic_store_explicit(ring->control.status, status, memory_order_seq_cst);
}

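/* copy size bytes from the ring buffer into data, handling wrap-around within
 * the buffer region and reads that span multiple iovecs
 */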
/* TODO consider requiring virgl_resource to be logically contiguous */
static void
vkr_ring_read_buffer(struct vkr_ring *ring, void *data, uint32_t size)
{
   struct vkr_ring_buffer *buf = &ring->buffer;
   const struct vkr_resource_attachment *att = ring->attachment;

   assert(size <= buf->size);
   const uint32_t buf_offset = buf->cur & buf->mask;
   const uint32_t buf_avail = buf->size - buf_offset;
   const bool wrap = size >= buf_avail;

   uint32_t read_size;
   uint32_t wrap_size;
   if (!wrap) {
      read_size = size;
      wrap_size = 0;
   } else {
      read_size = buf_avail;
      /* When size == buf_avail, wrap is true but wrap_size is 0. We want to
       * wrap because it seems slightly faster on the next call. Besides,
       * seek_resource does not support seeking to end-of-resource which could
       * happen if we don't wrap and the buffer region end coincides with the
       * resource end.
       */
      wrap_size = size - buf_avail;
   }

   /* do the reads */
   if (read_size <= buf->cur_iov->iov_len - buf->cur_iov_offset) {
      const void *src = (const uint8_t *)buf->cur_iov->iov_base + buf->cur_iov_offset;
      memcpy(data, src, read_size);

      /* fast path */
      if (!wrap) {
         assert(!wrap_size);
         buf->cur += read_size;
         buf->cur_iov_offset += read_size;
         return;
      }
   } else {
      vrend_read_from_iovec(buf->cur_iov, att->iov_count - buf->cur_iov_index,
                            buf->cur_iov_offset, data, read_size);
   }

   if (wrap_size) {
      vrend_read_from_iovec(att->iov + buf->base_iov_index,
                            att->iov_count - buf->base_iov_index, buf->base_iov_offset,
                            (char *)data + read_size, wrap_size);
   }

   /* advance cur */
   buf->cur += size;
   if (!wrap) {
      buf->cur_iov = seek_resource(att, buf->cur_iov_index, buf->cur_iov_offset + size,
                                   &buf->cur_iov_index, &buf->cur_iov_offset);
   } else {
      buf->cur_iov =
         seek_resource(att, buf->base_iov_index, buf->base_iov_offset + wrap_size,
                       &buf->cur_iov_index, &buf->cur_iov_offset);
   }
}

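/* create a ring over the attachment described by layout; the ring does not
 * process commands until vkr_ring_start is called
 */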
struct vkr_ring *
vkr_ring_create(const struct vkr_ring_layout *layout,
                struct virgl_context *ctx,
                uint64_t idle_timeout)
{
   struct vkr_ring *ring;
   int ret;

   ring = calloc(1, sizeof(*ring));
   if (!ring)
      return NULL;

   ring->attachment = layout->attachment;

   if (!vkr_ring_init_control(ring, layout)) {
      free(ring);
      return NULL;
   }

   vkr_ring_init_buffer(ring, layout);
   vkr_ring_init_extra(ring, layout);

   ring->cmd = malloc(ring->buffer.size);
   if (!ring->cmd) {
      free(ring);
      return NULL;
   }

   ring->context = ctx;
   ring->idle_timeout = idle_timeout;

   ret = mtx_init(&ring->mutex, mtx_plain);
   if (ret != thrd_success) {
      free(ring->cmd);
      free(ring);
      return NULL;
   }
   ret = cnd_init(&ring->cond);
   if (ret != thrd_success) {
      mtx_destroy(&ring->mutex);
      free(ring->cmd);
      free(ring);
      return NULL;
   }

   return ring;
}

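/* destroy a ring; the caller must have stopped it first */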
void
vkr_ring_destroy(struct vkr_ring *ring)
{
   list_del(&ring->head);

   assert(!ring->started);
   mtx_destroy(&ring->mutex);
   cnd_destroy(&ring->cond);
   free(ring->cmd);
   free(ring);
}

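/* return the current CLOCK_MONOTONIC time in nanoseconds, or 0 on failure */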
static uint64_t
vkr_ring_now(void)
{
   const uint64_t ns_per_sec = 1000000000llu;
   struct timespec now;
   if (clock_gettime(CLOCK_MONOTONIC, &now))
      return 0;
   return ns_per_sec * now.tv_sec + now.tv_nsec;
}

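/* yield briefly while polling, then back off with exponentially growing sleeps */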
static void
vkr_ring_relax(uint32_t *iter)
{
   /* TODO do better */
   const uint32_t busy_wait_order = 4;
   const uint32_t base_sleep_us = 10;

   (*iter)++;
   if (*iter < (1u << busy_wait_order)) {
      thrd_yield();
      return;
   }

   const uint32_t shift = util_last_bit(*iter) - busy_wait_order - 1;
   const uint32_t us = base_sleep_us << shift;
   const struct timespec ts = {
      .tv_sec = us / 1000000,
      .tv_nsec = (us % 1000000) * 1000,
   };
   clock_nanosleep(CLOCK_MONOTONIC, 0, &ts, NULL);
}

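/* worker thread: poll the ring for commands, submit them to the context, and
 * go idle on the condition variable after idle_timeout nanoseconds without work
 */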
static int
vkr_ring_thread(void *arg)
{
   struct vkr_ring *ring = arg;
   struct virgl_context *ctx = ring->context;
   char thread_name[16];

   snprintf(thread_name, ARRAY_SIZE(thread_name), "vkr-ring-%d", ctx->ctx_id);
   u_thread_setname(thread_name);

   uint64_t last_submit = vkr_ring_now();
   uint32_t relax_iter = 0;
   int ret = 0;
   while (ring->started) {
      bool wait = false;
      uint32_t cmd_size;

      if (vkr_ring_now() >= last_submit + ring->idle_timeout) {
         ring->pending_notify = false;
         vkr_ring_store_status(ring, VKR_RING_STATUS_IDLE);
         wait = ring->buffer.cur == vkr_ring_load_tail(ring);
         if (!wait)
            vkr_ring_store_status(ring, 0);
      }

      if (wait) {
         TRACE_SCOPE("ring idle");

         mtx_lock(&ring->mutex);
         if (ring->started && !ring->pending_notify)
            cnd_wait(&ring->cond, &ring->mutex);
         vkr_ring_store_status(ring, 0);
         mtx_unlock(&ring->mutex);

         if (!ring->started)
            break;

         last_submit = vkr_ring_now();
         relax_iter = 0;
      }

      cmd_size = vkr_ring_load_tail(ring) - ring->buffer.cur;
      if (cmd_size) {
         if (cmd_size > ring->buffer.size) {
            ret = -EINVAL;
            break;
         }

         vkr_ring_read_buffer(ring, ring->cmd, cmd_size);
         ctx->submit_cmd(ctx, ring->cmd, cmd_size);
         vkr_ring_store_head(ring);

         last_submit = vkr_ring_now();
         relax_iter = 0;
      } else {
         vkr_ring_relax(&relax_iter);
      }
   }

   return ret;
}

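/* start the ring worker thread */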
void
vkr_ring_start(struct vkr_ring *ring)
{
   int ret;

   assert(!ring->started);
   ring->started = true;
   ret = thrd_create(&ring->thread, vkr_ring_thread, ring);
   if (ret != thrd_success)
      ring->started = false;
}

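/* stop the ring worker thread and join it; returns false when called from the
 * ring thread itself
 */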
bool
vkr_ring_stop(struct vkr_ring *ring)
{
   mtx_lock(&ring->mutex);
   if (thrd_equal(ring->thread, thrd_current())) {
      mtx_unlock(&ring->mutex);
      return false;
   }
   assert(ring->started);
   ring->started = false;
   cnd_signal(&ring->cond);
   mtx_unlock(&ring->mutex);

   thrd_join(ring->thread, NULL);

   return true;
}

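/* wake up the ring thread when it has gone idle */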
void
vkr_ring_notify(struct vkr_ring *ring)
{
   mtx_lock(&ring->mutex);
   ring->pending_notify = true;
   cnd_signal(&ring->cond);
   mtx_unlock(&ring->mutex);

   {
      TRACE_SCOPE("ring notify done");
   }
}

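/* write val at the given offset of the extra region; returns false if the
 * write would fall outside the region
 */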
bool
vkr_ring_write_extra(struct vkr_ring *ring, size_t offset, uint32_t val)
{
   struct vkr_ring_extra *extra = &ring->extra;

   if (unlikely(extra->cached_offset != offset || !extra->cached_data)) {
      const struct vkr_region access = VKR_REGION_INIT(offset, sizeof(val));
      if (!vkr_region_is_valid(&access) || !vkr_region_is_within(&access, &extra->region))
         return false;

      /* Mesa always sets offset to 0 and the cache hit rate will be 100% */
      extra->cached_offset = offset;
      extra->cached_data = get_resource_pointer(ring->attachment, extra->base_iov_index,
                                                extra->base_iov_offset + offset);
   }

   atomic_store_explicit(extra->cached_data, val, memory_order_release);

   {
      TRACE_SCOPE("ring extra done");
   }

   return true;
}