/*
 * Copyright 2021 Google LLC
 * SPDX-License-Identifier: MIT
 */

#include "vn_ring.h"

#include <sys/resource.h>

#include "venus-protocol/vn_protocol_driver_transport.h"

#include "vn_cs.h"
#include "vn_instance.h"
#include "vn_renderer.h"

#define VN_RING_IDLE_TIMEOUT_NS (1ull * 1000 * 1000)

static_assert(ATOMIC_INT_LOCK_FREE == 2 && sizeof(atomic_uint) == 4,
              "vn_ring_shared requires lock-free 32-bit atomic_uint");

/* pointers to a ring in a BO */
struct vn_ring_shared {
   const volatile atomic_uint *head;
   volatile atomic_uint *tail;
   volatile atomic_uint *status;
   void *buffer;
   void *extra;
};
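
/* Ownership of the shared control words (see the load/store helpers below):
 * the renderer advances head as it consumes commands and sets status bits,
 * while the driver advances tail as it writes commands and clears the status
 * bits it has handled.
 */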

struct vn_ring {
   uint64_t id;
   struct vn_instance *instance;
   struct vn_renderer_shmem *shmem;

   uint32_t buffer_size;
   uint32_t buffer_mask;

   struct vn_ring_shared shared;
   uint32_t cur;

   /* This mutex ensures:
    * - atomicity of ring submissions
    * - that setting the reply shmem and the ring submission are paired
    */
   mtx_t mutex;

   /* size limit for cmd submission via ring shmem, derived from
    * (buffer_size >> direct_order) upon vn_ring_create
    */
   uint32_t direct_size;

   /* used for indirect submission of large commands (non-VkCommandBuffer) */
   struct vn_cs_encoder upload;

   struct list_head submits;
   struct list_head free_submits;

   /* to synchronize renderer/ring */
   mtx_t roundtrip_mutex;
   uint64_t roundtrip_next;

   int64_t last_notify;
   int64_t next_notify;
};

struct vn_ring_submit {
   uint32_t seqno;

   struct list_head head;

   /* BOs to keep alive (TODO make sure shmems are pinned) */
   uint32_t shmem_count;
   struct vn_renderer_shmem *shmems[];
};

struct vn_ring_submission {
   const struct vn_cs_encoder *cs;
   struct vn_ring_submit *submit;

   struct {
      struct vn_cs_encoder cs;
      struct vn_cs_encoder_buffer buffer;
      uint32_t data[64];
   } indirect;
};

static uint32_t
vn_ring_load_head(const struct vn_ring *ring)
{
   /* the renderer is expected to store the head with memory_order_release,
    * forming a release-acquire ordering
    */
   return atomic_load_explicit(ring->shared.head, memory_order_acquire);
}

static void
vn_ring_store_tail(struct vn_ring *ring)
{
   /* the renderer is expected to load the tail with memory_order_acquire,
    * forming a release-acquire ordering
    */
   atomic_store_explicit(ring->shared.tail, ring->cur, memory_order_release);
}

uint32_t
vn_ring_load_status(const struct vn_ring *ring)
{
   /* must be called and ordered after vn_ring_store_tail for idle status */
   return atomic_load_explicit(ring->shared.status, memory_order_seq_cst);
}

void
vn_ring_unset_status_bits(struct vn_ring *ring, uint32_t mask)
{
   atomic_fetch_and_explicit(ring->shared.status, ~mask,
                             memory_order_seq_cst);
}

static void
vn_ring_write_buffer(struct vn_ring *ring, const void *data, uint32_t size)
{
   assert(ring->cur + size - vn_ring_load_head(ring) <= ring->buffer_size);

   const uint32_t offset = ring->cur & ring->buffer_mask;
   if (offset + size <= ring->buffer_size) {
      memcpy(ring->shared.buffer + offset, data, size);
   } else {
      const uint32_t s = ring->buffer_size - offset;
      memcpy(ring->shared.buffer + offset, data, s);
      memcpy(ring->shared.buffer, data + s, size - s);
   }

   ring->cur += size;
}
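
/* Wrap-around illustration (hypothetical numbers): with a 4096-byte buffer,
 * ring->cur == 4000 and size == 200, the write starts at offset 4000, the
 * first memcpy covers the remaining 96 bytes at the end of the buffer, and
 * the second memcpy places the other 104 bytes at offset 0.
 */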

static bool
vn_ring_ge_seqno(const struct vn_ring *ring, uint32_t a, uint32_t b)
{
   /* This can return a false negative when not called often enough (e.g.,
    * when called once every couple of hours), but subsequent calls with
    * larger values of a will correct it.
    *
    * TODO use real seqnos?
    */
   if (a >= b)
      return ring->cur >= a || ring->cur < b;
   else
      return ring->cur >= a && ring->cur < b;
}
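
/* Seqnos are 32-bit ring positions that wrap, so the comparison uses
 * ring->cur as the reference point.  For example, with ring->cur == 0x10,
 * a == 0x8 and b == 0xffffff00, a is numerically smaller than b but sits on
 * the post-wrap side together with ring->cur, so a >= b is correctly
 * reported as true.
 */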

static void
vn_ring_retire_submits(struct vn_ring *ring, uint32_t seqno)
{
   struct vn_renderer *renderer = ring->instance->renderer;
   list_for_each_entry_safe(struct vn_ring_submit, submit, &ring->submits,
                            head) {
      if (!vn_ring_ge_seqno(ring, seqno, submit->seqno))
         break;

      for (uint32_t i = 0; i < submit->shmem_count; i++)
         vn_renderer_shmem_unref(renderer, submit->shmems[i]);

      list_move_to(&submit->head, &ring->free_submits);
   }
}

bool
vn_ring_get_seqno_status(struct vn_ring *ring, uint32_t seqno)
{
   return vn_ring_ge_seqno(ring, vn_ring_load_head(ring), seqno);
}

static void
vn_ring_wait_seqno(struct vn_ring *ring, uint32_t seqno)
{
   /* A renderer wait incurs several hops and the renderer might poll
    * repeatedly anyway. Let's just poll here.
    */
   const enum vn_relax_reason reason = ring == ring->instance->ring.ring
                                          ? VN_RELAX_REASON_RING_SEQNO
                                          : VN_RELAX_REASON_TLS_RING_SEQNO;
   struct vn_relax_state relax_state = vn_relax_init(ring->instance, reason);
   do {
      if (vn_ring_get_seqno_status(ring, seqno)) {
         vn_relax_fini(&relax_state);
         return;
      }
      vn_relax(&relax_state);
   } while (true);
}

void
vn_ring_wait_all(struct vn_ring *ring)
{
   /* load from tail rather than ring->cur for atomicity */
   const uint32_t pending_seqno =
      atomic_load_explicit(ring->shared.tail, memory_order_relaxed);
   vn_ring_wait_seqno(ring, pending_seqno);
}

static bool
vn_ring_has_space(const struct vn_ring *ring,
                  uint32_t size,
                  uint32_t *out_head)
{
   const uint32_t head = vn_ring_load_head(ring);
   if (likely(ring->cur + size - head <= ring->buffer_size)) {
      *out_head = head;
      return true;
   }

   return false;
}
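
/* ring->cur - head (modulo 2^32) is the number of bytes written but not yet
 * consumed by the renderer, so there is room for size more bytes exactly
 * when that amount plus size still fits in buffer_size.
 */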

static uint32_t
vn_ring_wait_space(struct vn_ring *ring, uint32_t size)
{
   assert(size <= ring->buffer_size);

   uint32_t head;
   if (likely(vn_ring_has_space(ring, size, &head)))
      return head;

   {
      VN_TRACE_FUNC();

      /* see the reasoning in vn_ring_wait_seqno */
      struct vn_relax_state relax_state =
         vn_relax_init(ring->instance, VN_RELAX_REASON_RING_SPACE);
      do {
         vn_relax(&relax_state);
         if (vn_ring_has_space(ring, size, &head)) {
            vn_relax_fini(&relax_state);
            return head;
         }
      } while (true);
   }
}

void
vn_ring_get_layout(size_t buf_size,
                   size_t extra_size,
                   struct vn_ring_layout *layout)
{
   /* this can be changed/extended quite freely */
   struct layout {
      alignas(64) uint32_t head;
      alignas(64) uint32_t tail;
      alignas(64) uint32_t status;

      alignas(64) uint8_t buffer[];
   };

   assert(buf_size && util_is_power_of_two_or_zero(buf_size));

   layout->head_offset = offsetof(struct layout, head);
   layout->tail_offset = offsetof(struct layout, tail);
   layout->status_offset = offsetof(struct layout, status);

   layout->buffer_offset = offsetof(struct layout, buffer);
   layout->buffer_size = buf_size;

   layout->extra_offset = layout->buffer_offset + layout->buffer_size;
   layout->extra_size = extra_size;

   layout->shmem_size = layout->extra_offset + layout->extra_size;
}
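
/* With the 64-byte alignment above, head/tail/status land on their own cache
 * lines at offsets 0/64/128 and the buffer starts at offset 192.  As an
 * illustrative example, buf_size == 4096 and extra_size == 0 yield a
 * shmem_size of 4288.
 */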

struct vn_ring *
vn_ring_create(struct vn_instance *instance,
               const struct vn_ring_layout *layout,
               uint8_t direct_order,
               bool is_tls_ring)
{
   VN_TRACE_FUNC();

   const VkAllocationCallbacks *alloc = &instance->base.base.alloc;

   struct vn_ring *ring = vk_zalloc(alloc, sizeof(*ring), VN_DEFAULT_ALIGN,
                                    VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
   if (!ring)
      return NULL;

   ring->id = (uintptr_t)ring;
   ring->instance = instance;
   ring->shmem =
      vn_renderer_shmem_create(instance->renderer, layout->shmem_size);
   if (!ring->shmem) {
      if (VN_DEBUG(INIT))
         vn_log(instance, "failed to allocate/map ring shmem");
      vk_free(alloc, ring);
      return NULL;
   }

   void *shared = ring->shmem->mmap_ptr;
   memset(shared, 0, layout->shmem_size);

   assert(layout->buffer_size &&
          util_is_power_of_two_or_zero(layout->buffer_size));
   ring->buffer_size = layout->buffer_size;
   ring->buffer_mask = ring->buffer_size - 1;

   ring->shared.head = shared + layout->head_offset;
   ring->shared.tail = shared + layout->tail_offset;
   ring->shared.status = shared + layout->status_offset;
   ring->shared.buffer = shared + layout->buffer_offset;
   ring->shared.extra = shared + layout->extra_offset;

   mtx_init(&ring->mutex, mtx_plain);

   ring->direct_size = layout->buffer_size >> direct_order;
   assert(ring->direct_size);

   vn_cs_encoder_init(&ring->upload, instance,
                      VN_CS_ENCODER_STORAGE_SHMEM_ARRAY, 1 * 1024 * 1024);

   list_inithead(&ring->submits);
   list_inithead(&ring->free_submits);

   mtx_init(&ring->roundtrip_mutex, mtx_plain);
   ring->roundtrip_next = 1;

   /* VkRingPriorityInfoMESA support requires
    * VK_MESA_VENUS_PROTOCOL_SPEC_VERSION >= 2
    */
   int prio = 0;
   bool ring_priority = false;
   if (instance->renderer->info.vk_mesa_venus_protocol_spec_version >= 2) {
      errno = 0;
      prio = getpriority(PRIO_PROCESS, 0);
      ring_priority = is_tls_ring && !(prio == -1 && errno);
   }
   const struct VkRingPriorityInfoMESA priority_info = {
      .sType = VK_STRUCTURE_TYPE_RING_PRIORITY_INFO_MESA,
      .priority = prio,
   };
   const struct VkRingMonitorInfoMESA monitor_info = {
      .sType = VK_STRUCTURE_TYPE_RING_MONITOR_INFO_MESA,
      .pNext = ring_priority ? &priority_info : NULL,
      .maxReportingPeriodMicroseconds = VN_WATCHDOG_REPORT_PERIOD_US,
   };
   const struct VkRingCreateInfoMESA info = {
      .sType = VK_STRUCTURE_TYPE_RING_CREATE_INFO_MESA,
      .pNext = &monitor_info,
      .resourceId = ring->shmem->res_id,
      .size = layout->shmem_size,
      .idleTimeout = VN_RING_IDLE_TIMEOUT_NS,
      .headOffset = layout->head_offset,
      .tailOffset = layout->tail_offset,
      .statusOffset = layout->status_offset,
      .bufferOffset = layout->buffer_offset,
      .bufferSize = layout->buffer_size,
      .extraOffset = layout->extra_offset,
      .extraSize = layout->extra_size,
   };

   uint32_t create_ring_data[64];
   struct vn_cs_encoder local_enc = VN_CS_ENCODER_INITIALIZER_LOCAL(
      create_ring_data, sizeof(create_ring_data));
   vn_encode_vkCreateRingMESA(&local_enc, 0, ring->id, &info);
   vn_renderer_submit_simple(instance->renderer, create_ring_data,
                             vn_cs_encoder_get_len(&local_enc));

   return ring;
}
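
/* A minimal creation sketch with illustrative values (real callers pick
 * their own buffer size, extra size and direct_order):
 *
 *    struct vn_ring_layout layout;
 *    vn_ring_get_layout(64 * 1024, extra_size, &layout);
 *    struct vn_ring *ring = vn_ring_create(instance, &layout, 4, false);
 *    if (!ring)
 *       return VK_ERROR_OUT_OF_HOST_MEMORY;
 */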

void
vn_ring_destroy(struct vn_ring *ring)
{
   VN_TRACE_FUNC();

   const VkAllocationCallbacks *alloc = &ring->instance->base.base.alloc;

   uint32_t destroy_ring_data[4];
   struct vn_cs_encoder local_enc = VN_CS_ENCODER_INITIALIZER_LOCAL(
      destroy_ring_data, sizeof(destroy_ring_data));
   vn_encode_vkDestroyRingMESA(&local_enc, 0, ring->id);
   vn_renderer_submit_simple(ring->instance->renderer, destroy_ring_data,
                             vn_cs_encoder_get_len(&local_enc));

   mtx_destroy(&ring->roundtrip_mutex);

   vn_ring_retire_submits(ring, ring->cur);
   assert(list_is_empty(&ring->submits));

   list_for_each_entry_safe(struct vn_ring_submit, submit,
                            &ring->free_submits, head)
      free(submit);

   vn_cs_encoder_fini(&ring->upload);
   vn_renderer_shmem_unref(ring->instance->renderer, ring->shmem);

   mtx_destroy(&ring->mutex);

   vk_free(alloc, ring);
}

uint64_t
vn_ring_get_id(struct vn_ring *ring)
{
   return ring->id;
}

static struct vn_ring_submit *
vn_ring_get_submit(struct vn_ring *ring, uint32_t shmem_count)
{
   const uint32_t min_shmem_count = 2;
   struct vn_ring_submit *submit;

   /* TODO this could be simplified if we could omit shmem_count */
   if (shmem_count <= min_shmem_count &&
       !list_is_empty(&ring->free_submits)) {
      submit =
         list_first_entry(&ring->free_submits, struct vn_ring_submit, head);
      list_del(&submit->head);
   } else {
      const size_t submit_size = offsetof(
         struct vn_ring_submit, shmems[MAX2(shmem_count, min_shmem_count)]);
      submit = malloc(submit_size);
   }

   return submit;
}

static bool
vn_ring_submit_internal(struct vn_ring *ring,
                        struct vn_ring_submit *submit,
                        const struct vn_cs_encoder *cs,
                        uint32_t *seqno)
{
   /* write cs to the ring */
   assert(!vn_cs_encoder_is_empty(cs));

   /* avoid -Wmaybe-uninitialized */
   uint32_t cur_seqno = 0;

   for (uint32_t i = 0; i < cs->buffer_count; i++) {
      const struct vn_cs_encoder_buffer *buf = &cs->buffers[i];
      cur_seqno = vn_ring_wait_space(ring, buf->committed_size);
      vn_ring_write_buffer(ring, buf->base, buf->committed_size);
   }

   vn_ring_store_tail(ring);
   const VkRingStatusFlagsMESA status = vn_ring_load_status(ring);
   if (status & VK_RING_STATUS_FATAL_BIT_MESA) {
      vn_log(NULL, "vn_ring_submit abort on fatal");
      abort();
   }

   vn_ring_retire_submits(ring, cur_seqno);

   submit->seqno = ring->cur;
   list_addtail(&submit->head, &ring->submits);

   *seqno = submit->seqno;

   /* Notify the renderer to wake up an idle ring only if at least
    * VN_RING_IDLE_TIMEOUT_NS has passed since the last notification, to
    * avoid excessive wake-up calls (non-trivial since they are submitted via
    * the virtio-gpu kernel driver).
    */
   if (status & VK_RING_STATUS_IDLE_BIT_MESA) {
      const int64_t now = os_time_get_nano();
      if (os_time_timeout(ring->last_notify, ring->next_notify, now)) {
         ring->last_notify = now;
         ring->next_notify = now + VN_RING_IDLE_TIMEOUT_NS;
         return true;
      }
   }
   return false;
}
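
/* Note that a submission's seqno is simply the ring write position right
 * after its commands (ring->cur at submission time), so it is retired once
 * the renderer's head moves past that position.
 */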

static const struct vn_cs_encoder *
vn_ring_submission_get_cs(struct vn_ring_submission *submit,
                          const struct vn_cs_encoder *cs,
                          bool direct)
{
   if (direct)
      return cs;

   STACK_ARRAY(VkCommandStreamDescriptionMESA, descs, cs->buffer_count);

   uint32_t desc_count = 0;
   for (uint32_t i = 0; i < cs->buffer_count; i++) {
      const struct vn_cs_encoder_buffer *buf = &cs->buffers[i];
      if (buf->committed_size) {
         descs[desc_count++] = (VkCommandStreamDescriptionMESA){
            .resourceId = buf->shmem->res_id,
            .offset = buf->offset,
            .size = buf->committed_size,
         };
      }
   }

   const size_t exec_size = vn_sizeof_vkExecuteCommandStreamsMESA(
      desc_count, descs, NULL, 0, NULL, 0);
   void *exec_data = submit->indirect.data;
   if (exec_size > sizeof(submit->indirect.data)) {
      exec_data = malloc(exec_size);
      if (!exec_data) {
         STACK_ARRAY_FINISH(descs);
         return NULL;
      }
   }

   submit->indirect.buffer = VN_CS_ENCODER_BUFFER_INITIALIZER(exec_data);
   submit->indirect.cs =
      VN_CS_ENCODER_INITIALIZER(&submit->indirect.buffer, exec_size);
   vn_encode_vkExecuteCommandStreamsMESA(&submit->indirect.cs, 0, desc_count,
                                         descs, NULL, 0, NULL, 0);
   vn_cs_encoder_commit(&submit->indirect.cs);

   STACK_ARRAY_FINISH(descs);

   return &submit->indirect.cs;
}

static struct vn_ring_submit *
vn_ring_submission_get_ring_submit(struct vn_ring *ring,
                                   const struct vn_cs_encoder *cs,
                                   struct vn_renderer_shmem *extra_shmem,
                                   bool direct)
{
   struct vn_renderer *renderer = ring->instance->renderer;
   const uint32_t shmem_count =
      (direct ? 0 : cs->buffer_count) + (extra_shmem ? 1 : 0);
   struct vn_ring_submit *submit = vn_ring_get_submit(ring, shmem_count);
   if (!submit)
      return NULL;

   submit->shmem_count = shmem_count;
   if (!direct) {
      for (uint32_t i = 0; i < cs->buffer_count; i++) {
         submit->shmems[i] =
            vn_renderer_shmem_ref(renderer, cs->buffers[i].shmem);
      }
   }
   if (extra_shmem) {
      submit->shmems[shmem_count - 1] =
         vn_renderer_shmem_ref(renderer, extra_shmem);
   }

   return submit;
}

static inline void
vn_ring_submission_cleanup(struct vn_ring_submission *submit)
{
   if (submit->cs == &submit->indirect.cs &&
       submit->indirect.buffer.base != submit->indirect.data)
      free(submit->indirect.buffer.base);
}

static VkResult
vn_ring_submission_prepare(struct vn_ring *ring,
                           struct vn_ring_submission *submit,
                           const struct vn_cs_encoder *cs,
                           struct vn_renderer_shmem *extra_shmem,
                           bool direct)
{
   submit->cs = vn_ring_submission_get_cs(submit, cs, direct);
   if (!submit->cs)
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   submit->submit =
      vn_ring_submission_get_ring_submit(ring, cs, extra_shmem, direct);
   if (!submit->submit) {
      vn_ring_submission_cleanup(submit);
      return VK_ERROR_OUT_OF_HOST_MEMORY;
   }

   return VK_SUCCESS;
}

static inline bool
vn_ring_submission_can_direct(const struct vn_ring *ring,
                              const struct vn_cs_encoder *cs)
{
   return vn_cs_encoder_get_len(cs) <= ring->direct_size;
}
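
/* direct_size is buffer_size >> direct_order (see vn_ring_create).  As an
 * illustrative example, a 64 KiB ring buffer with direct_order 4 lets
 * encoded commands up to 4 KiB go through the ring directly; anything larger
 * is instead wrapped in a vkExecuteCommandStreamsMESA that references shmem.
 */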

static struct vn_cs_encoder *
vn_ring_cs_upload_locked(struct vn_ring *ring, const struct vn_cs_encoder *cs)
{
   VN_TRACE_FUNC();
   assert(cs->storage_type == VN_CS_ENCODER_STORAGE_POINTER &&
          cs->buffer_count == 1);
   const void *cs_data = cs->buffers[0].base;
   const size_t cs_size = cs->total_committed_size;
   assert(cs_size == vn_cs_encoder_get_len(cs));

   struct vn_cs_encoder *upload = &ring->upload;
   vn_cs_encoder_reset(upload);

   if (!vn_cs_encoder_reserve(upload, cs_size))
      return NULL;

   vn_cs_encoder_write(upload, cs_size, cs_data, cs_size);
   vn_cs_encoder_commit(upload);

   if (vn_cs_encoder_needs_roundtrip(upload))
      vn_ring_roundtrip(ring);

   return upload;
}

static VkResult
vn_ring_submit_locked(struct vn_ring *ring,
                      const struct vn_cs_encoder *cs,
                      struct vn_renderer_shmem *extra_shmem,
                      uint32_t *ring_seqno)
{
   const bool direct = vn_ring_submission_can_direct(ring, cs);
   if (!direct && cs->storage_type == VN_CS_ENCODER_STORAGE_POINTER) {
      cs = vn_ring_cs_upload_locked(ring, cs);
      if (!cs)
         return VK_ERROR_OUT_OF_HOST_MEMORY;
      assert(cs->storage_type != VN_CS_ENCODER_STORAGE_POINTER);
   }

   struct vn_ring_submission submit;
   VkResult result =
      vn_ring_submission_prepare(ring, &submit, cs, extra_shmem, direct);
   if (result != VK_SUCCESS)
      return result;

   uint32_t seqno;
   const bool notify =
      vn_ring_submit_internal(ring, submit.submit, submit.cs, &seqno);
   if (notify) {
      uint32_t notify_ring_data[8];
      struct vn_cs_encoder local_enc = VN_CS_ENCODER_INITIALIZER_LOCAL(
         notify_ring_data, sizeof(notify_ring_data));
      vn_encode_vkNotifyRingMESA(&local_enc, 0, ring->id, seqno, 0);
      vn_renderer_submit_simple(ring->instance->renderer, notify_ring_data,
                                vn_cs_encoder_get_len(&local_enc));
   }

   vn_ring_submission_cleanup(&submit);

   if (ring_seqno)
      *ring_seqno = seqno;

   return VK_SUCCESS;
}

VkResult
vn_ring_submit_command_simple(struct vn_ring *ring,
                              const struct vn_cs_encoder *cs)
{
   mtx_lock(&ring->mutex);
   VkResult result = vn_ring_submit_locked(ring, cs, NULL, NULL);
   mtx_unlock(&ring->mutex);

   return result;
}

static inline void
vn_ring_set_reply_shmem_locked(struct vn_ring *ring,
                               struct vn_renderer_shmem *shmem,
                               size_t offset,
                               size_t size)
{
   uint32_t set_reply_command_stream_data[16];
   struct vn_cs_encoder local_enc = VN_CS_ENCODER_INITIALIZER_LOCAL(
      set_reply_command_stream_data, sizeof(set_reply_command_stream_data));
   const struct VkCommandStreamDescriptionMESA stream = {
      .resourceId = shmem->res_id,
      .offset = offset,
      .size = size,
   };
   vn_encode_vkSetReplyCommandStreamMESA(&local_enc, 0, &stream);
   vn_cs_encoder_commit(&local_enc);
   vn_ring_submit_locked(ring, &local_enc, NULL, NULL);
}

void
vn_ring_submit_command(struct vn_ring *ring,
                       struct vn_ring_submit_command *submit)
{
   assert(!vn_cs_encoder_is_empty(&submit->command));

   vn_cs_encoder_commit(&submit->command);

   size_t reply_offset = 0;
   if (submit->reply_size) {
      submit->reply_shmem = vn_instance_reply_shmem_alloc(
         ring->instance, submit->reply_size, &reply_offset);
      if (!submit->reply_shmem)
         return;

      if (ring->instance->renderer->info.has_guest_vram &&
          !submit->reply_shmem->cache_timestamp)
         vn_ring_roundtrip(ring);
   }

   mtx_lock(&ring->mutex);
   if (submit->reply_size) {
      vn_ring_set_reply_shmem_locked(ring, submit->reply_shmem, reply_offset,
                                     submit->reply_size);
   }
   submit->ring_seqno_valid =
      VK_SUCCESS == vn_ring_submit_locked(ring, &submit->command,
                                          submit->reply_shmem,
                                          &submit->ring_seqno);
   mtx_unlock(&ring->mutex);

   if (submit->reply_size) {
      if (likely(submit->ring_seqno_valid)) {
         void *reply_ptr = submit->reply_shmem->mmap_ptr + reply_offset;
         submit->reply =
            VN_CS_DECODER_INITIALIZER(reply_ptr, submit->reply_size);
         vn_ring_wait_seqno(ring, submit->ring_seqno);
      } else {
         vn_renderer_shmem_unref(ring->instance->renderer,
                                 submit->reply_shmem);
         submit->reply_shmem = NULL;
      }
   }
}
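
/* Flow for commands that expect a reply: allocate reply shmem, pair
 * vkSetReplyCommandStreamMESA with the command submission under the ring
 * mutex, then wait for the ring seqno before decoding the reply from the
 * mapped shmem.
 */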

void
vn_ring_free_command_reply(struct vn_ring *ring,
                           struct vn_ring_submit_command *submit)
{
   assert(submit->reply_shmem);
   vn_renderer_shmem_unref(ring->instance->renderer, submit->reply_shmem);
}

VkResult
vn_ring_submit_roundtrip(struct vn_ring *ring, uint64_t *roundtrip_seqno)
{
   uint32_t local_data[8];
   struct vn_cs_encoder local_enc =
      VN_CS_ENCODER_INITIALIZER_LOCAL(local_data, sizeof(local_data));

   mtx_lock(&ring->roundtrip_mutex);
   const uint64_t seqno = ring->roundtrip_next++;
   vn_encode_vkSubmitVirtqueueSeqnoMESA(&local_enc, 0, ring->id, seqno);
   VkResult result =
      vn_renderer_submit_simple(ring->instance->renderer, local_data,
                                vn_cs_encoder_get_len(&local_enc));
   mtx_unlock(&ring->roundtrip_mutex);

   *roundtrip_seqno = seqno;
   return result;
}

void
vn_ring_wait_roundtrip(struct vn_ring *ring, uint64_t roundtrip_seqno)
{
   vn_async_vkWaitVirtqueueSeqnoMESA(ring, roundtrip_seqno);
}
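
/* Typical pairing (sketch):
 *
 *    uint64_t seqno;
 *    if (vn_ring_submit_roundtrip(ring, &seqno) == VK_SUCCESS)
 *       vn_ring_wait_roundtrip(ring, seqno);
 */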