xref: /aosp_15_r20/external/mesa3d/src/virtio/vulkan/vn_ring.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
/*
 * Copyright 2021 Google LLC
 * SPDX-License-Identifier: MIT
 */

#include "vn_ring.h"

#include <sys/resource.h>

#include "venus-protocol/vn_protocol_driver_transport.h"

#include "vn_cs.h"
#include "vn_instance.h"
#include "vn_renderer.h"

#define VN_RING_IDLE_TIMEOUT_NS (1ull * 1000 * 1000) /* 1 ms */

static_assert(ATOMIC_INT_LOCK_FREE == 2 && sizeof(atomic_uint) == 4,
              "vn_ring_shared requires lock-free 32-bit atomic_uint");

/* pointers to a ring in a BO */
struct vn_ring_shared {
   const volatile atomic_uint *head;
   volatile atomic_uint *tail;
   volatile atomic_uint *status;
   void *buffer;
   void *extra;
};

struct vn_ring {
   uint64_t id;
   struct vn_instance *instance;
   struct vn_renderer_shmem *shmem;

   uint32_t buffer_size;
   uint32_t buffer_mask;

   struct vn_ring_shared shared;
   uint32_t cur;

   /* This mutex ensures:
    * - atomicity of ring submissions
    * - that setting the reply shmem and the subsequent ring submission
    *   are paired
    */
   mtx_t mutex;

   /* size limit for cmd submission via ring shmem, derived from
    * (buffer_size >> direct_order) upon vn_ring_create
    */
   uint32_t direct_size;

   /* used for indirect submission of large commands (non-VkCommandBuffer) */
   struct vn_cs_encoder upload;

   struct list_head submits;
   struct list_head free_submits;

   /* to synchronize renderer/ring */
   mtx_t roundtrip_mutex;
   uint64_t roundtrip_next;

   int64_t last_notify;
   int64_t next_notify;
};

struct vn_ring_submit {
   uint32_t seqno;

   struct list_head head;

   /* BOs to keep alive (TODO make sure shmems are pinned) */
   uint32_t shmem_count;
   struct vn_renderer_shmem *shmems[];
};

struct vn_ring_submission {
   const struct vn_cs_encoder *cs;
   struct vn_ring_submit *submit;

   struct {
      struct vn_cs_encoder cs;
      struct vn_cs_encoder_buffer buffer;
      uint32_t data[64];
   } indirect;
};

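/* Cross-process synchronization with the renderer, conceptually:
 *
 *    driver (guest)                       renderer (host)
 *    write commands to buffer
 *    store(tail, release)  ------------>  load(tail, acquire)
 *                                         execute commands
 *    load(head, acquire)   <------------  store(head, release)
 *
 * The release-acquire pairs make the buffered commands visible to the
 * renderer and the consumed positions visible to the driver.
 */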
static uint32_t
vn_ring_load_head(const struct vn_ring *ring)
{
   /* the renderer is expected to store the head with memory_order_release,
    * forming a release-acquire ordering
    */
   return atomic_load_explicit(ring->shared.head, memory_order_acquire);
}

static void
vn_ring_store_tail(struct vn_ring *ring)
{
   /* the renderer is expected to load the tail with memory_order_acquire,
    * forming a release-acquire ordering
    */
   atomic_store_explicit(ring->shared.tail, ring->cur,
                         memory_order_release);
}

uint32_t
vn_ring_load_status(const struct vn_ring *ring)
{
   /* must be called and ordered after vn_ring_store_tail for idle status */
   return atomic_load_explicit(ring->shared.status, memory_order_seq_cst);
}

void
vn_ring_unset_status_bits(struct vn_ring *ring, uint32_t mask)
{
   atomic_fetch_and_explicit(ring->shared.status, ~mask,
                             memory_order_seq_cst);
}

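/* E.g., with buffer_size = 4096 (mask = 4095), cur = 4090 and size = 16:
 * offset = 4090, so the first 6 bytes are copied to the end of the buffer
 * and the remaining 10 bytes wrap around to its beginning.
 */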
static void
vn_ring_write_buffer(struct vn_ring *ring, const void *data, uint32_t size)
{
   assert(ring->cur + size - vn_ring_load_head(ring) <= ring->buffer_size);

   const uint32_t offset = ring->cur & ring->buffer_mask;
   if (offset + size <= ring->buffer_size) {
      memcpy(ring->shared.buffer + offset, data, size);
   } else {
      const uint32_t s = ring->buffer_size - offset;
      memcpy(ring->shared.buffer + offset, data, s);
      memcpy(ring->shared.buffer, data + s, size - s);
   }

   ring->cur += size;
}

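/* E.g., after the 32-bit seqno wraps: cur = 0x40, a = 0x10, b = 0xffffff00.
 * Numerically a < b, but cur >= a && cur < b holds, so a is correctly
 * treated as having passed b.
 */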
static bool
vn_ring_ge_seqno(const struct vn_ring *ring, uint32_t a, uint32_t b)
{
   /* this can return a false negative when not called frequently enough
    * (e.g., when called only once every couple of hours), but subsequent
    * calls with larger values of a will correct it
    *
    * TODO use real seqnos?
    */
   if (a >= b)
      return ring->cur >= a || ring->cur < b;
   else
      return ring->cur >= a && ring->cur < b;
}

static void
vn_ring_retire_submits(struct vn_ring *ring, uint32_t seqno)
{
   struct vn_renderer *renderer = ring->instance->renderer;
   list_for_each_entry_safe(struct vn_ring_submit, submit, &ring->submits,
                            head) {
      if (!vn_ring_ge_seqno(ring, seqno, submit->seqno))
         break;

      for (uint32_t i = 0; i < submit->shmem_count; i++)
         vn_renderer_shmem_unref(renderer, submit->shmems[i]);

      list_move_to(&submit->head, &ring->free_submits);
   }
}

bool
vn_ring_get_seqno_status(struct vn_ring *ring, uint32_t seqno)
{
   return vn_ring_ge_seqno(ring, vn_ring_load_head(ring), seqno);
}

static void
vn_ring_wait_seqno(struct vn_ring *ring, uint32_t seqno)
{
   /* A renderer wait incurs several hops and the renderer might poll
    * repeatedly anyway.  Let's just poll here.
    */
   const enum vn_relax_reason reason = ring == ring->instance->ring.ring
                                          ? VN_RELAX_REASON_RING_SEQNO
                                          : VN_RELAX_REASON_TLS_RING_SEQNO;
   struct vn_relax_state relax_state = vn_relax_init(ring->instance, reason);
   do {
      if (vn_ring_get_seqno_status(ring, seqno)) {
         vn_relax_fini(&relax_state);
         return;
      }
      vn_relax(&relax_state);
   } while (true);
}

void
vn_ring_wait_all(struct vn_ring *ring)
{
   /* load from tail rather than ring->cur for atomicity */
   const uint32_t pending_seqno =
      atomic_load_explicit(ring->shared.tail, memory_order_relaxed);
   vn_ring_wait_seqno(ring, pending_seqno);
}

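/* The ring is full when cur - head == buffer_size; the unsigned arithmetic
 * below stays correct even when the 32-bit positions wrap around.
 */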
static bool
vn_ring_has_space(const struct vn_ring *ring,
                  uint32_t size,
                  uint32_t *out_head)
{
   const uint32_t head = vn_ring_load_head(ring);
   if (likely(ring->cur + size - head <= ring->buffer_size)) {
      *out_head = head;
      return true;
   }

   return false;
}

static uint32_t
vn_ring_wait_space(struct vn_ring *ring, uint32_t size)
{
   assert(size <= ring->buffer_size);

   uint32_t head;
   if (likely(vn_ring_has_space(ring, size, &head)))
      return head;

   {
      VN_TRACE_FUNC();

      /* see the reasoning in vn_ring_wait_seqno */
      struct vn_relax_state relax_state =
         vn_relax_init(ring->instance, VN_RELAX_REASON_RING_SPACE);
      do {
         vn_relax(&relax_state);
         if (vn_ring_has_space(ring, size, &head)) {
            vn_relax_fini(&relax_state);
            return head;
         }
      } while (true);
   }
}

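/* E.g., for buf_size = 64 KiB and extra_size = 0, the layout struct below
 * yields head at offset 0, tail at 64, status at 128, buffer at 192, and
 * shmem_size = 192 + 65536.  Each control word gets its own cache line,
 * presumably to avoid false sharing between the driver and the renderer.
 */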
void
vn_ring_get_layout(size_t buf_size,
                   size_t extra_size,
                   struct vn_ring_layout *layout)
{
   /* this can be changed/extended quite freely */
   struct layout {
      alignas(64) uint32_t head;
      alignas(64) uint32_t tail;
      alignas(64) uint32_t status;

      alignas(64) uint8_t buffer[];
   };

   assert(buf_size && util_is_power_of_two_or_zero(buf_size));

   layout->head_offset = offsetof(struct layout, head);
   layout->tail_offset = offsetof(struct layout, tail);
   layout->status_offset = offsetof(struct layout, status);

   layout->buffer_offset = offsetof(struct layout, buffer);
   layout->buffer_size = buf_size;

   layout->extra_offset = layout->buffer_offset + layout->buffer_size;
   layout->extra_size = extra_size;

   layout->shmem_size = layout->extra_offset + layout->extra_size;
}

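/* Creates the shmem-backed ring and registers it with the renderer via
 * vkCreateRingMESA.  The create command is encoded into a local buffer and
 * submitted through the renderer directly, since the ring being created
 * cannot carry its own creation command.
 */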
struct vn_ring *
vn_ring_create(struct vn_instance *instance,
               const struct vn_ring_layout *layout,
               uint8_t direct_order,
               bool is_tls_ring)
{
   VN_TRACE_FUNC();

   const VkAllocationCallbacks *alloc = &instance->base.base.alloc;

   struct vn_ring *ring = vk_zalloc(alloc, sizeof(*ring), VN_DEFAULT_ALIGN,
                                    VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
   if (!ring)
      return NULL;

   ring->id = (uintptr_t)ring;
   ring->instance = instance;
   ring->shmem =
      vn_renderer_shmem_create(instance->renderer, layout->shmem_size);
   if (!ring->shmem) {
      if (VN_DEBUG(INIT))
         vn_log(instance, "failed to allocate/map ring shmem");
      vk_free(alloc, ring);
      return NULL;
   }

   void *shared = ring->shmem->mmap_ptr;
   memset(shared, 0, layout->shmem_size);

   assert(layout->buffer_size &&
          util_is_power_of_two_or_zero(layout->buffer_size));
   ring->buffer_size = layout->buffer_size;
   ring->buffer_mask = ring->buffer_size - 1;

   ring->shared.head = shared + layout->head_offset;
   ring->shared.tail = shared + layout->tail_offset;
   ring->shared.status = shared + layout->status_offset;
   ring->shared.buffer = shared + layout->buffer_offset;
   ring->shared.extra = shared + layout->extra_offset;

   mtx_init(&ring->mutex, mtx_plain);

   ring->direct_size = layout->buffer_size >> direct_order;
   assert(ring->direct_size);

   vn_cs_encoder_init(&ring->upload, instance,
                      VN_CS_ENCODER_STORAGE_SHMEM_ARRAY, 1 * 1024 * 1024);

   list_inithead(&ring->submits);
   list_inithead(&ring->free_submits);

   mtx_init(&ring->roundtrip_mutex, mtx_plain);
   ring->roundtrip_next = 1;

   /* VkRingPriorityInfoMESA support requires
    * VK_MESA_VENUS_PROTOCOL_SPEC_VERSION >= 2
    */
   int prio = 0;
   bool ring_priority = false;
   if (instance->renderer->info.vk_mesa_venus_protocol_spec_version >= 2) {
      /* getpriority can legitimately return -1, so clear and check errno to
       * distinguish that from failure
       */
      errno = 0;
      prio = getpriority(PRIO_PROCESS, 0);
      ring_priority = is_tls_ring && !(prio == -1 && errno);
   }
   const struct VkRingPriorityInfoMESA priority_info = {
      .sType = VK_STRUCTURE_TYPE_RING_PRIORITY_INFO_MESA,
      .priority = prio,
   };
   const struct VkRingMonitorInfoMESA monitor_info = {
      .sType = VK_STRUCTURE_TYPE_RING_MONITOR_INFO_MESA,
      .pNext = ring_priority ? &priority_info : NULL,
      .maxReportingPeriodMicroseconds = VN_WATCHDOG_REPORT_PERIOD_US,
   };
   const struct VkRingCreateInfoMESA info = {
      .sType = VK_STRUCTURE_TYPE_RING_CREATE_INFO_MESA,
      .pNext = &monitor_info,
      .resourceId = ring->shmem->res_id,
      .size = layout->shmem_size,
      .idleTimeout = VN_RING_IDLE_TIMEOUT_NS,
      .headOffset = layout->head_offset,
      .tailOffset = layout->tail_offset,
      .statusOffset = layout->status_offset,
      .bufferOffset = layout->buffer_offset,
      .bufferSize = layout->buffer_size,
      .extraOffset = layout->extra_offset,
      .extraSize = layout->extra_size,
   };

   uint32_t create_ring_data[64];
   struct vn_cs_encoder local_enc = VN_CS_ENCODER_INITIALIZER_LOCAL(
      create_ring_data, sizeof(create_ring_data));
   vn_encode_vkCreateRingMESA(&local_enc, 0, ring->id, &info);
   vn_renderer_submit_simple(instance->renderer, create_ring_data,
                             vn_cs_encoder_get_len(&local_enc));

   return ring;
}

void
vn_ring_destroy(struct vn_ring *ring)
{
   VN_TRACE_FUNC();

   const VkAllocationCallbacks *alloc = &ring->instance->base.base.alloc;

   uint32_t destroy_ring_data[4];
   struct vn_cs_encoder local_enc = VN_CS_ENCODER_INITIALIZER_LOCAL(
      destroy_ring_data, sizeof(destroy_ring_data));
   vn_encode_vkDestroyRingMESA(&local_enc, 0, ring->id);
   vn_renderer_submit_simple(ring->instance->renderer, destroy_ring_data,
                             vn_cs_encoder_get_len(&local_enc));

   mtx_destroy(&ring->roundtrip_mutex);

   vn_ring_retire_submits(ring, ring->cur);
   assert(list_is_empty(&ring->submits));

   list_for_each_entry_safe(struct vn_ring_submit, submit,
                            &ring->free_submits, head)
      free(submit);

   vn_cs_encoder_fini(&ring->upload);
   vn_renderer_shmem_unref(ring->instance->renderer, ring->shmem);

   mtx_destroy(&ring->mutex);

   vk_free(alloc, ring);
}

uint64_t
vn_ring_get_id(struct vn_ring *ring)
{
   return ring->id;
}

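/* Returns a submit with capacity for at least shmem_count shmems.  Entries
 * on free_submits were all allocated with capacity >= min_shmem_count, so
 * any request no larger than that can reuse one without reallocating.
 */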
static struct vn_ring_submit *
vn_ring_get_submit(struct vn_ring *ring, uint32_t shmem_count)
{
   const uint32_t min_shmem_count = 2;
   struct vn_ring_submit *submit;

   /* TODO this could be simplified if we could omit shmem_count */
   if (shmem_count <= min_shmem_count &&
       !list_is_empty(&ring->free_submits)) {
      submit =
         list_first_entry(&ring->free_submits, struct vn_ring_submit, head);
      list_del(&submit->head);
   } else {
      const size_t submit_size = offsetof(
         struct vn_ring_submit, shmems[MAX2(shmem_count, min_shmem_count)]);
      submit = malloc(submit_size);
   }

   return submit;
}

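/* Copies the encoded buffers onto the ring, publishes the new tail,
 * retires submits that the renderer has already consumed, and reports
 * whether the renderer needs an explicit wake-up notification.
 */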
static bool
vn_ring_submit_internal(struct vn_ring *ring,
                        struct vn_ring_submit *submit,
                        const struct vn_cs_encoder *cs,
                        uint32_t *seqno)
{
   /* write cs to the ring */
   assert(!vn_cs_encoder_is_empty(cs));

   /* avoid -Wmaybe-uninitialized */
   uint32_t cur_seqno = 0;

   for (uint32_t i = 0; i < cs->buffer_count; i++) {
      const struct vn_cs_encoder_buffer *buf = &cs->buffers[i];
      cur_seqno = vn_ring_wait_space(ring, buf->committed_size);
      vn_ring_write_buffer(ring, buf->base, buf->committed_size);
   }

   vn_ring_store_tail(ring);
   const VkRingStatusFlagsMESA status = vn_ring_load_status(ring);
   if (status & VK_RING_STATUS_FATAL_BIT_MESA) {
      vn_log(NULL, "vn_ring_submit abort on fatal");
      abort();
   }

   vn_ring_retire_submits(ring, cur_seqno);

   submit->seqno = ring->cur;
   list_addtail(&submit->head, &ring->submits);

   *seqno = submit->seqno;

   /* Notify the renderer to wake up an idle ring only if at least
    * VN_RING_IDLE_TIMEOUT_NS has passed since the last notification, to
    * avoid excessive wake-up calls (non-trivial since they are submitted
    * via the virtio-gpu kernel driver).
    */
   if (status & VK_RING_STATUS_IDLE_BIT_MESA) {
      const int64_t now = os_time_get_nano();
      if (os_time_timeout(ring->last_notify, ring->next_notify, now)) {
         ring->last_notify = now;
         ring->next_notify = now + VN_RING_IDLE_TIMEOUT_NS;
         return true;
      }
   }
   return false;
}

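/* For indirect submission, the large command stream stays in its
 * shmem-backed buffers; only a small vkExecuteCommandStreamsMESA command
 * that references those buffers by resource id is built here for the ring.
 */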
static const struct vn_cs_encoder *
vn_ring_submission_get_cs(struct vn_ring_submission *submit,
                          const struct vn_cs_encoder *cs,
                          bool direct)
{
   if (direct)
      return cs;

   STACK_ARRAY(VkCommandStreamDescriptionMESA, descs, cs->buffer_count);

   uint32_t desc_count = 0;
   for (uint32_t i = 0; i < cs->buffer_count; i++) {
      const struct vn_cs_encoder_buffer *buf = &cs->buffers[i];
      if (buf->committed_size) {
         descs[desc_count++] = (VkCommandStreamDescriptionMESA){
            .resourceId = buf->shmem->res_id,
            .offset = buf->offset,
            .size = buf->committed_size,
         };
      }
   }

   const size_t exec_size = vn_sizeof_vkExecuteCommandStreamsMESA(
      desc_count, descs, NULL, 0, NULL, 0);
   void *exec_data = submit->indirect.data;
   if (exec_size > sizeof(submit->indirect.data)) {
      exec_data = malloc(exec_size);
      if (!exec_data) {
         STACK_ARRAY_FINISH(descs);
         return NULL;
      }
   }

   submit->indirect.buffer = VN_CS_ENCODER_BUFFER_INITIALIZER(exec_data);
   submit->indirect.cs =
      VN_CS_ENCODER_INITIALIZER(&submit->indirect.buffer, exec_size);
   vn_encode_vkExecuteCommandStreamsMESA(&submit->indirect.cs, 0, desc_count,
                                         descs, NULL, 0, NULL, 0);
   vn_cs_encoder_commit(&submit->indirect.cs);

   STACK_ARRAY_FINISH(descs);

   return &submit->indirect.cs;
}

static struct vn_ring_submit *
vn_ring_submission_get_ring_submit(struct vn_ring *ring,
                                   const struct vn_cs_encoder *cs,
                                   struct vn_renderer_shmem *extra_shmem,
                                   bool direct)
{
   struct vn_renderer *renderer = ring->instance->renderer;
   const uint32_t shmem_count =
      (direct ? 0 : cs->buffer_count) + (extra_shmem ? 1 : 0);
   struct vn_ring_submit *submit = vn_ring_get_submit(ring, shmem_count);
   if (!submit)
      return NULL;

   submit->shmem_count = shmem_count;
   if (!direct) {
      for (uint32_t i = 0; i < cs->buffer_count; i++) {
         submit->shmems[i] =
            vn_renderer_shmem_ref(renderer, cs->buffers[i].shmem);
      }
   }
   if (extra_shmem) {
      submit->shmems[shmem_count - 1] =
         vn_renderer_shmem_ref(renderer, extra_shmem);
   }

   return submit;
}

static inline void
vn_ring_submission_cleanup(struct vn_ring_submission *submit)
{
   if (submit->cs == &submit->indirect.cs &&
       submit->indirect.buffer.base != submit->indirect.data)
      free(submit->indirect.buffer.base);
}

static VkResult
vn_ring_submission_prepare(struct vn_ring *ring,
                           struct vn_ring_submission *submit,
                           const struct vn_cs_encoder *cs,
                           struct vn_renderer_shmem *extra_shmem,
                           bool direct)
{
   submit->cs = vn_ring_submission_get_cs(submit, cs, direct);
   if (!submit->cs)
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   submit->submit =
      vn_ring_submission_get_ring_submit(ring, cs, extra_shmem, direct);
   if (!submit->submit) {
      vn_ring_submission_cleanup(submit);
      return VK_ERROR_OUT_OF_HOST_MEMORY;
   }

   return VK_SUCCESS;
}

static inline bool
vn_ring_submission_can_direct(const struct vn_ring *ring,
                              const struct vn_cs_encoder *cs)
{
   return vn_cs_encoder_get_len(cs) <= ring->direct_size;
}

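/* Copies a pointer-backed command stream into the shmem-backed upload
 * encoder so that it can be submitted indirectly.  A roundtrip is issued
 * when the encoder requires one, presumably so the renderer has attached
 * the newly created upload shmem before the ring submission references it.
 */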
static struct vn_cs_encoder *
vn_ring_cs_upload_locked(struct vn_ring *ring, const struct vn_cs_encoder *cs)
{
   VN_TRACE_FUNC();
   assert(cs->storage_type == VN_CS_ENCODER_STORAGE_POINTER &&
          cs->buffer_count == 1);
   const void *cs_data = cs->buffers[0].base;
   const size_t cs_size = cs->total_committed_size;
   assert(cs_size == vn_cs_encoder_get_len(cs));

   struct vn_cs_encoder *upload = &ring->upload;
   vn_cs_encoder_reset(upload);

   if (!vn_cs_encoder_reserve(upload, cs_size))
      return NULL;

   vn_cs_encoder_write(upload, cs_size, cs_data, cs_size);
   vn_cs_encoder_commit(upload);

   if (vn_cs_encoder_needs_roundtrip(upload))
      vn_ring_roundtrip(ring);

   return upload;
}

static VkResult
vn_ring_submit_locked(struct vn_ring *ring,
                      const struct vn_cs_encoder *cs,
                      struct vn_renderer_shmem *extra_shmem,
                      uint32_t *ring_seqno)
{
   const bool direct = vn_ring_submission_can_direct(ring, cs);
   if (!direct && cs->storage_type == VN_CS_ENCODER_STORAGE_POINTER) {
      cs = vn_ring_cs_upload_locked(ring, cs);
      if (!cs)
         return VK_ERROR_OUT_OF_HOST_MEMORY;
      assert(cs->storage_type != VN_CS_ENCODER_STORAGE_POINTER);
   }

   struct vn_ring_submission submit;
   VkResult result =
      vn_ring_submission_prepare(ring, &submit, cs, extra_shmem, direct);
   if (result != VK_SUCCESS)
      return result;

   uint32_t seqno;
   const bool notify =
      vn_ring_submit_internal(ring, submit.submit, submit.cs, &seqno);
   if (notify) {
      uint32_t notify_ring_data[8];
      struct vn_cs_encoder local_enc = VN_CS_ENCODER_INITIALIZER_LOCAL(
         notify_ring_data, sizeof(notify_ring_data));
      vn_encode_vkNotifyRingMESA(&local_enc, 0, ring->id, seqno, 0);
      vn_renderer_submit_simple(ring->instance->renderer, notify_ring_data,
                                vn_cs_encoder_get_len(&local_enc));
   }

   vn_ring_submission_cleanup(&submit);

   if (ring_seqno)
      *ring_seqno = seqno;

   return VK_SUCCESS;
}

VkResult
vn_ring_submit_command_simple(struct vn_ring *ring,
                              const struct vn_cs_encoder *cs)
{
   mtx_lock(&ring->mutex);
   VkResult result = vn_ring_submit_locked(ring, cs, NULL, NULL);
   mtx_unlock(&ring->mutex);

   return result;
}

static inline void
vn_ring_set_reply_shmem_locked(struct vn_ring *ring,
                               struct vn_renderer_shmem *shmem,
                               size_t offset,
                               size_t size)
{
   uint32_t set_reply_command_stream_data[16];
   struct vn_cs_encoder local_enc = VN_CS_ENCODER_INITIALIZER_LOCAL(
      set_reply_command_stream_data, sizeof(set_reply_command_stream_data));
   const struct VkCommandStreamDescriptionMESA stream = {
      .resourceId = shmem->res_id,
      .offset = offset,
      .size = size,
   };
   vn_encode_vkSetReplyCommandStreamMESA(&local_enc, 0, &stream);
   vn_cs_encoder_commit(&local_enc);
   vn_ring_submit_locked(ring, &local_enc, NULL, NULL);
}

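/* Submits a command and, when a reply is expected, sets the reply shmem
 * and submits under the same mutex hold so the two are paired, then waits
 * for the ring seqno so the reply is ready for decoding on return.
 */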
void
vn_ring_submit_command(struct vn_ring *ring,
                       struct vn_ring_submit_command *submit)
{
   assert(!vn_cs_encoder_is_empty(&submit->command));

   vn_cs_encoder_commit(&submit->command);

   size_t reply_offset = 0;
   if (submit->reply_size) {
      submit->reply_shmem = vn_instance_reply_shmem_alloc(
         ring->instance, submit->reply_size, &reply_offset);
      if (!submit->reply_shmem)
         return;

      if (ring->instance->renderer->info.has_guest_vram &&
          !submit->reply_shmem->cache_timestamp)
         vn_ring_roundtrip(ring);
   }

   mtx_lock(&ring->mutex);
   if (submit->reply_size) {
      vn_ring_set_reply_shmem_locked(ring, submit->reply_shmem, reply_offset,
                                     submit->reply_size);
   }
   submit->ring_seqno_valid =
      VK_SUCCESS == vn_ring_submit_locked(ring, &submit->command,
                                          submit->reply_shmem,
                                          &submit->ring_seqno);
   mtx_unlock(&ring->mutex);

   if (submit->reply_size) {
      if (likely(submit->ring_seqno_valid)) {
         void *reply_ptr = submit->reply_shmem->mmap_ptr + reply_offset;
         submit->reply =
            VN_CS_DECODER_INITIALIZER(reply_ptr, submit->reply_size);
         vn_ring_wait_seqno(ring, submit->ring_seqno);
      } else {
         vn_renderer_shmem_unref(ring->instance->renderer,
                                 submit->reply_shmem);
         submit->reply_shmem = NULL;
      }
   }
}

void
vn_ring_free_command_reply(struct vn_ring *ring,
                           struct vn_ring_submit_command *submit)
{
   assert(submit->reply_shmem);
   vn_renderer_shmem_unref(ring->instance->renderer, submit->reply_shmem);
}

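/* Roundtrips synchronize the ring with the renderer's virtqueue: a seqno
 * is submitted directly through the virtqueue here, and
 * vn_ring_wait_roundtrip then makes the ring wait on the renderer side
 * until that virtqueue seqno has been processed.
 */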
VkResult
vn_ring_submit_roundtrip(struct vn_ring *ring, uint64_t *roundtrip_seqno)
{
   uint32_t local_data[8];
   struct vn_cs_encoder local_enc =
      VN_CS_ENCODER_INITIALIZER_LOCAL(local_data, sizeof(local_data));

   mtx_lock(&ring->roundtrip_mutex);
   const uint64_t seqno = ring->roundtrip_next++;
   vn_encode_vkSubmitVirtqueueSeqnoMESA(&local_enc, 0, ring->id, seqno);
   VkResult result =
      vn_renderer_submit_simple(ring->instance->renderer, local_data,
                                vn_cs_encoder_get_len(&local_enc));
   mtx_unlock(&ring->roundtrip_mutex);

   *roundtrip_seqno = seqno;
   return result;
}

void
vn_ring_wait_roundtrip(struct vn_ring *ring, uint64_t roundtrip_seqno)
{
   vn_async_vkWaitVirtqueueSeqnoMESA(ring, roundtrip_seqno);
}