/*
 * Copyright 2021 Google LLC
 * SPDX-License-Identifier: MIT
 */

#include "vkr_ring.h"

#include <stdio.h>
#include <time.h>

#include "vrend_iov.h"

#include "vkr_context.h"

enum vkr_ring_status_flag {
   VKR_RING_STATUS_IDLE = 1u << 0,
};

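/* A ring lives in a resource shared with the driver (ring->attachment) and
 * is split by struct vkr_ring_layout into a control region (head, tail,
 * status), a command buffer region, and an extra region used for
 * out-of-band writes back to the driver (vkr_ring_write_extra).  A
 * dedicated thread (vkr_ring_thread) consumes commands from the buffer.
 */
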
/* callers must make sure they do not seek to end-of-resource or beyond */
static const struct iovec *
seek_resource(const struct vkr_resource_attachment *att,
              int base_iov_index,
              size_t offset,
              int *out_iov_index,
              size_t *out_iov_offset)
{
   const struct iovec *iov = &att->iov[base_iov_index];
   assert(iov - att->iov < att->iov_count);
   while (offset >= iov->iov_len) {
      offset -= iov->iov_len;
      iov++;
      assert(iov - att->iov < att->iov_count);
   }

   *out_iov_index = iov - att->iov;
   *out_iov_offset = offset;

   return iov;
}

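/* returns a host pointer to the byte at offset, counted from the start of
 * iov[base_iov_index]
 */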
static void *
get_resource_pointer(const struct vkr_resource_attachment *att,
                     int base_iov_index,
                     size_t offset)
{
   const struct iovec *iov =
      seek_resource(att, base_iov_index, offset, &base_iov_index, &offset);
   return (uint8_t *)iov->iov_base + offset;
}

static void
vkr_ring_init_extra(struct vkr_ring *ring, const struct vkr_ring_layout *layout)
{
   struct vkr_ring_extra *extra = &ring->extra;

   seek_resource(layout->attachment, 0, layout->extra.begin, &extra->base_iov_index,
                 &extra->base_iov_offset);

   extra->region = vkr_region_make_relative(&layout->extra);
}

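/* The buffer size must be a power of two: buf->cur increases monotonically
 * and buf->cur & buf->mask gives the current offset within the buffer
 * region.
 */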
static void
vkr_ring_init_buffer(struct vkr_ring *ring, const struct vkr_ring_layout *layout)
{
   struct vkr_ring_buffer *buf = &ring->buffer;

   const struct iovec *base_iov =
      seek_resource(layout->attachment, 0, layout->buffer.begin, &buf->base_iov_index,
                    &buf->base_iov_offset);

   buf->size = vkr_region_size(&layout->buffer);
   assert(util_is_power_of_two_nonzero(buf->size));
   buf->mask = buf->size - 1;

   buf->cur = 0;
   buf->cur_iov = base_iov;
   buf->cur_iov_index = buf->base_iov_index;
   buf->cur_iov_offset = buf->base_iov_offset;
}

static bool
vkr_ring_init_control(struct vkr_ring *ring, const struct vkr_ring_layout *layout)
{
   struct vkr_ring_control *ctrl = &ring->control;

   ctrl->head = get_resource_pointer(layout->attachment, 0, layout->head.begin);
   ctrl->tail = get_resource_pointer(layout->attachment, 0, layout->tail.begin);
   ctrl->status = get_resource_pointer(layout->attachment, 0, layout->status.begin);

   /* we will manage head and status, and we expect them to be 0 initially */
   if (*ctrl->head || *ctrl->status)
      return false;

   return true;
}

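/* The renderer owns head and status while the driver owns tail.  head is
 * stored with memory_order_release and tail is loaded with
 * memory_order_acquire, pairing with the driver's acquire load and release
 * store respectively.
 */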
static void
vkr_ring_store_head(struct vkr_ring *ring)
{
   /* the driver is expected to load the head with memory_order_acquire,
    * forming a release-acquire ordering
    */
   atomic_store_explicit(ring->control.head, ring->buffer.cur, memory_order_release);
}

static uint32_t
vkr_ring_load_tail(const struct vkr_ring *ring)
{
   /* the driver is expected to store the tail with memory_order_release,
    * forming a release-acquire ordering
    */
   return atomic_load_explicit(ring->control.tail, memory_order_acquire);
}

static void
vkr_ring_store_status(struct vkr_ring *ring, uint32_t status)
{
   atomic_store_explicit(ring->control.status, status, memory_order_seq_cst);
}

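/* Copies size bytes at the current ring position into data, crossing iovec
 * boundaries and wrapping around the end of the buffer region as needed,
 * then advances buf->cur.
 */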
/* TODO consider requiring virgl_resource to be logically contiguous */
static void
vkr_ring_read_buffer(struct vkr_ring *ring, void *data, uint32_t size)
{
   struct vkr_ring_buffer *buf = &ring->buffer;
   const struct vkr_resource_attachment *att = ring->attachment;

   assert(size <= buf->size);
   const uint32_t buf_offset = buf->cur & buf->mask;
   const uint32_t buf_avail = buf->size - buf_offset;
   const bool wrap = size >= buf_avail;

   uint32_t read_size;
   uint32_t wrap_size;
   if (!wrap) {
      read_size = size;
      wrap_size = 0;
   } else {
      read_size = buf_avail;
      /* When size == buf_avail, wrap is true but wrap_size is 0.  We want to
       * wrap because it seems slightly faster on the next call.  Besides,
       * seek_resource does not support seeking to end-of-resource which could
       * happen if we don't wrap and the buffer region end coincides with the
       * resource end.
       */
      wrap_size = size - buf_avail;
   }

   /* do the reads */
   if (read_size <= buf->cur_iov->iov_len - buf->cur_iov_offset) {
      const void *src = (const uint8_t *)buf->cur_iov->iov_base + buf->cur_iov_offset;
      memcpy(data, src, read_size);

      /* fast path */
      if (!wrap) {
         assert(!wrap_size);
         buf->cur += read_size;
         buf->cur_iov_offset += read_size;
         return;
      }
   } else {
      vrend_read_from_iovec(buf->cur_iov, att->iov_count - buf->cur_iov_index,
                            buf->cur_iov_offset, data, read_size);
   }

   if (wrap_size) {
      vrend_read_from_iovec(att->iov + buf->base_iov_index,
                            att->iov_count - buf->base_iov_index, buf->base_iov_offset,
                            (char *)data + read_size, wrap_size);
   }

   /* advance cur */
   buf->cur += size;
   if (!wrap) {
      buf->cur_iov = seek_resource(att, buf->cur_iov_index, buf->cur_iov_offset + size,
                                   &buf->cur_iov_index, &buf->cur_iov_offset);
   } else {
      buf->cur_iov =
         seek_resource(att, buf->base_iov_index, buf->base_iov_offset + wrap_size,
                       &buf->cur_iov_index, &buf->cur_iov_offset);
   }
}

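/* Creates a ring over layout->attachment.  ring->cmd is a staging buffer
 * the size of the whole buffer region, which bounds the size of a single
 * submission (see vkr_ring_thread).
 */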
struct vkr_ring *
vkr_ring_create(const struct vkr_ring_layout *layout,
                struct virgl_context *ctx,
                uint64_t idle_timeout)
{
   struct vkr_ring *ring;
   int ret;

   ring = calloc(1, sizeof(*ring));
   if (!ring)
      return NULL;

   ring->attachment = layout->attachment;

   if (!vkr_ring_init_control(ring, layout)) {
      free(ring);
      return NULL;
   }

   vkr_ring_init_buffer(ring, layout);
   vkr_ring_init_extra(ring, layout);

   ring->cmd = malloc(ring->buffer.size);
   if (!ring->cmd) {
      free(ring);
      return NULL;
   }

   ring->context = ctx;
   ring->idle_timeout = idle_timeout;

   ret = mtx_init(&ring->mutex, mtx_plain);
   if (ret != thrd_success) {
      free(ring->cmd);
      free(ring);
      return NULL;
   }
   ret = cnd_init(&ring->cond);
   if (ret != thrd_success) {
      mtx_destroy(&ring->mutex);
      free(ring->cmd);
      free(ring);
      return NULL;
   }

   return ring;
}

void
vkr_ring_destroy(struct vkr_ring *ring)
{
   list_del(&ring->head);

   assert(!ring->started);
   mtx_destroy(&ring->mutex);
   cnd_destroy(&ring->cond);
   free(ring->cmd);
   free(ring);
}

static uint64_t
vkr_ring_now(void)
{
   const uint64_t ns_per_sec = 1000000000llu;
   struct timespec now;
   if (clock_gettime(CLOCK_MONOTONIC, &now))
      return 0;
   return ns_per_sec * now.tv_sec + now.tv_nsec;
}

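/* Exponential backoff: yield for the first 2^busy_wait_order iterations,
 * then sleep, starting at base_sleep_us and doubling each time the
 * iteration count doubles.
 */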
static void
vkr_ring_relax(uint32_t *iter)
{
   /* TODO do better */
   const uint32_t busy_wait_order = 4;
   const uint32_t base_sleep_us = 10;

   (*iter)++;
   if (*iter < (1u << busy_wait_order)) {
      thrd_yield();
      return;
   }

   const uint32_t shift = util_last_bit(*iter) - busy_wait_order - 1;
   const uint32_t us = base_sleep_us << shift;
   const struct timespec ts = {
      .tv_sec = us / 1000000,
      .tv_nsec = (us % 1000000) * 1000,
   };
   clock_nanosleep(CLOCK_MONOTONIC, 0, &ts, NULL);
}

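/* Main loop of the ring thread.  It polls the tail and submits new commands
 * to the context.  After idle_timeout nanoseconds without a submission, it
 * sets VKR_RING_STATUS_IDLE and, if the tail has not moved, blocks on the
 * condition variable until vkr_ring_notify or vkr_ring_stop wakes it up.
 */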
static int
vkr_ring_thread(void *arg)
{
   struct vkr_ring *ring = arg;
   struct virgl_context *ctx = ring->context;
   char thread_name[16];

   snprintf(thread_name, ARRAY_SIZE(thread_name), "vkr-ring-%d", ctx->ctx_id);
   u_thread_setname(thread_name);

   uint64_t last_submit = vkr_ring_now();
   uint32_t relax_iter = 0;
   int ret = 0;
   while (ring->started) {
      bool wait = false;
      uint32_t cmd_size;

      if (vkr_ring_now() >= last_submit + ring->idle_timeout) {
         ring->pending_notify = false;
         vkr_ring_store_status(ring, VKR_RING_STATUS_IDLE);
         wait = ring->buffer.cur == vkr_ring_load_tail(ring);
         if (!wait)
            vkr_ring_store_status(ring, 0);
      }

      if (wait) {
         TRACE_SCOPE("ring idle");

         mtx_lock(&ring->mutex);
         if (ring->started && !ring->pending_notify)
            cnd_wait(&ring->cond, &ring->mutex);
         vkr_ring_store_status(ring, 0);
         mtx_unlock(&ring->mutex);

         if (!ring->started)
            break;

         last_submit = vkr_ring_now();
         relax_iter = 0;
      }

      cmd_size = vkr_ring_load_tail(ring) - ring->buffer.cur;
      if (cmd_size) {
         if (cmd_size > ring->buffer.size) {
            ret = -EINVAL;
            break;
         }

         vkr_ring_read_buffer(ring, ring->cmd, cmd_size);
         ctx->submit_cmd(ctx, ring->cmd, cmd_size);
         vkr_ring_store_head(ring);

         last_submit = vkr_ring_now();
         relax_iter = 0;
      } else {
         vkr_ring_relax(&relax_iter);
      }
   }

   return ret;
}

void
vkr_ring_start(struct vkr_ring *ring)
{
   int ret;

   assert(!ring->started);
   ring->started = true;
   ret = thrd_create(&ring->thread, vkr_ring_thread, ring);
   if (ret != thrd_success)
      ring->started = false;
}

bool
vkr_ring_stop(struct vkr_ring *ring)
{
   mtx_lock(&ring->mutex);
   if (thrd_equal(ring->thread, thrd_current())) {
      mtx_unlock(&ring->mutex);
      return false;
   }
   assert(ring->started);
   ring->started = false;
   cnd_signal(&ring->cond);
   mtx_unlock(&ring->mutex);

   thrd_join(ring->thread, NULL);

   return true;
}

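/* Wakes the ring thread if it is blocked waiting for new commands. */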
void
vkr_ring_notify(struct vkr_ring *ring)
{
   mtx_lock(&ring->mutex);
   ring->pending_notify = true;
   cnd_signal(&ring->cond);
   mtx_unlock(&ring->mutex);

   {
      TRACE_SCOPE("ring notify done");
   }
}

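/* Writes val into the extra region at offset.  The resolved pointer is
 * cached because the same offset is expected to be used repeatedly.
 */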
bool
vkr_ring_write_extra(struct vkr_ring *ring, size_t offset, uint32_t val)
{
   struct vkr_ring_extra *extra = &ring->extra;

   if (unlikely(extra->cached_offset != offset || !extra->cached_data)) {
      const struct vkr_region access = VKR_REGION_INIT(offset, sizeof(val));
      if (!vkr_region_is_valid(&access) || !vkr_region_is_within(&access, &extra->region))
         return false;

      /* Mesa always sets offset to 0 and the cache hit rate will be 100% */
      extra->cached_offset = offset;
      extra->cached_data = get_resource_pointer(ring->attachment, extra->base_iov_index,
                                                extra->base_iov_offset + offset);
   }

   atomic_store_explicit(extra->cached_data, val, memory_order_release);

   {
      TRACE_SCOPE("ring extra done");
   }

   return true;
}
405