1 /*
2 * Copyright © 2014-2017 Broadcom
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 /** @file v3d_job.c
25 *
26 * Functions for submitting V3D render jobs to the kernel.
27 */
28
29 #include <xf86drm.h>
30 #include <libsync.h>
31 #include "v3d_context.h"
32 /* The OQ/semaphore packets are the same across V3D versions. */
33 #define V3D_VERSION 42
34 #include "broadcom/cle/v3dx_pack.h"
35 #include "broadcom/common/v3d_macros.h"
36 #include "util/hash_table.h"
37 #include "util/ralloc.h"
38 #include "util/set.h"
39 #include "broadcom/clif/clif_dump.h"
40
/**
 * Frees the job and everything it owns: its BO references, its CL buffers,
 * its surface references, and its entries in the context's job-tracking
 * hash tables.
 */
void
v3d_job_free(struct v3d_context *v3d, struct v3d_job *job)
{
        /* Drop the submission reference the job held on each attached BO. */
        set_foreach(job->bos, entry) {
                struct v3d_bo *bo = (struct v3d_bo *)entry->key;
                v3d_bo_unreference(&bo);
        }

        /* Remove the job from the FBO-keyed job table. */
        _mesa_hash_table_remove_key(v3d->jobs, &job->key);

        /* Clear every "this job is the pending writer of prsc" mapping. */
        if (job->write_prscs) {
                set_foreach(job->write_prscs, entry) {
                        const struct pipe_resource *prsc = entry->key;

                        _mesa_hash_table_remove_key(v3d->write_jobs, prsc);
                }
        }

        for (int i = 0; i < job->nr_cbufs; i++) {
                if (job->cbufs[i]) {
                        _mesa_hash_table_remove_key(v3d->write_jobs,
                                                    job->cbufs[i]->texture);
                        pipe_surface_reference(&job->cbufs[i], NULL);
                }
        }
        if (job->zsbuf) {
                struct v3d_resource *rsc = v3d_resource(job->zsbuf->texture);
                /* A Z/S buffer with separate stencil storage also got a
                 * write_jobs entry for the stencil resource (see
                 * v3d_get_job()), so remove that one too.
                 */
                if (rsc->separate_stencil)
                        _mesa_hash_table_remove_key(v3d->write_jobs,
                                                    &rsc->separate_stencil->base);

                _mesa_hash_table_remove_key(v3d->write_jobs,
                                            job->zsbuf->texture);
                pipe_surface_reference(&job->zsbuf, NULL);
        }
        if (job->bbuf)
                pipe_surface_reference(&job->bbuf, NULL);

        if (job->dbuf)
                pipe_surface_reference(&job->dbuf, NULL);

        /* If this was the context's current job, the context no longer has
         * one.
         */
        if (v3d->job == job)
                v3d->job = NULL;

        v3d_destroy_cl(&job->bcl);
        v3d_destroy_cl(&job->rcl);
        v3d_destroy_cl(&job->indirect);
        v3d_bo_unreference(&job->tile_alloc);
        v3d_bo_unreference(&job->tile_state);

        /* job was rallocated against v3d; this also frees everything
         * rallocated against job (sets, handle array, CL allocations).
         */
        ralloc_free(job);
}
93
94 struct v3d_job *
v3d_job_create(struct v3d_context * v3d)95 v3d_job_create(struct v3d_context *v3d)
96 {
97 struct v3d_job *job = rzalloc(v3d, struct v3d_job);
98
99 job->v3d = v3d;
100
101 v3d_init_cl(job, &job->bcl);
102 v3d_init_cl(job, &job->rcl);
103 v3d_init_cl(job, &job->indirect);
104
105 job->draw_min_x = ~0;
106 job->draw_min_y = ~0;
107 job->draw_max_x = 0;
108 job->draw_max_y = 0;
109
110 job->bos = _mesa_set_create(job,
111 _mesa_hash_pointer,
112 _mesa_key_pointer_equal);
113 return job;
114 }
115
116 void
v3d_job_add_bo(struct v3d_job * job,struct v3d_bo * bo)117 v3d_job_add_bo(struct v3d_job *job, struct v3d_bo *bo)
118 {
119 if (!bo)
120 return;
121
122 if (_mesa_set_search(job->bos, bo))
123 return;
124
125 v3d_bo_reference(bo);
126 _mesa_set_add(job->bos, bo);
127 job->referenced_size += bo->size;
128
129 uint32_t *bo_handles = (void *)(uintptr_t)job->submit.bo_handles;
130
131 if (job->submit.bo_handle_count >= job->bo_handles_size) {
132 job->bo_handles_size = MAX2(4, job->bo_handles_size * 2);
133 bo_handles = reralloc(job, bo_handles,
134 uint32_t, job->bo_handles_size);
135 job->submit.bo_handles = (uintptr_t)(void *)bo_handles;
136 }
137 bo_handles[job->submit.bo_handle_count++] = bo->handle;
138 }
139
140 void
v3d_job_add_write_resource(struct v3d_job * job,struct pipe_resource * prsc)141 v3d_job_add_write_resource(struct v3d_job *job, struct pipe_resource *prsc)
142 {
143 struct v3d_context *v3d = job->v3d;
144
145 if (!job->write_prscs) {
146 job->write_prscs = _mesa_set_create(job,
147 _mesa_hash_pointer,
148 _mesa_key_pointer_equal);
149 }
150
151 _mesa_set_add(job->write_prscs, prsc);
152 _mesa_hash_table_insert(v3d->write_jobs, prsc, job);
153 }
154
155 void
v3d_flush_jobs_using_bo(struct v3d_context * v3d,struct v3d_bo * bo)156 v3d_flush_jobs_using_bo(struct v3d_context *v3d, struct v3d_bo *bo)
157 {
158 hash_table_foreach(v3d->jobs, entry) {
159 struct v3d_job *job = entry->data;
160
161 if (_mesa_set_search(job->bos, bo))
162 v3d_job_submit(v3d, job);
163 }
164 }
165
166 void
v3d_job_add_tf_write_resource(struct v3d_job * job,struct pipe_resource * prsc)167 v3d_job_add_tf_write_resource(struct v3d_job *job, struct pipe_resource *prsc)
168 {
169 v3d_job_add_write_resource(job, prsc);
170
171 if (!job->tf_write_prscs)
172 job->tf_write_prscs = _mesa_pointer_set_create(job);
173
174 _mesa_set_add(job->tf_write_prscs, prsc);
175 }
176
177 static bool
v3d_job_writes_resource_from_tf(struct v3d_job * job,struct pipe_resource * prsc)178 v3d_job_writes_resource_from_tf(struct v3d_job *job,
179 struct pipe_resource *prsc)
180 {
181 if (!job->tf_enabled)
182 return false;
183
184 if (!job->tf_write_prscs)
185 return false;
186
187 return _mesa_set_search(job->tf_write_prscs, prsc) != NULL;
188 }
189
/**
 * Flushes the job (if any) that has a pending write to @prsc, subject to
 * @flush_cond, and records any graphics <-> compute synchronization the
 * write requires.
 */
void
v3d_flush_jobs_writing_resource(struct v3d_context *v3d,
                                struct pipe_resource *prsc,
                                enum v3d_flush_cond flush_cond,
                                bool is_compute_pipeline)
{
        struct hash_entry *entry = _mesa_hash_table_search(v3d->write_jobs,
                                                           prsc);
        if (!entry)
                return;

        struct v3d_resource *rsc = v3d_resource(prsc);

        /* We need to sync if graphics pipeline reads a resource written
         * by the compute pipeline. The same is needed for the case of
         * graphics-compute dependency but flushing the job.
         */
        if (!is_compute_pipeline && rsc->bo != NULL && rsc->compute_written) {
                v3d->sync_on_last_compute_job = true;
                rsc->compute_written = false;
        }
        if (is_compute_pipeline && rsc->bo != NULL && rsc->graphics_written) {
                /* Compute reading a graphics-written resource always forces
                 * the flush.
                 */
                flush_cond = V3D_FLUSH_ALWAYS;
                rsc->graphics_written = false;
        }

        struct v3d_job *job = entry->data;

        bool needs_flush;
        switch (flush_cond) {
        case V3D_FLUSH_ALWAYS:
                needs_flush = true;
                break;
        case V3D_FLUSH_NOT_CURRENT_JOB:
                /* Only flush if the writer is not the job currently being
                 * recorded.
                 */
                needs_flush = !v3d->job || v3d->job != job;
                break;
        case V3D_FLUSH_DEFAULT:
        default:
                /* For writes from TF in the same job we use the "Wait for TF"
                 * feature provided by the hardware so we don't want to flush.
                 * The exception to this is when the caller is about to map the
                 * resource, since in that case we don't have a "Wait for TF"
                 * command in the command stream. In this scenario the caller
                 * is expected to pass V3D_FLUSH_ALWAYS.
                 */
                needs_flush = !v3d_job_writes_resource_from_tf(job, prsc);
        }

        if (needs_flush)
                v3d_job_submit(v3d, job);
}
241
242 void
v3d_flush_jobs_reading_resource(struct v3d_context * v3d,struct pipe_resource * prsc,enum v3d_flush_cond flush_cond,bool is_compute_pipeline)243 v3d_flush_jobs_reading_resource(struct v3d_context *v3d,
244 struct pipe_resource *prsc,
245 enum v3d_flush_cond flush_cond,
246 bool is_compute_pipeline)
247 {
248 struct v3d_resource *rsc = v3d_resource(prsc);
249
250 /* We only need to force the flush on TF writes, which is the only
251 * case where we might skip the flush to use the 'Wait for TF'
252 * command. Here we are flushing for a read, which means that the
253 * caller intends to write to the resource, so we don't care if
254 * there was a previous TF write to it.
255 */
256 v3d_flush_jobs_writing_resource(v3d, prsc, flush_cond,
257 is_compute_pipeline);
258
259 hash_table_foreach(v3d->jobs, entry) {
260 struct v3d_job *job = entry->data;
261
262 if (!_mesa_set_search(job->bos, rsc->bo))
263 continue;
264
265 bool needs_flush;
266 switch (flush_cond) {
267 case V3D_FLUSH_NOT_CURRENT_JOB:
268 needs_flush = !v3d->job || v3d->job != job;
269 break;
270 case V3D_FLUSH_ALWAYS:
271 case V3D_FLUSH_DEFAULT:
272 default:
273 needs_flush = true;
274 }
275
276 if (needs_flush)
277 v3d_job_submit(v3d, job);
278
279 /* Reminder: v3d->jobs is safe to keep iterating even
280 * after deletion of an entry.
281 */
282 continue;
283 }
284 }
285
/**
 * Returns a v3d_job structure for tracking V3D rendering to a particular FBO.
 *
 * If we've already started rendering to this FBO, then return the same job,
 * otherwise make a new one.  If we're beginning rendering to an FBO, make
 * sure that any previous reads of the FBO (or writes to its color/Z
 * surfaces) have been flushed.
 */
struct v3d_job *
v3d_get_job(struct v3d_context *v3d,
            uint32_t nr_cbufs,
            struct pipe_surface **cbufs,
            struct pipe_surface *zsbuf,
            struct pipe_surface *bbuf)
{
        /* Return the existing job for this FBO if we have one.
         *
         * NOTE(review): the key unconditionally reads cbufs[0..3], so cbufs
         * must have at least 4 entries with NULL in unused slots — confirm
         * against callers.
         */
        struct v3d_job_key local_key = {
                .cbufs = {
                        cbufs[0],
                        cbufs[1],
                        cbufs[2],
                        cbufs[3],
                },
                .zsbuf = zsbuf,
                .bbuf = bbuf,
        };
        struct hash_entry *entry = _mesa_hash_table_search(v3d->jobs,
                                                           &local_key);
        if (entry)
                return entry->data;

        /* Creating a new job. Make sure that any previous jobs reading or
         * writing these buffers are flushed.
         */
        struct v3d_job *job = v3d_job_create(v3d);
        job->nr_cbufs = nr_cbufs;

        for (int i = 0; i < job->nr_cbufs; i++) {
                if (cbufs[i]) {
                        v3d_flush_jobs_reading_resource(v3d, cbufs[i]->texture,
                                                        V3D_FLUSH_DEFAULT,
                                                        false);
                        pipe_surface_reference(&job->cbufs[i], cbufs[i]);

                        if (cbufs[i]->texture->nr_samples > 1)
                                job->msaa = true;
                }
        }
        if (zsbuf) {
                v3d_flush_jobs_reading_resource(v3d, zsbuf->texture,
                                                V3D_FLUSH_DEFAULT,
                                                false);
                pipe_surface_reference(&job->zsbuf, zsbuf);
                if (zsbuf->texture->nr_samples > 1)
                        job->msaa = true;
        }
        /* NOTE(review): unlike cbufs/zsbuf, bbuf's prior readers are not
         * flushed here — presumably the caller's responsibility; confirm.
         */
        if (bbuf) {
                pipe_surface_reference(&job->bbuf, bbuf);
                if (bbuf->texture->nr_samples > 1)
                        job->msaa = true;
        }

        /* Register this job as the pending writer of each attached surface. */
        for (int i = 0; i < job->nr_cbufs; i++) {
                if (cbufs[i])
                        _mesa_hash_table_insert(v3d->write_jobs,
                                                cbufs[i]->texture, job);
        }
        if (zsbuf) {
                _mesa_hash_table_insert(v3d->write_jobs, zsbuf->texture, job);

                /* A Z/S attachment with separate stencil storage also makes
                 * this job the writer of the stencil resource.
                 */
                struct v3d_resource *rsc = v3d_resource(zsbuf->texture);
                if (rsc->separate_stencil) {
                        v3d_flush_jobs_reading_resource(v3d,
                                                        &rsc->separate_stencil->base,
                                                        V3D_FLUSH_DEFAULT,
                                                        false);
                        _mesa_hash_table_insert(v3d->write_jobs,
                                                &rsc->separate_stencil->base,
                                                job);
                }
        }

        /* Double-buffer mode is opted into via the DOUBLE_BUFFER debug flag
         * and is only used without MSAA.
         */
        job->double_buffer = V3D_DBG(DOUBLE_BUFFER) && !job->msaa;

        /* The job's key must be the job-owned copy, since local_key goes out
         * of scope while the hash table entry lives on.
         */
        memcpy(&job->key, &local_key, sizeof(local_key));
        _mesa_hash_table_insert(v3d->jobs, &job->key, job);

        return job;
}
375
/**
 * Returns the job for rendering to the context's current framebuffer state,
 * creating one (and computing its tile geometry and initial TLB clears) if
 * the context doesn't have a current job yet.
 */
struct v3d_job *
v3d_get_job_for_fbo(struct v3d_context *v3d)
{
        if (v3d->job)
                return v3d->job;

        uint32_t nr_cbufs = v3d->framebuffer.nr_cbufs;
        struct pipe_surface **cbufs = v3d->framebuffer.cbufs;
        struct pipe_surface *zsbuf = v3d->framebuffer.zsbuf;
        struct v3d_job *job = v3d_get_job(v3d, nr_cbufs, cbufs, zsbuf, NULL);

        /* NOTE(review): this enables msaa for any sample count >= 1, which
         * includes single-sampled framebuffers — confirm whether '> 1' was
         * intended.
         */
        if (v3d->framebuffer.samples >= 1) {
                job->msaa = true;
                job->double_buffer = false;
        }

        /* Tile geometry depends on MSAA/double-buffer mode and the internal
         * bpp of the attached render targets.
         */
        v3d_get_tile_buffer_size(&v3d->screen->devinfo,
                                 job->msaa, job->double_buffer,
                                 job->nr_cbufs, job->cbufs, job->bbuf,
                                 &job->tile_width,
                                 &job->tile_height,
                                 &job->internal_bpp);

        /* The dirty flags are tracking what's been updated while v3d->job has
         * been bound, so set them all to ~0 when switching between jobs. We
         * also need to reset all state at the start of rendering.
         */
        v3d->dirty = ~0;

        /* If we're binding to uninitialized buffers, no need to load their
         * contents before drawing: mark them for TLB clear instead.
         */
        for (int i = 0; i < nr_cbufs; i++) {
                if (cbufs[i]) {
                        struct v3d_resource *rsc = v3d_resource(cbufs[i]->texture);
                        if (!rsc->writes)
                                job->clear_tlb |= PIPE_CLEAR_COLOR0 << i;
                }
        }

        if (zsbuf) {
                struct v3d_resource *rsc = v3d_resource(zsbuf->texture);
                if (!rsc->writes)
                        job->clear_tlb |= PIPE_CLEAR_DEPTH;

                /* Stencil "never written" is checked on the separate stencil
                 * resource when one exists.
                 */
                if (rsc->separate_stencil)
                        rsc = rsc->separate_stencil;

                if (!rsc->writes)
                        job->clear_tlb |= PIPE_CLEAR_STENCIL;
        }

        job->draw_tiles_x = DIV_ROUND_UP(v3d->framebuffer.width,
                                         job->tile_width);
        job->draw_tiles_y = DIV_ROUND_UP(v3d->framebuffer.height,
                                         job->tile_height);

        v3d->job = job;

        return job;
}
437
438 static void
v3d_clif_dump(struct v3d_context * v3d,struct v3d_job * job)439 v3d_clif_dump(struct v3d_context *v3d, struct v3d_job *job)
440 {
441 if (!(V3D_DBG(CL) ||
442 V3D_DBG(CL_NO_BIN) ||
443 V3D_DBG(CLIF)))
444 return;
445
446 struct clif_dump *clif = clif_dump_init(&v3d->screen->devinfo,
447 stderr,
448 V3D_DBG(CL) ||
449 V3D_DBG(CL_NO_BIN),
450 V3D_DBG(CL_NO_BIN));
451
452 set_foreach(job->bos, entry) {
453 struct v3d_bo *bo = (void *)entry->key;
454 char *name = ralloc_asprintf(NULL, "%s_0x%x",
455 bo->name, bo->offset);
456
457 v3d_bo_map(bo);
458 clif_dump_add_bo(clif, name, bo->offset, bo->size, bo->map);
459
460 ralloc_free(name);
461 }
462
463 clif_dump(clif, &job->submit);
464
465 clif_dump_destroy(clif);
466 }
467
/**
 * Reads back the primitive counters written by the GPU and accumulates them
 * into the context's query/streamout state.  Stalls on the BO holding the
 * counters.
 */
static void
v3d_read_and_accumulate_primitive_counters(struct v3d_context *v3d)
{
        assert(v3d->prim_counts);

        perf_debug("stalling on TF counts readback\n");
        struct v3d_resource *rsc = v3d_resource(v3d->prim_counts);
        if (v3d_bo_wait(rsc->bo, OS_TIMEOUT_INFINITE, "prim-counts")) {
                /* Note: arithmetic on the void * returned by v3d_bo_map()
                 * (GNU extension, byte-sized).
                 */
                uint32_t *map = v3d_bo_map(rsc->bo) + v3d->prim_counts_offset;
                v3d->tf_prims_generated += map[V3D_PRIM_COUNTS_TF_WRITTEN];
                /* When we only have a vertex shader with no primitive
                 * restart, we determine the primitive count in the CPU so
                 * don't update it here again.
                 */
                if (v3d->prog.gs || v3d->prim_restart) {
                        v3d->prims_generated += map[V3D_PRIM_COUNTS_WRITTEN];
                        /* The output primitive type comes from the GS when
                         * one is bound, otherwise from the draw's prim mode.
                         */
                        uint8_t prim_mode =
                                v3d->prog.gs ? v3d->prog.gs->prog_data.gs->out_prim_type
                                             : v3d->prim_mode;
                        /* Advance each streamout target by the number of
                         * vertices TF actually wrote.
                         */
                        uint32_t vertices_written =
                                map[V3D_PRIM_COUNTS_TF_WRITTEN] * mesa_vertices_per_prim(prim_mode);
                        for (int i = 0; i < v3d->streamout.num_targets; i++) {
                                v3d_stream_output_target(v3d->streamout.targets[i])->offset +=
                                        vertices_written;
                        }
                }
        }
}
496
/**
 * Submits the job to the kernel and then frees it (the context gets a fresh
 * job allocated the next time one is needed).
 *
 * Jobs that never recorded any work (!needs_flush) are freed without
 * touching the kernel.
 */
void
v3d_job_submit(struct v3d_context *v3d, struct v3d_job *job)
{
        struct v3d_screen *screen = v3d->screen;
        struct v3d_device_info *devinfo = &screen->devinfo;

        if (!job->needs_flush)
                goto done;

        /* The GL_PRIMITIVES_GENERATED query is included with
         * OES_geometry_shader.
         */
        job->needs_primitives_generated =
                v3d->n_primitives_generated_queries_in_flight > 0 &&
                v3d->prog.gs;

        if (job->needs_primitives_generated)
                v3d_ensure_prim_counts_allocated(v3d);

        /* Emit the render control list for the job's tile/surface setup. */
        v3d_X(devinfo, emit_rcl)(job);

        /* Only jobs with binning work get the BCL epilogue. */
        if (cl_offset(&job->bcl) > 0)
                v3d_X(devinfo, bcl_epilogue)(v3d, job);

        if (v3d->in_fence_fd >= 0) {
                /* PIPE_CAP_NATIVE_FENCE: make the binner wait on the
                 * imported fence before starting.
                 */
                if (drmSyncobjImportSyncFile(v3d->fd, v3d->in_syncobj,
                                             v3d->in_fence_fd)) {
                        fprintf(stderr, "Failed to import native fence.\n");
                } else {
                        job->submit.in_sync_bcl = v3d->in_syncobj;
                }
                close(v3d->in_fence_fd);
                v3d->in_fence_fd = -1;
        } else {
                /* While the RCL will implicitly depend on the last RCL to have
                 * finished, we also need to block on any previous TFU job we
                 * may have dispatched.
                 */
                job->submit.in_sync_rcl = v3d->out_sync;
        }

        /* Update the sync object for the last rendering by our context. */
        job->submit.out_sync = v3d->out_sync;

        job->submit.bcl_end = job->bcl.bo->offset + cl_offset(&job->bcl);
        job->submit.rcl_end = job->rcl.bo->offset + cl_offset(&job->rcl);

        if (v3d->active_perfmon) {
                assert(screen->has_perfmon);
                job->submit.perfmon_id = v3d->active_perfmon->kperfmon_id;
        }

        /* If we are submitting a job with a different perfmon, we need to
         * ensure the previous one fully finishes before starting this;
         * otherwise it would wrongly mix counter results.
         */
        if (v3d->active_perfmon != v3d->last_perfmon) {
                v3d->last_perfmon = v3d->active_perfmon;
                job->submit.in_sync_bcl = v3d->out_sync;
        }

        job->submit.flags = 0;
        if (job->tmu_dirty_rcl && screen->has_cache_flush)
                job->submit.flags |= DRM_V3D_SUBMIT_CL_FLUSH_CACHE;

        /* On V3D 4.x, the tile alloc/state setup moved to register writes
         * instead of binner packets.  (NOTE(review): comment said "4.1" but
         * the check is ver >= 42 — presumably pre-4.2 support was dropped;
         * confirm.)
         */
        if (devinfo->ver >= 42) {
                v3d_job_add_bo(job, job->tile_alloc);
                job->submit.qma = job->tile_alloc->offset;
                job->submit.qms = job->tile_alloc->size;

                v3d_job_add_bo(job, job->tile_state);
                job->submit.qts = job->tile_state->offset;
        }

        v3d_clif_dump(v3d, job);

        if (!V3D_DBG(NORAST)) {
                int ret;

                ret = v3d_ioctl(v3d->fd, DRM_IOCTL_V3D_SUBMIT_CL, &job->submit);
                /* Warn about a failed submit only once per process. */
                static bool warned = false;
                if (ret && !warned) {
                        fprintf(stderr, "Draw call returned %s. "
                                        "Expect corruption.\n", strerror(errno));
                        warned = true;
                } else if (!ret) {
                        if (v3d->active_perfmon)
                                v3d->active_perfmon->job_submitted = true;
                }

                /* If we are submitting a job in the middle of transform
                 * feedback or there is a primitives generated query with a
                 * geometry shader then we need to read the primitive counts
                 * and accumulate them, otherwise they will be reset at the
                 * start of the next draw when we emit the Tile Binning Mode
                 * Configuration packet.
                 *
                 * If the job doesn't have any TF draw calls, then we know
                 * the primitive count must be zero and we can skip stalling
                 * for this. This also fixes a problem because it seems that
                 * in this scenario the counters are not reset with the Tile
                 * Binning Mode Configuration packet, which would translate
                 * to us reading an obsolete (possibly non-zero) value from
                 * the GPU counters.
                 */
                if (job->needs_primitives_generated ||
                    (v3d->streamout.num_targets &&
                     job->tf_draw_calls_queued > 0))
                        v3d_read_and_accumulate_primitive_counters(v3d);
        }

done:
        v3d_job_free(v3d, job);
}
618
/* Generates the hash/compare helpers and v3d_job_key_table_create() used by
 * v3d_job_init() for the FBO-keyed job table.
 */
DERIVE_HASH_TABLE(v3d_job_key);
620
621 void
v3d_job_init(struct v3d_context * v3d)622 v3d_job_init(struct v3d_context *v3d)
623 {
624 v3d->jobs = v3d_job_key_table_create(v3d);
625 v3d->write_jobs = _mesa_hash_table_create(v3d,
626 _mesa_hash_pointer,
627 _mesa_key_pointer_equal);
628 }
629
630