/*
 * Copyright (C) 2019-2020 Collabora, Ltd.
 * Copyright (C) 2019 Alyssa Rosenzweig
 * Copyright (C) 2014-2017 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */

#include <assert.h>

#include "util/format/u_format.h"
#include "util/hash_table.h"
#include "util/ralloc.h"
#include "util/rounding.h"
#include "util/u_framebuffer.h"
#include "util/u_pack_color.h"
#include "pan_bo.h"
#include "pan_context.h"
#include "pan_util.h"

#define foreach_batch(ctx, idx)                                                \
   BITSET_FOREACH_SET(idx, ctx->batches.active, PAN_MAX_BATCHES)

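/* Map a batch back to its slot index in the context's fixed batch array */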
static unsigned
panfrost_batch_idx(struct panfrost_batch *batch)
{
   return batch - batch->ctx->batches.slots;
}

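/* Check whether any batch slot other than the given one is active, i.e.
 * whether there is at least one other in-flight batch */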
static bool
panfrost_any_batch_other_than(struct panfrost_context *ctx, unsigned index)
{
   unsigned i;
   foreach_batch(ctx, i) {
      if (i != index)
         return true;
   }

   return false;
}

/* Adds the BO backing a surface to a batch if the surface is non-null */

static void
panfrost_batch_add_surface(struct panfrost_batch *batch,
                           struct pipe_surface *surf)
{
   if (surf) {
      struct panfrost_resource *rsrc = pan_resource(surf->texture);
      pan_legalize_format(batch->ctx, rsrc, surf->format, true, false);
      panfrost_batch_write_rsrc(batch, rsrc, PIPE_SHADER_FRAGMENT);
   }
}

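/* Initialize a freshly-claimed batch slot for the given framebuffer state:
 * assign a sequence number, set up the memory pools, and add the BOs backing
 * the color and depth/stencil attachments */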
static void
panfrost_batch_init(struct panfrost_context *ctx,
                    const struct pipe_framebuffer_state *key,
                    struct panfrost_batch *batch)
{
   struct pipe_screen *pscreen = ctx->base.screen;
   struct panfrost_screen *screen = pan_screen(pscreen);
   struct panfrost_device *dev = &screen->dev;

   batch->ctx = ctx;

   batch->seqnum = ++ctx->batches.seqnum;

   util_dynarray_init(&batch->bos, NULL);

   batch->minx = batch->miny = ~0;
   batch->maxx = batch->maxy = 0;

   util_copy_framebuffer_state(&batch->key, key);

   /* Preallocate the main pool, since every batch has at least one job
    * structure and will therefore use the preallocation */
   panfrost_pool_init(&batch->pool, NULL, dev, 0, 65536, "Batch pool", true,
                      true);

   /* Don't preallocate the invisible pool, since not every batch will use
    * the pre-allocation, particularly if the varyings are larger than the
    * preallocation and a reallocation is needed after anyway. */
   panfrost_pool_init(&batch->invisible_pool, NULL, dev, PAN_BO_INVISIBLE,
                      65536, "Varyings", false, true);

   for (unsigned i = 0; i < batch->key.nr_cbufs; ++i)
      panfrost_batch_add_surface(batch, batch->key.cbufs[i]);

   panfrost_batch_add_surface(batch, batch->key.zsbuf);

   screen->vtbl.init_batch(batch);
}

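/* Release a batch after submission (or when it turns out to be empty): drop
 * the BO references it holds, remove any writer entries pointing at it, free
 * its pools, and mark the slot inactive */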
static void
panfrost_batch_cleanup(struct panfrost_context *ctx,
                       struct panfrost_batch *batch)
{
   struct panfrost_screen *screen = pan_screen(ctx->base.screen);
   struct panfrost_device *dev = pan_device(ctx->base.screen);

   assert(batch->seqnum);

   if (ctx->batch == batch)
      ctx->batch = NULL;

   screen->vtbl.cleanup_batch(batch);

   unsigned batch_idx = panfrost_batch_idx(batch);

   pan_bo_access *flags = util_dynarray_begin(&batch->bos);
   unsigned end_bo = util_dynarray_num_elements(&batch->bos, pan_bo_access);

   for (int i = 0; i < end_bo; ++i) {
      if (!flags[i])
         continue;

      struct panfrost_bo *bo = pan_lookup_bo(dev, i);
      panfrost_bo_unreference(bo);
   }

   /* There is no more writer for anything we wrote */
   hash_table_foreach(ctx->writers, ent) {
      if (ent->data == batch)
         _mesa_hash_table_remove(ctx->writers, ent);
   }

   panfrost_pool_cleanup(&batch->pool);
   panfrost_pool_cleanup(&batch->invisible_pool);

   util_unreference_framebuffer_state(&batch->key);

   util_dynarray_fini(&batch->bos);

   memset(batch, 0, sizeof(*batch));
   BITSET_CLEAR(ctx->batches.active, batch_idx);
}

static void panfrost_batch_submit(struct panfrost_context *ctx,
                                  struct panfrost_batch *batch);

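/* Find the batch matching a framebuffer state, or claim the
 * least-recently-used slot if none matches, flushing any batch still
 * occupying that slot */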
static struct panfrost_batch *
panfrost_get_batch(struct panfrost_context *ctx,
                   const struct pipe_framebuffer_state *key)
{
   struct panfrost_batch *batch = NULL;

   for (unsigned i = 0; i < PAN_MAX_BATCHES; i++) {
      if (ctx->batches.slots[i].seqnum &&
          util_framebuffer_state_equal(&ctx->batches.slots[i].key, key)) {
         /* We found a match, increase the seqnum for the LRU
          * eviction logic.
          */
         ctx->batches.slots[i].seqnum = ++ctx->batches.seqnum;
         return &ctx->batches.slots[i];
      }

      if (!batch || batch->seqnum > ctx->batches.slots[i].seqnum)
         batch = &ctx->batches.slots[i];
   }

   assert(batch);

   /* The selected slot is in use, so we need to flush the batch occupying it */
   if (batch->seqnum) {
      perf_debug(ctx, "Flushing batch due to seqnum overflow");
      panfrost_batch_submit(ctx, batch);
   }

   panfrost_batch_init(ctx, key, batch);

   unsigned batch_idx = panfrost_batch_idx(batch);
   BITSET_SET(ctx->batches.active, batch_idx);

   return batch;
}

/* Get the job corresponding to the FBO we're currently rendering into */

struct panfrost_batch *
panfrost_get_batch_for_fbo(struct panfrost_context *ctx)
{
   /* If we already began rendering, use that */

   if (ctx->batch) {
      assert(util_framebuffer_state_equal(&ctx->batch->key,
                                          &ctx->pipe_framebuffer));
      return ctx->batch;
   }

   /* If not, look up the job */
   struct panfrost_batch *batch =
      panfrost_get_batch(ctx, &ctx->pipe_framebuffer);

   /* Set this job as the current FBO job. Will be reset when updating the
    * FB state and when submitting or releasing a job.
    */
   ctx->batch = batch;
   panfrost_dirty_state_all(ctx);
   return batch;
}

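/* Like panfrost_get_batch_for_fbo, but guarantees the returned batch has no
 * draws or compute jobs queued, flushing the existing batch if necessary */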
struct panfrost_batch *
panfrost_get_fresh_batch_for_fbo(struct panfrost_context *ctx,
                                 const char *reason)
{
   struct panfrost_batch *batch;

   batch = panfrost_get_batch(ctx, &ctx->pipe_framebuffer);
   panfrost_dirty_state_all(ctx);

   /* We only need to submit and get a fresh batch if there are draws or
    * compute jobs already queued. Otherwise we may reuse the batch. */

   if (batch->draw_count + batch->compute_count > 0) {
      perf_debug(ctx, "Flushing the current FBO due to: %s", reason);
      panfrost_batch_submit(ctx, batch);
      batch = panfrost_get_batch(ctx, &ctx->pipe_framebuffer);
   }

   ctx->batch = batch;
   return batch;
}

static bool panfrost_batch_uses_resource(struct panfrost_batch *batch,
                                         struct panfrost_resource *rsrc);

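/* Record that a batch reads or writes a resource and flush other batches as
 * needed to keep accesses ordered: any existing writer is flushed on both
 * reads and writes, and all other readers are flushed before a write */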
static void
panfrost_batch_update_access(struct panfrost_batch *batch,
                             struct panfrost_resource *rsrc, bool writes)
{
   struct panfrost_context *ctx = batch->ctx;
   uint32_t batch_idx = panfrost_batch_idx(batch);

   if (writes) {
      _mesa_hash_table_insert(ctx->writers, rsrc, batch);
   }

   /* The rest of this routine is just about flushing other batches. If there
    * aren't any, we can skip a lot of work.
    */
   if (!panfrost_any_batch_other_than(ctx, batch_idx))
      return;

   struct hash_entry *entry = _mesa_hash_table_search(ctx->writers, rsrc);
   struct panfrost_batch *writer = entry ? entry->data : NULL;

   /* Both reads and writes flush the existing writer */
   if (writer != NULL && writer != batch)
      panfrost_batch_submit(ctx, writer);

   /* Writes (only) flush readers too */
   if (writes) {
      unsigned i;
      foreach_batch(ctx, i) {
         struct panfrost_batch *batch = &ctx->batches.slots[i];

         /* Skip the entry if this is our batch. */
         if (i == batch_idx)
            continue;

         /* Submit if it's a user */
         if (panfrost_batch_uses_resource(batch, rsrc))
            panfrost_batch_submit(ctx, batch);
      }
   }
}

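/* Return a pointer to the access flags for a BO handle, growing the batch's
 * BO table (zero-filled) if the handle lies beyond its current size */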
static pan_bo_access *
panfrost_batch_get_bo_access(struct panfrost_batch *batch, unsigned handle)
{
   unsigned size = util_dynarray_num_elements(&batch->bos, pan_bo_access);

   if (handle >= size) {
      unsigned grow = handle + 1 - size;

      memset(util_dynarray_grow(&batch->bos, pan_bo_access, grow), 0,
             grow * sizeof(pan_bo_access));
   }

   return util_dynarray_element(&batch->bos, pan_bo_access, handle);
}

static bool
panfrost_batch_uses_resource(struct panfrost_batch *batch,
                             struct panfrost_resource *rsrc)
{
   /* A resource is used iff its current BO is used */
   uint32_t handle = panfrost_bo_handle(rsrc->bo);
   unsigned size = util_dynarray_num_elements(&batch->bos, pan_bo_access);

   /* If out of bounds, certainly not used */
   if (handle >= size)
      return false;

   /* Otherwise check if nonzero access */
   return !!(*util_dynarray_element(&batch->bos, pan_bo_access, handle));
}

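/* Add a BO to the batch with the given access flags, taking a reference the
 * first time the BO is seen and OR-ing in new flags on subsequent calls */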
static void
panfrost_batch_add_bo_old(struct panfrost_batch *batch, struct panfrost_bo *bo,
                          uint32_t flags)
{
   if (!bo)
      return;

   pan_bo_access *entry =
      panfrost_batch_get_bo_access(batch, panfrost_bo_handle(bo));
   pan_bo_access old_flags = *entry;

   if (!old_flags) {
      batch->num_bos++;
      panfrost_bo_reference(bo);
   }

   if (old_flags == flags)
      return;

   flags |= old_flags;
   *entry = flags;
}

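/* Map a shader stage to its BO access domain: fragment work uses the
 * fragment domain, all other stages the vertex/tiler domain */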
static uint32_t
panfrost_access_for_stage(enum pipe_shader_type stage)
{
   return (stage == PIPE_SHADER_FRAGMENT) ? PAN_BO_ACCESS_FRAGMENT
                                          : PAN_BO_ACCESS_VERTEX_TILER;
}

void
panfrost_batch_add_bo(struct panfrost_batch *batch, struct panfrost_bo *bo,
                      enum pipe_shader_type stage)
{
   panfrost_batch_add_bo_old(
      batch, bo, PAN_BO_ACCESS_READ | panfrost_access_for_stage(stage));
}

void
panfrost_batch_write_bo(struct panfrost_batch *batch, struct panfrost_bo *bo,
                        enum pipe_shader_type stage)
{
   panfrost_batch_add_bo_old(
      batch, bo, PAN_BO_ACCESS_WRITE | panfrost_access_for_stage(stage));
}

void
panfrost_batch_read_rsrc(struct panfrost_batch *batch,
                         struct panfrost_resource *rsrc,
                         enum pipe_shader_type stage)
{
   uint32_t access = PAN_BO_ACCESS_READ | panfrost_access_for_stage(stage);

   panfrost_batch_add_bo_old(batch, rsrc->bo, access);

   if (rsrc->separate_stencil)
      panfrost_batch_add_bo_old(batch, rsrc->separate_stencil->bo, access);

   panfrost_batch_update_access(batch, rsrc, false);
}

void
panfrost_batch_write_rsrc(struct panfrost_batch *batch,
                          struct panfrost_resource *rsrc,
                          enum pipe_shader_type stage)
{
   uint32_t access = PAN_BO_ACCESS_WRITE | panfrost_access_for_stage(stage);

   panfrost_batch_add_bo_old(batch, rsrc->bo, access);

   if (rsrc->separate_stencil)
      panfrost_batch_add_bo_old(batch, rsrc->separate_stencil->bo, access);

   panfrost_batch_update_access(batch, rsrc, true);
}

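/* Allocate a BO, attach it to the batch for the given stage, and transfer
 * ownership to the batch; the caller does not hold its own reference */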
struct panfrost_bo *
panfrost_batch_create_bo(struct panfrost_batch *batch, size_t size,
                         uint32_t create_flags, enum pipe_shader_type stage,
                         const char *label)
{
   struct panfrost_bo *bo;

   bo = panfrost_bo_create(pan_device(batch->ctx->base.screen), size,
                           create_flags, label);
   assert(bo);
   panfrost_batch_add_bo(batch, bo, stage);

   /* panfrost_batch_add_bo() has retained a reference and
    * panfrost_bo_create() initializes the refcnt to 1, so let's
    * unreference the BO here so it gets released when the batch is
    * destroyed (unless it's retained by someone else in the meantime).
    */
   panfrost_bo_unreference(bo);
   return bo;
}

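/* Lazily allocate the thread-local storage (scratchpad) BO for the batch,
 * sized from the per-thread stack requirements, and share it between the
 * vertex/tiler and fragment parts of the batch */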
struct panfrost_bo *
panfrost_batch_get_scratchpad(struct panfrost_batch *batch,
                              unsigned size_per_thread,
                              unsigned thread_tls_alloc, unsigned core_id_range)
{
   unsigned size = panfrost_get_total_stack_size(
      size_per_thread, thread_tls_alloc, core_id_range);

   if (batch->scratchpad) {
      assert(panfrost_bo_size(batch->scratchpad) >= size);
   } else {
      batch->scratchpad =
         panfrost_batch_create_bo(batch, size, PAN_BO_INVISIBLE,
                                  PIPE_SHADER_VERTEX, "Thread local storage");

      panfrost_batch_add_bo(batch, batch->scratchpad, PIPE_SHADER_FRAGMENT);
   }

   return batch->scratchpad;
}

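/* Lazily allocate the BO backing workgroup shared memory for the batch */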
struct panfrost_bo *
panfrost_batch_get_shared_memory(struct panfrost_batch *batch, unsigned size,
                                 unsigned workgroup_count)
{
   if (batch->shared_memory) {
      assert(panfrost_bo_size(batch->shared_memory) >= size);
   } else {
      batch->shared_memory = panfrost_batch_create_bo(
         batch, size, PAN_BO_INVISIBLE, PIPE_SHADER_VERTEX,
         "Workgroup shared memory");
   }

   return batch->shared_memory;
}

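/* Translate the batch's framebuffer state and clear/read/resolve tracking
 * into the common pan_fb_info descriptor consumed by the per-GPU backend,
 * filling in the render-target, depth and stencil image views and deciding
 * what to clear, preload, or discard */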
static void
panfrost_batch_to_fb_info(const struct panfrost_batch *batch,
                          struct pan_fb_info *fb, struct pan_image_view *rts,
                          struct pan_image_view *zs, struct pan_image_view *s,
                          bool reserve)
{
   struct panfrost_device *dev = pan_device(batch->ctx->base.screen);

   memset(fb, 0, sizeof(*fb));
   memset(rts, 0, sizeof(*rts) * 8);
   memset(zs, 0, sizeof(*zs));
   memset(s, 0, sizeof(*s));

   fb->tile_buf_budget = dev->optimal_tib_size;
   fb->width = batch->key.width;
   fb->height = batch->key.height;
   fb->extent.minx = batch->minx;
   fb->extent.miny = batch->miny;
   fb->extent.maxx = batch->maxx - 1;
   fb->extent.maxy = batch->maxy - 1;
   fb->nr_samples = util_framebuffer_get_num_samples(&batch->key);
   fb->force_samples = pan_tristate_get(batch->line_smoothing) ? 16 : 0;
   fb->rt_count = batch->key.nr_cbufs;
   fb->sprite_coord_origin = pan_tristate_get(batch->sprite_coord_origin);
   fb->first_provoking_vertex = pan_tristate_get(batch->first_provoking_vertex);

   static const unsigned char id_swz[] = {
      PIPE_SWIZZLE_X,
      PIPE_SWIZZLE_Y,
      PIPE_SWIZZLE_Z,
      PIPE_SWIZZLE_W,
   };

   for (unsigned i = 0; i < fb->rt_count; i++) {
      struct pipe_surface *surf = batch->key.cbufs[i];

      if (!surf)
         continue;

      struct panfrost_resource *prsrc = pan_resource(surf->texture);
      unsigned mask = PIPE_CLEAR_COLOR0 << i;

      if (batch->clear & mask) {
         fb->rts[i].clear = true;
         memcpy(fb->rts[i].clear_value, batch->clear_color[i],
                sizeof((fb->rts[i].clear_value)));
      }

      fb->rts[i].discard = !reserve && !(batch->resolve & mask);

      /* Clamp the rendering area to the damage extent. The
       * KHR_partial_update spec states that trying to render outside of
       * the damage region is "undefined behavior", so we should be safe.
       */
      if (!fb->rts[i].discard) {
         fb->extent.minx = MAX2(fb->extent.minx, prsrc->damage.extent.minx);
         fb->extent.miny = MAX2(fb->extent.miny, prsrc->damage.extent.miny);
         fb->extent.maxx = MIN2(fb->extent.maxx, prsrc->damage.extent.maxx - 1);
         fb->extent.maxy = MIN2(fb->extent.maxy, prsrc->damage.extent.maxy - 1);
         assert(fb->extent.minx <= fb->extent.maxx);
         assert(fb->extent.miny <= fb->extent.maxy);
      }

      rts[i].format = surf->format;
      rts[i].dim = MALI_TEXTURE_DIMENSION_2D;
      rts[i].last_level = rts[i].first_level = surf->u.tex.level;
      rts[i].first_layer = surf->u.tex.first_layer;
      rts[i].last_layer = surf->u.tex.last_layer;
      panfrost_set_image_view_planes(&rts[i], surf->texture);
      rts[i].nr_samples =
         surf->nr_samples ?: MAX2(surf->texture->nr_samples, 1);
      memcpy(rts[i].swizzle, id_swz, sizeof(rts[i].swizzle));
      fb->rts[i].crc_valid = &prsrc->valid.crc;
      fb->rts[i].view = &rts[i];

      /* Preload if the RT is read or updated */
      if (!(batch->clear & mask) &&
          ((batch->read & mask) ||
           ((batch->draws & mask) &&
            BITSET_TEST(prsrc->valid.data, fb->rts[i].view->first_level))))
         fb->rts[i].preload = true;
   }

   const struct pan_image_view *s_view = NULL, *z_view = NULL;
   struct panfrost_resource *z_rsrc = NULL, *s_rsrc = NULL;

   if (batch->key.zsbuf) {
      struct pipe_surface *surf = batch->key.zsbuf;
      z_rsrc = pan_resource(surf->texture);

      zs->format = surf->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT
                      ? PIPE_FORMAT_Z32_FLOAT
                      : surf->format;
      zs->dim = MALI_TEXTURE_DIMENSION_2D;
      zs->last_level = zs->first_level = surf->u.tex.level;
      zs->first_layer = surf->u.tex.first_layer;
      zs->last_layer = surf->u.tex.last_layer;
      zs->planes[0] = &z_rsrc->image;
      zs->nr_samples = surf->nr_samples ?: MAX2(surf->texture->nr_samples, 1);
      memcpy(zs->swizzle, id_swz, sizeof(zs->swizzle));
      fb->zs.view.zs = zs;
      z_view = zs;
      if (util_format_is_depth_and_stencil(zs->format)) {
         s_view = zs;
         s_rsrc = z_rsrc;
      }

      if (z_rsrc->separate_stencil) {
         s_rsrc = z_rsrc->separate_stencil;
         s->format = PIPE_FORMAT_S8_UINT;
         s->dim = MALI_TEXTURE_DIMENSION_2D;
         s->last_level = s->first_level = surf->u.tex.level;
         s->first_layer = surf->u.tex.first_layer;
         s->last_layer = surf->u.tex.last_layer;
         s->planes[0] = &s_rsrc->image;
         s->nr_samples = surf->nr_samples ?: MAX2(surf->texture->nr_samples, 1);
         memcpy(s->swizzle, id_swz, sizeof(s->swizzle));
         fb->zs.view.s = s;
         s_view = s;
      }
   }

   if (batch->clear & PIPE_CLEAR_DEPTH) {
      fb->zs.clear.z = true;
      fb->zs.clear_value.depth = batch->clear_depth;
   }

   if (batch->clear & PIPE_CLEAR_STENCIL) {
      fb->zs.clear.s = true;
      fb->zs.clear_value.stencil = batch->clear_stencil;
   }

   fb->zs.discard.z = !reserve && !(batch->resolve & PIPE_CLEAR_DEPTH);
   fb->zs.discard.s = !reserve && !(batch->resolve & PIPE_CLEAR_STENCIL);

   if (!fb->zs.clear.z && z_rsrc &&
       ((batch->read & PIPE_CLEAR_DEPTH) ||
        ((batch->draws & PIPE_CLEAR_DEPTH) &&
         BITSET_TEST(z_rsrc->valid.data, z_view->first_level))))
      fb->zs.preload.z = true;

   if (!fb->zs.clear.s && s_rsrc &&
       ((batch->read & PIPE_CLEAR_STENCIL) ||
        ((batch->draws & PIPE_CLEAR_STENCIL) &&
         BITSET_TEST(s_rsrc->valid.data, s_view->first_level))))
      fb->zs.preload.s = true;

   /* Preserve both components if we have a combined ZS view and
    * one component needs to be preserved.
    */
   if (z_view && z_view == s_view && fb->zs.discard.z != fb->zs.discard.s) {
      bool valid = BITSET_TEST(z_rsrc->valid.data, z_view->first_level);

      fb->zs.discard.z = false;
      fb->zs.discard.s = false;
      fb->zs.preload.z = !fb->zs.clear.z && valid;
      fb->zs.preload.s = !fb->zs.clear.s && valid;
   }
}

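/* If the first color attachment has a damage tile map, upload it through the
 * batch pool and record its GPU address and stride in the framebuffer info */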
static void
panfrost_emit_tile_map(struct panfrost_batch *batch, struct pan_fb_info *fb)
{
   if (batch->key.nr_cbufs < 1 || !batch->key.cbufs[0])
      return;

   struct pipe_surface *surf = batch->key.cbufs[0];
   struct panfrost_resource *pres = surf ? pan_resource(surf->texture) : NULL;

   if (pres && pres->damage.tile_map.enable) {
      fb->tile_map.base =
         pan_pool_upload_aligned(&batch->pool.base, pres->damage.tile_map.data,
                                 pres->damage.tile_map.size, 64);
      fb->tile_map.stride = pres->damage.tile_map.stride;
   }
}

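/* Submit a batch through the per-GPU backend and then clean it up. Batches
 * with no fragment job, no compute jobs and no time query are released
 * without touching the hardware */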
static void
panfrost_batch_submit(struct panfrost_context *ctx,
                      struct panfrost_batch *batch)
{
   struct pipe_screen *pscreen = ctx->base.screen;
   struct panfrost_screen *screen = pan_screen(pscreen);
   bool has_frag = panfrost_has_fragment_job(batch);
   int ret;

   /* Nothing to do! */
   if (!has_frag && batch->compute_count == 0 && !batch->has_time_query)
      goto out;

   if (batch->key.zsbuf && has_frag) {
      struct pipe_surface *surf = batch->key.zsbuf;
      struct panfrost_resource *z_rsrc = pan_resource(surf->texture);

      /* If there are multiple levels or layers, we optimize only the first */
      if (surf->u.tex.level == 0 && surf->u.tex.first_layer == 0) {
         /* Shared depth/stencil resources are not supported, and would
          * break this optimisation. */
         assert(!(z_rsrc->base.bind & PAN_BIND_SHARED_MASK));

         if (batch->clear & PIPE_CLEAR_STENCIL) {
            z_rsrc->stencil_value = batch->clear_stencil;
            z_rsrc->constant_stencil = true;
         } else if (z_rsrc->constant_stencil) {
            batch->clear_stencil = z_rsrc->stencil_value;
            batch->clear |= PIPE_CLEAR_STENCIL;
         }
      }

      if (batch->draws & PIPE_CLEAR_STENCIL)
         z_rsrc->constant_stencil = false;
   }

   struct pan_fb_info fb;
   struct pan_image_view rts[8], zs, s;

   panfrost_batch_to_fb_info(batch, &fb, rts, &zs, &s, false);
   panfrost_emit_tile_map(batch, &fb);

   ret = screen->vtbl.submit_batch(batch, &fb);
   if (ret)
      fprintf(stderr, "panfrost_batch_submit failed: %d\n", ret);

   /* We must reset the damage info of our render targets here even
    * though a damage reset normally happens when the DRI layer swaps
    * buffers. That's because there can be implicit flushes the GL
    * app is not aware of, and those might impact the damage region: if
    * part of the damaged portion is drawn during those implicit flushes,
    * you have to reload those areas before next draws are pushed, and
    * since the driver can't easily know what's been modified by the draws
    * it flushed, the easiest solution is to reload everything.
    */
   for (unsigned i = 0; i < batch->key.nr_cbufs; i++) {
      if (!batch->key.cbufs[i])
         continue;

      panfrost_resource_set_damage_region(
         ctx->base.screen, batch->key.cbufs[i]->texture, 0, NULL);
   }

out:
   panfrost_batch_cleanup(ctx, batch);
}

/* Submit all batches */

void
panfrost_flush_all_batches(struct panfrost_context *ctx, const char *reason)
{
   if (reason)
      perf_debug(ctx, "Flushing everything due to: %s", reason);

   struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
   panfrost_batch_submit(ctx, batch);

   for (unsigned i = 0; i < PAN_MAX_BATCHES; i++) {
      if (ctx->batches.slots[i].seqnum)
         panfrost_batch_submit(ctx, &ctx->batches.slots[i]);
   }
}

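/* Flush the batch, if any, currently recorded as the writer of a resource */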
void
panfrost_flush_writer(struct panfrost_context *ctx,
                      struct panfrost_resource *rsrc, const char *reason)
{
   struct hash_entry *entry = _mesa_hash_table_search(ctx->writers, rsrc);

   if (entry) {
      perf_debug(ctx, "Flushing writer due to: %s", reason);
      panfrost_batch_submit(ctx, entry->data);
   }
}

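/* Flush every active batch that uses the given resource */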
void
panfrost_flush_batches_accessing_rsrc(struct panfrost_context *ctx,
                                      struct panfrost_resource *rsrc,
                                      const char *reason)
{
   unsigned i;
   foreach_batch(ctx, i) {
      struct panfrost_batch *batch = &ctx->batches.slots[i];

      if (!panfrost_batch_uses_resource(batch, rsrc))
         continue;

      perf_debug(ctx, "Flushing user due to: %s", reason);
      panfrost_batch_submit(ctx, batch);
   }
}

bool
panfrost_any_batch_reads_rsrc(struct panfrost_context *ctx,
                              struct panfrost_resource *rsrc)
{
   unsigned i;
   foreach_batch(ctx, i) {
      struct panfrost_batch *batch = &ctx->batches.slots[i];

      if (panfrost_batch_uses_resource(batch, rsrc))
         return true;
   }

   return false;
}

bool
panfrost_any_batch_writes_rsrc(struct panfrost_context *ctx,
                               struct panfrost_resource *rsrc)
{
   return _mesa_hash_table_search(ctx->writers, rsrc) != NULL;
}

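/* Grow the batch's thread-local storage requirement to cover every shader
 * currently bound on the context */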
void
panfrost_batch_adjust_stack_size(struct panfrost_batch *batch)
{
   struct panfrost_context *ctx = batch->ctx;

   for (unsigned i = 0; i < PIPE_SHADER_TYPES; ++i) {
      struct panfrost_compiled_shader *ss = ctx->prog[i];

      if (!ss)
         continue;

      batch->stack_size = MAX2(batch->stack_size, ss->info.tls_size);
   }
}

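/* Record a clear on the batch: pack the clear color for each affected render
 * target, stash the depth/stencil clear values, and extend the batch's
 * bounding box to the whole framebuffer */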
void
panfrost_batch_clear(struct panfrost_batch *batch, unsigned buffers,
                     const union pipe_color_union *color, double depth,
                     unsigned stencil)
{
   struct panfrost_context *ctx = batch->ctx;
   struct panfrost_device *dev = pan_device(ctx->base.screen);

   if (buffers & PIPE_CLEAR_COLOR) {
      for (unsigned i = 0; i < ctx->pipe_framebuffer.nr_cbufs; ++i) {
         if (!(buffers & (PIPE_CLEAR_COLOR0 << i)))
            continue;

         enum pipe_format format = ctx->pipe_framebuffer.cbufs[i]->format;
         pan_pack_color(dev->blendable_formats, batch->clear_color[i], color,
                        format, false);
      }
   }

   if (buffers & PIPE_CLEAR_DEPTH) {
      batch->clear_depth = depth;
   }

   if (buffers & PIPE_CLEAR_STENCIL) {
      batch->clear_stencil = stencil;
   }

   batch->clear |= buffers;
   batch->resolve |= buffers;

   /* Clearing affects the entire framebuffer by definition: this is the
    * Gallium clear callback, which clears the whole framebuffer. If the
    * scissor test were enabled from the GL side, the gallium frontend
    * would emit a quad instead and we wouldn't go down this code path. */

   panfrost_batch_union_scissor(batch, 0, 0, ctx->pipe_framebuffer.width,
                                ctx->pipe_framebuffer.height);
}

/* Given a new bounding rectangle (scissor), let the job cover the union of the
 * new and old bounding rectangles */

void
panfrost_batch_union_scissor(struct panfrost_batch *batch, unsigned minx,
                             unsigned miny, unsigned maxx, unsigned maxy)
{
   batch->minx = MIN2(batch->minx, minx);
   batch->miny = MIN2(batch->miny, miny);
   batch->maxx = MAX2(batch->maxx, maxx);
   batch->maxy = MAX2(batch->maxy, maxy);
}

/**
 * Checks if rasterization should be skipped. If not, a TILER job must be
 * created for each draw, or the IDVS flow must be used.
 *
 * As a special case, if there is no vertex shader, no primitives are generated,
 * meaning the whole pipeline (including rasterization) should be skipped.
 */
bool
panfrost_batch_skip_rasterization(struct panfrost_batch *batch)
{
   struct panfrost_context *ctx = batch->ctx;
   struct pipe_rasterizer_state *rast = (void *)ctx->rasterizer;

   return (rast->rasterizer_discard || batch->scissor_culls_everything ||
           !batch->rsd[PIPE_SHADER_VERTEX]);
}