/*
 * Copyright © 2012 Rob Clark <[email protected]>
 * SPDX-License-Identifier: MIT
 *
 * Authors:
 *    Rob Clark <[email protected]>
 */

#ifndef FREEDRENO_CONTEXT_H_
#define FREEDRENO_CONTEXT_H_

#include "pipe/p_context.h"
#include "util/libsync.h"
#include "util/list.h"
#include "util/slab.h"
#include "util/u_blitter.h"
#include "util/u_string.h"
#include "util/u_threaded_context.h"
#include "util/perf/u_trace.h"

#include "freedreno_autotune.h"
#include "freedreno_gmem.h"
#include "freedreno_perfetto.h"
#include "freedreno_screen.h"
#include "freedreno_util.h"

#ifdef __cplusplus
extern "C" {
#endif

#define BORDER_COLOR_UPLOAD_SIZE (2 * PIPE_MAX_SAMPLERS * BORDERCOLOR_SIZE)

struct fd_vertex_stateobj;
struct fd_batch;

struct fd_texture_stateobj {
   struct pipe_sampler_view *textures[PIPE_MAX_SAMPLERS];
   unsigned num_textures;
   unsigned valid_textures;
   struct pipe_sampler_state *samplers[PIPE_MAX_SAMPLERS];
   unsigned num_samplers;
   unsigned valid_samplers;
};

struct fd_program_stateobj {
   void *vs, *hs, *ds, *gs, *fs;
};

struct fd_constbuf_stateobj {
   struct pipe_constant_buffer cb[PIPE_MAX_CONSTANT_BUFFERS];
   uint32_t enabled_mask;
};

struct fd_shaderbuf_stateobj {
   struct pipe_shader_buffer sb[PIPE_MAX_SHADER_BUFFERS];
   uint32_t enabled_mask;
   uint32_t writable_mask;
};

struct fd_shaderimg_stateobj {
   struct pipe_image_view si[PIPE_MAX_SHADER_IMAGES];
   uint32_t enabled_mask;
};

struct fd_vertexbuf_stateobj {
   struct pipe_vertex_buffer vb[PIPE_MAX_ATTRIBS];
   unsigned count;
   uint32_t enabled_mask;
};

struct fd_vertex_stateobj {
   struct pipe_vertex_element pipe[PIPE_MAX_ATTRIBS];
   unsigned strides[PIPE_MAX_ATTRIBS];
   unsigned num_elements;
};

struct fd_stream_output_target {
   struct pipe_stream_output_target base;
   struct pipe_resource *offset_buf;
   /* stride of the last stream out recorded to this target, for
    * glDrawTransformFeedback(). */
   uint32_t stride;
};

struct fd_streamout_stateobj {
   struct pipe_stream_output_target *targets[PIPE_MAX_SO_BUFFERS];
   /* Bitmask of streams that should be reset. */
   unsigned reset;

   unsigned num_targets;
   /* Track offset from vtxcnt for streamout data.  This counter
    * is just incremented by # of vertices on each draw until
    * reset or a new streamout buffer is bound.
    *
    * When we eventually have GS, the CPU won't actually know the
    * number of vertices per draw, so I think we'll have to do
    * something more clever.
    */
   unsigned offsets[PIPE_MAX_SO_BUFFERS];

   /* Pre-a6xx, the maximum number of vertices that could be recorded to this
    * set of targets with the current vertex shader.  On a6xx and newer,
    * hardware queries are used instead.
    */
   unsigned max_tf_vtx;

   /* Pre-a6xx, the number of verts written to the buffers since the last
    * Begin.  Used for overflow checking for SW queries.
    */
   unsigned verts_written;
};

#define MAX_GLOBAL_BUFFERS 16
struct fd_global_bindings_stateobj {
   struct pipe_resource *buf[MAX_GLOBAL_BUFFERS];
   uint32_t enabled_mask;
};

/* group together the vertex and vertexbuf state.. for ease of passing
 * around, and because various internal operations (gmem<->mem, etc)
 * need their own vertex state:
 */
struct fd_vertex_state {
   struct fd_vertex_stateobj *vtx;
   struct fd_vertexbuf_stateobj vertexbuf;
};

/* global 3d pipeline dirty state: */
enum fd_dirty_3d_state {
   FD_DIRTY_BLEND = BIT(0),
   FD_DIRTY_RASTERIZER = BIT(1),
   FD_DIRTY_ZSA = BIT(2),
   FD_DIRTY_BLEND_COLOR = BIT(3),
   FD_DIRTY_STENCIL_REF = BIT(4),
   FD_DIRTY_SAMPLE_MASK = BIT(5),
   FD_DIRTY_FRAMEBUFFER = BIT(6),
   FD_DIRTY_STIPPLE = BIT(7),
   FD_DIRTY_VIEWPORT = BIT(8),
   FD_DIRTY_VTXSTATE = BIT(9),
   FD_DIRTY_VTXBUF = BIT(10),
   FD_DIRTY_MIN_SAMPLES = BIT(11),
   FD_DIRTY_SCISSOR = BIT(12),
   FD_DIRTY_STREAMOUT = BIT(13),
   FD_DIRTY_UCP = BIT(14),
   FD_DIRTY_PROG = BIT(15),
   FD_DIRTY_CONST = BIT(16),
   FD_DIRTY_TEX = BIT(17),
   FD_DIRTY_IMAGE = BIT(18),
   FD_DIRTY_SSBO = BIT(19),
   FD_DIRTY_QUERY = BIT(20),
   FD_DIRTY_SAMPLE_LOCATIONS = BIT(21),

   /* only used by a2xx.. possibly can be removed.. */
   FD_DIRTY_TEXSTATE = BIT(22),

   /* fine grained state changes, for cases where state is not orthogonal
    * from hw perspective:
    */
   FD_DIRTY_RASTERIZER_DISCARD = BIT(24),
   FD_DIRTY_RASTERIZER_CLIP_PLANE_ENABLE = BIT(25),
   FD_DIRTY_BLEND_DUAL = BIT(26),
   FD_DIRTY_BLEND_COHERENT = BIT(27),
#define NUM_DIRTY_BITS 28
};

static inline void
fd_print_dirty_state(BITMASK_ENUM(fd_dirty_3d_state) dirty)
{
#if MESA_DEBUG
   if (!FD_DBG(MSGS))
      return;

   struct {
      enum fd_dirty_3d_state state;
      const char *name;
   } tbl[] = {
#define STATE(n) { FD_DIRTY_ ## n, #n }
         STATE(BLEND),
         STATE(RASTERIZER),
         STATE(ZSA),
         STATE(BLEND_COLOR),
         STATE(STENCIL_REF),
         STATE(SAMPLE_MASK),
         STATE(FRAMEBUFFER),
         STATE(STIPPLE),
         STATE(VIEWPORT),
         STATE(VTXSTATE),
         STATE(VTXBUF),
         STATE(MIN_SAMPLES),
         STATE(SCISSOR),
         STATE(STREAMOUT),
         STATE(UCP),
         STATE(PROG),
         STATE(CONST),
         STATE(TEX),
         STATE(IMAGE),
         STATE(SSBO),
         STATE(QUERY),
         STATE(TEXSTATE),
         STATE(RASTERIZER_DISCARD),
         STATE(RASTERIZER_CLIP_PLANE_ENABLE),
         STATE(BLEND_DUAL),
         STATE(BLEND_COHERENT),
#undef STATE
   };

   struct log_stream *s = mesa_log_streami();

   mesa_log_stream_printf(s, "dirty:");

   if ((uint32_t)dirty == ~0) {
      mesa_log_stream_printf(s, " ALL");
      dirty = 0;
   }

   for (unsigned i = 0; i < ARRAY_SIZE(tbl); i++) {
      if (dirty & tbl[i].state) {
         mesa_log_stream_printf(s, " %s", tbl[i].name);
         dirty &= ~tbl[i].state;
      }
   }

   assert(!dirty);

   mesa_log_stream_destroy(s);
#endif
}
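
/* Illustrative usage (a sketch, not quoted from the driver sources):
 * generation-specific emit code can dump the pending dirty bits before
 * emitting state, roughly:
 *
 *    fd_print_dirty_state(ctx->dirty);
 *
 * The output is only produced on MESA_DEBUG builds when FD_DBG(MSGS) is
 * enabled; the call site shown here is hypothetical.
 */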

/* per shader-stage dirty state: */
enum fd_dirty_shader_state {
   FD_DIRTY_SHADER_PROG = BIT(0),
   FD_DIRTY_SHADER_CONST = BIT(1),
   FD_DIRTY_SHADER_TEX = BIT(2),
   FD_DIRTY_SHADER_SSBO = BIT(3),
   FD_DIRTY_SHADER_IMAGE = BIT(4),
#define NUM_DIRTY_SHADER_BITS 5
};

enum fd_buffer_mask {
   /* align bitmask values w/ PIPE_CLEAR_*.. since that is convenient.. */
   FD_BUFFER_COLOR = PIPE_CLEAR_COLOR,
   FD_BUFFER_DEPTH = PIPE_CLEAR_DEPTH,
   FD_BUFFER_STENCIL = PIPE_CLEAR_STENCIL,
   FD_BUFFER_ALL = FD_BUFFER_COLOR | FD_BUFFER_DEPTH | FD_BUFFER_STENCIL,

   /* A special internal buffer bit to signify that the LRZ buffer needs
    * clearing
    */
   FD_BUFFER_LRZ = BIT(15),
};
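
/* Illustrative sketch (an assumption, not quoted from the driver): because
 * the FD_BUFFER_* values alias the PIPE_CLEAR_* bits, a gallium clear mask
 * can be forwarded to the per-generation clear hook with just a cast, e.g.:
 *
 *    enum fd_buffer_mask buffers =
 *       (enum fd_buffer_mask)(PIPE_CLEAR_COLOR | PIPE_CLEAR_DEPTH);
 *    ctx->clear(ctx, buffers, &color, depth, stencil);
 *
 * (ctx->clear is the fd_context hook declared further down in this file.)
 */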

#define MAX_HW_SAMPLE_PROVIDERS 10
struct fd_hw_sample_provider;
struct fd_hw_sample;

struct ir3_shader_key;

struct fd_context {
   struct pipe_context base;

   unsigned flags;      /* PIPE_CONTEXT_x */

   struct threaded_context *tc;

   struct list_head node; /* node in screen->context_list */

   /* We currently need to serialize emitting GMEM batches, because of
    * VSC state access in the context.
    *
    * In practice this lock should not be contended, since pipe_context
    * use should be single threaded.  But it is needed to protect the
    * case with batch reordering, where a ctxB batch triggers flushing
    * a ctxA batch.
    */
   simple_mtx_t gmem_lock;

   struct fd_device *dev;
   struct fd_screen *screen;
   struct fd_pipe *pipe;

   struct blitter_context *blitter dt;
   void *clear_rs_state[2] dt;

   /* slab for pipe_transfer allocations: */
   struct slab_child_pool transfer_pool dt;
   struct slab_child_pool transfer_pool_unsync; /* for threaded_context */

   struct fd_autotune autotune dt;

   /**
    * query related state:
    */
   /*@{*/
   /* slabs for fd_hw_sample and fd_hw_sample_period allocations: */
   struct slab_mempool sample_pool dt;
   struct slab_mempool sample_period_pool dt;

   /* sample-providers for hw queries: */
   const struct fd_hw_sample_provider
      *hw_sample_providers[MAX_HW_SAMPLE_PROVIDERS];

   /* list of active queries: */
   struct list_head hw_active_queries dt;

   /* sample-providers for accumulating hw queries: */
   const struct fd_acc_sample_provider
      *acc_sample_providers[MAX_HW_SAMPLE_PROVIDERS];

   /* list of active accumulating queries: */
   struct list_head acc_active_queries dt;
   /*@}*/

   float default_outer_level[4] dt;
   float default_inner_level[2] dt;
   uint8_t patch_vertices dt;

   /* Current state of pctx->set_active_query_state() (i.e. "should drawing
    * be counted against non-perfcounter queries")
    */
   bool active_queries dt;

   /* shaders used by clear, and gmem->mem blits: */
   struct fd_program_stateobj solid_prog; // TODO move to screen?
   struct fd_program_stateobj solid_layered_prog;

   /* shaders used by mem->gmem blits: */
   struct fd_program_stateobj
      blit_prog[MAX_RENDER_TARGETS]; // TODO move to screen?
   struct fd_program_stateobj blit_z, blit_zs;

   /* Stats/counters:
    */
   struct {
      uint64_t prims_emitted;
      uint64_t prims_generated;
      uint64_t draw_calls;
      uint64_t batch_total, batch_sysmem, batch_gmem, batch_nondraw,
         batch_restore;
      uint64_t staging_uploads, shadow_uploads;
      uint64_t vs_regs, hs_regs, ds_regs, gs_regs, fs_regs;
   } stats dt;

   /* Counter for number of users who need sw counters (so we can
    * skip collecting them when not needed)
    */
   unsigned stats_users;

   /* Current batch.. the rule here is that you can deref ctx->batch
    * in codepaths from pipe_context entrypoints.  But not in code-
    * paths from fd_batch_flush() (basically, the stuff that gets
    * called from GMEM code), since in those code-paths the batch
    * you care about is not necessarily the same as ctx->batch.
    */
   struct fd_batch *batch dt;

   /* Current nondraw batch.  Rules are the same as for draw batch.
    */
   struct fd_batch *batch_nondraw dt;

   /* NULL if there has been rendering since last flush.  Otherwise
    * keeps a reference to the last fence so we can re-use it rather
    * than having to flush no-op batch.
    */
   struct pipe_fence_handle *last_fence dt;

   /*
    * Counter to keep track of a batch's most recent update.  Ie. the batch
    * with the higher update count is the one that has been drawn to (etc.)
    * most recently (and therefore shouldn't have any other batch that should
    * be flushed after it).  This is used to figure out which fence to use at
    * context flush time.
    */
   uint32_t update_count;

   /* Fence fd we are told to wait on via ->fence_server_sync() (or -1
    * if none).  The in-fence is transferred over to the batch on the
    * next draw/blit/grid.
    *
    * The reason for this extra complexity is that apps will typically
    * do eglWaitSyncKHR()/etc at the beginning of the frame, before the
    * first draw.  But mesa/st doesn't flush down framebuffer state
    * change until we hit a draw, so at ->fence_server_sync() time, we
    * don't yet have the correct batch.  If we created a batch at that
    * point, it would be the wrong one, and we'd have to flush it
    * prematurely, causing us to stall early in the frame where we could
    * be building up cmdstream.
    */
   int in_fence_fd dt;

   /**
    * If we *ever* see an in-fence-fd, assume that userspace is
    * not relying on implicit fences.
    */
   bool no_implicit_sync;

   /* track last known reset status globally and per-context to
    * determine if more resets occurred since then.  If the global reset
    * count increases, it means some other context crashed.  If the
    * per-context reset count increases, it means we crashed the
    * gpu.
    *
    * Only accessed by front-end thread, never accessed by TC driver
    * thread.
    */
   uint32_t context_reset_count;
   uint32_t global_reset_count;

   /* Context sequence #, used for batch-cache key: */
   uint16_t seqno;

   /* Cost per draw, used in conjunction with samples-passed history to
    * estimate whether GMEM or bypass is the better option.
    */
   uint8_t draw_cost;

   /* Are we in the process of shadowing a resource? Used to detect recursion
    * in transfer_map, and skip unneeded synchronization.
    */
   bool in_shadow : 1 dt;

   /* For catching recursion problems with blit fallback: */
   bool in_blit : 1 dt;

   /* points to either scissor or disabled_scissor depending on rast state: */
   struct pipe_scissor_state *current_scissor dt;

   /* Note that all the scissor state that is traced is inclusive, ie the
    * maximum maxx is one less than the width.
    */
   struct pipe_scissor_state scissor[PIPE_MAX_VIEWPORTS] dt;

   /* we don't have a disable/enable bit for scissor, so instead we keep
    * a disabled-scissor state which matches the entire bound framebuffer
    * and use that when scissor is not enabled.
    */
   struct pipe_scissor_state disabled_scissor[PIPE_MAX_VIEWPORTS] dt;

   /* Per vsc pipe bo's (a2xx-a5xx): */
   struct fd_bo *vsc_pipe_bo[32] dt;

   /* Table of bo's attached to all batches up-front (because they
    * are commonly used, and that is easier than attaching on-use).
    * In particular, these are driver internal buffers which do not
    * participate in batch resource tracking.
    */
   struct fd_bo *private_bos[3];
   unsigned num_private_bos;

   /* Maps generic gallium oriented fd_dirty_3d_state bits to generation
    * specific bitmask of state "groups".
    */
   uint32_t gen_dirty_map[NUM_DIRTY_BITS];
   uint32_t gen_dirty_shader_map[PIPE_SHADER_TYPES][NUM_DIRTY_SHADER_BITS];

   /* Bitmask of all possible gen_dirty bits: */
   uint32_t gen_all_dirty;

   /* Generation specific bitmask of dirty state groups: */
   uint32_t gen_dirty;

   /* which state objects need to be re-emit'd: */
   BITMASK_ENUM(fd_dirty_3d_state) dirty dt;

   /* As above, but also needs draw time resource tracking: */
   BITMASK_ENUM(fd_dirty_3d_state) dirty_resource dt;

   /* per shader-stage dirty status: */
   BITMASK_ENUM(fd_dirty_shader_state) dirty_shader[PIPE_SHADER_TYPES] dt;

   /* As above, but also needs draw time resource tracking: */
   BITMASK_ENUM(fd_dirty_shader_state) dirty_shader_resource[PIPE_SHADER_TYPES] dt;

   void *compute dt;
   struct pipe_blend_state *blend dt;
   struct pipe_rasterizer_state *rasterizer dt;
   struct pipe_depth_stencil_alpha_state *zsa dt;

   struct fd_texture_stateobj tex[PIPE_SHADER_TYPES] dt;

   struct fd_program_stateobj prog dt;
   uint32_t bound_shader_stages dt;

   struct fd_vertex_state vtx dt;

   struct pipe_blend_color blend_color dt;
   struct pipe_stencil_ref stencil_ref dt;
   unsigned sample_mask dt;
   unsigned min_samples dt;

   /* 1x1 grid, max 4x MSAA: */
   uint8_t sample_locations[4] dt;
   bool sample_locations_enabled dt;

   /* local context fb state, for when ctx->batch is null: */
   struct pipe_framebuffer_state framebuffer dt;
   uint32_t all_mrt_channel_mask dt;

   struct pipe_poly_stipple stipple dt;
   struct pipe_viewport_state viewport[PIPE_MAX_VIEWPORTS] dt;
   struct pipe_scissor_state viewport_scissor[PIPE_MAX_VIEWPORTS] dt;
   struct {
      unsigned x, y;
   } guardband dt;
   struct fd_constbuf_stateobj constbuf[PIPE_SHADER_TYPES] dt;
   struct fd_shaderbuf_stateobj shaderbuf[PIPE_SHADER_TYPES] dt;
   struct fd_shaderimg_stateobj shaderimg[PIPE_SHADER_TYPES] dt;
   struct fd_streamout_stateobj streamout dt;
   struct fd_global_bindings_stateobj global_bindings dt;
   struct pipe_clip_state ucp dt;

   struct pipe_query *cond_query dt;
   bool cond_cond dt; /* inverted rendering condition */
   uint cond_mode dt;

   /* Private memory is a memory space where each fiber gets its own piece of
    * memory, in addition to registers. It is backed by a buffer which needs
    * to be large enough to hold the contents of every possible wavefront in
    * every core of the GPU. Because it allocates space via the internal
    * wavefront ID which is shared between all currently executing shaders,
    * the same buffer can be reused by all shaders, as long as all shaders
    * sharing the same buffer use the exact same configuration. There are two
    * inputs to the configuration, the amount of per-fiber space and whether
    * to use the newer per-wave or older per-fiber layout. We only ever
    * increase the size, and shaders with a smaller size requirement simply
    * use the larger existing buffer, so that we only need to keep track of
    * one buffer and its size, but we still need to keep track of per-fiber
    * and per-wave buffers separately so that we never use the same buffer
    * for different layouts. pvtmem[0] is for per-fiber, and pvtmem[1] is for
    * per-wave.
    */
   struct {
      struct fd_bo *bo;
      uint32_t per_fiber_size;
      uint32_t per_sp_size;
   } pvtmem[2] dt;

   /* maps per-shader-stage state plus variant key to hw
    * program stateobj:
    */
   struct ir3_cache *shader_cache;

   struct util_debug_callback debug;

   struct u_trace_context trace_context dt;

#ifdef HAVE_PERFETTO
   struct fd_perfetto_state perfetto;
#endif

   /*
    * Counter to generate submit-ids
    */
   uint32_t submit_count;

   /* Called on rebind_resource() for any per-gen cleanup required: */
   void (*rebind_resource)(struct fd_context *ctx, struct fd_resource *rsc) dt;

   /* GMEM/tile handling fxns: */
   void (*emit_tile_init)(struct fd_batch *batch) dt;
   void (*emit_tile_prep)(struct fd_batch *batch,
                          const struct fd_tile *tile) dt;
   void (*emit_tile_mem2gmem)(struct fd_batch *batch,
                              const struct fd_tile *tile) dt;
   void (*emit_tile_renderprep)(struct fd_batch *batch,
                                const struct fd_tile *tile) dt;
   void (*emit_tile)(struct fd_batch *batch, const struct fd_tile *tile) dt;
   void (*emit_tile_gmem2mem)(struct fd_batch *batch,
                              const struct fd_tile *tile) dt;
   void (*emit_tile_fini)(struct fd_batch *batch) dt; /* optional */

   /* optional, for GMEM bypass: */
   void (*emit_sysmem_prep)(struct fd_batch *batch) dt;
   void (*emit_sysmem)(struct fd_batch *batch) dt;
   void (*emit_sysmem_fini)(struct fd_batch *batch) dt;

   /* draw: */
   void (*draw_vbos)(struct fd_context *ctx, const struct pipe_draw_info *info,
                     unsigned drawid_offset,
                     const struct pipe_draw_indirect_info *indirect,
                     const struct pipe_draw_start_count_bias *draws,
                     unsigned num_draws,
                     unsigned index_offset) dt;
   bool (*clear)(struct fd_context *ctx, enum fd_buffer_mask buffers,
                 const union pipe_color_union *color, double depth,
                 unsigned stencil) dt;

   /* called to update draw_vbo func after bound shader stages change, etc: */
   void (*update_draw)(struct fd_context *ctx);

   /* compute: */
   void (*launch_grid)(struct fd_context *ctx,
                       const struct pipe_grid_info *info) dt;

   /* query: */
   struct fd_query *(*create_query)(struct fd_context *ctx, unsigned query_type,
                                    unsigned index);
   void (*query_prepare)(struct fd_batch *batch, uint32_t num_tiles) dt;
   void (*query_prepare_tile)(struct fd_batch *batch, uint32_t n,
                              struct fd_ringbuffer *ring) dt;
   void (*query_update_batch)(struct fd_batch *batch, bool disable_all) dt;

   /* blitter: */
   bool (*blit)(struct fd_context *ctx, const struct pipe_blit_info *info) dt;
   void (*clear_ubwc)(struct fd_batch *batch, struct fd_resource *rsc) dt;

   /* uncompress resource, if necessary, to use as the specified format: */
   void (*validate_format)(struct fd_context *ctx, struct fd_resource *rsc,
                           enum pipe_format format) dt;

   /* logger: */
   void (*record_timestamp)(struct fd_ringbuffer *ring, struct fd_bo *bo,
                            unsigned offset);
   uint64_t (*ts_to_ns)(uint64_t ts);

   /*
    * Common pre-cooked VBO state (used for a3xx and later):
    */

   /* for clear/gmem->mem vertices, and mem->gmem */
   struct pipe_resource *solid_vbuf;

   /* for mem->gmem tex coords: */
   struct pipe_resource *blit_texcoord_vbuf;

   /* vertex state for solid_vbuf:
    *    - solid_vbuf / 12 / R32G32B32_FLOAT
    */
   struct fd_vertex_state solid_vbuf_state;

   /* vertex state for blit_prog:
    *    - blit_texcoord_vbuf / 8 / R32G32_FLOAT
    *    - solid_vbuf / 12 / R32G32B32_FLOAT
    */
   struct fd_vertex_state blit_vbuf_state;

   /*
    * Info about state of previous draw, for state that comes from
    * pipe_draw_info (ie. not part of a CSO).  This allows us to
    * skip some register emit when the state doesn't change from
    * draw-to-draw.
    */
   struct {
      bool dirty; /* last draw state unknown */
      bool primitive_restart;
      uint32_t index_start;
      uint32_t instance_start;
      uint32_t restart_index;
      uint32_t streamout_mask;

      /* some state changes require a different shader variant.  Keep
       * track of this so we know when we need to re-emit shader state
       * due to variant change.  See ir3_fixup_shader_state()
       *
       * (used for a3xx+, NULL otherwise)
       */
      struct ir3_shader_key *key;

   } last dt;
};

static inline struct fd_context *
fd_context(struct pipe_context *pctx)
{
   return (struct fd_context *)pctx;
}

static inline struct fd_stream_output_target *
fd_stream_output_target(struct pipe_stream_output_target *target)
{
   return (struct fd_stream_output_target *)target;
}

void fd_context_add_private_bo(struct fd_context *ctx, struct fd_bo *bo);

/* Mark specified non-shader-stage related state as dirty: */
static inline void
fd_context_dirty(struct fd_context *ctx, BITMASK_ENUM(fd_dirty_3d_state) dirty)
   assert_dt
{
   assert(util_is_power_of_two_nonzero(dirty));
   assert(ffs(dirty) <= ARRAY_SIZE(ctx->gen_dirty_map));

   ctx->gen_dirty |= ctx->gen_dirty_map[ffs(dirty) - 1];
   ctx->dirty |= dirty;

   /* These are still not handled at bind time: */
   if (dirty & (FD_DIRTY_FRAMEBUFFER | FD_DIRTY_QUERY | FD_DIRTY_ZSA))
      ctx->dirty_resource |= dirty;
}
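
/* Illustrative sketch (an assumption, not quoted from the driver): a
 * state-bind entrypoint typically stores the new state and marks the
 * matching dirty bit, along the lines of:
 *
 *    static void
 *    example_set_blend_color(struct pipe_context *pctx,
 *                            const struct pipe_blend_color *bc)
 *    {
 *       struct fd_context *ctx = fd_context(pctx);
 *       ctx->blend_color = *bc;
 *       fd_context_dirty(ctx, FD_DIRTY_BLEND_COLOR);
 *    }
 *
 * The real hooks live in freedreno_state.c; the function name above is
 * hypothetical.
 */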

static inline enum fd_dirty_3d_state
dirty_shader_to_dirty_state(BITMASK_ENUM(fd_dirty_shader_state) dirty)
{
   const enum fd_dirty_3d_state map[] = {
      FD_DIRTY_PROG, FD_DIRTY_CONST, FD_DIRTY_TEX,
      FD_DIRTY_SSBO, FD_DIRTY_IMAGE,
   };

   /* Need to update the table above if these shift: */
   STATIC_ASSERT(FD_DIRTY_SHADER_PROG == BIT(0));
   STATIC_ASSERT(FD_DIRTY_SHADER_CONST == BIT(1));
   STATIC_ASSERT(FD_DIRTY_SHADER_TEX == BIT(2));
   STATIC_ASSERT(FD_DIRTY_SHADER_SSBO == BIT(3));
   STATIC_ASSERT(FD_DIRTY_SHADER_IMAGE == BIT(4));

   assert(ffs(dirty) <= ARRAY_SIZE(map));

   return map[ffs(dirty) - 1];
}

static inline void
fd_context_dirty_shader(struct fd_context *ctx, enum pipe_shader_type shader,
                        BITMASK_ENUM(fd_dirty_shader_state) dirty)
   assert_dt
{
   assert(util_is_power_of_two_nonzero(dirty));
   ctx->gen_dirty |= ctx->gen_dirty_shader_map[shader][ffs(dirty) - 1];
   ctx->dirty_shader[shader] |= dirty;
   fd_context_dirty(ctx, dirty_shader_to_dirty_state(dirty));
}
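
/* Illustrative sketch (an assumption): binding fragment-shader sampler views
 * would mark the per-stage TEX bit, which also propagates to the global
 * FD_DIRTY_TEX bit via dirty_shader_to_dirty_state():
 *
 *    fd_context_dirty_shader(ctx, PIPE_SHADER_FRAGMENT, FD_DIRTY_SHADER_TEX);
 */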

/* mark all state dirty: */
static inline void
fd_context_all_dirty(struct fd_context *ctx) assert_dt
{
   ctx->last.dirty = true;
   ctx->dirty = (enum fd_dirty_3d_state) ~0;
   ctx->dirty_resource = (enum fd_dirty_3d_state) ~0;

   /* NOTE: don't use ~0 for gen_dirty, because the gen specific
    * emit code will loop over all the bits:
    */
   ctx->gen_dirty = ctx->gen_all_dirty;

   for (unsigned i = 0; i < PIPE_SHADER_TYPES; i++) {
      ctx->dirty_shader[i] = (enum fd_dirty_shader_state) ~0;
      ctx->dirty_shader_resource[i] = (enum fd_dirty_shader_state) ~0;
   }
}

static inline void
fd_context_all_clean(struct fd_context *ctx) assert_dt
{
   ctx->last.dirty = false;
   ctx->dirty = (enum fd_dirty_3d_state)0;
   ctx->dirty_resource = (enum fd_dirty_3d_state)0;
   ctx->gen_dirty = 0;
   for (unsigned i = 0; i < PIPE_SHADER_TYPES; i++) {
      ctx->dirty_shader[i] = (enum fd_dirty_shader_state)0;
      ctx->dirty_shader_resource[i] = (enum fd_dirty_shader_state)0;
   }
}

/**
 * Add mapping between global dirty bit and generation specific dirty
 * bit.
 */
static inline void
fd_context_add_map(struct fd_context *ctx, uint32_t dirty, uint32_t gen_dirty)
{
   u_foreach_bit (b, dirty) {
      ctx->gen_dirty_map[b] |= gen_dirty;
   }
   ctx->gen_all_dirty |= gen_dirty;
}
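
/* Illustrative sketch (an assumption; the gen-specific group name is made
 * up): a generation backend registers its state-group mapping at context
 * init, roughly:
 *
 *    fd_context_add_map(ctx, FD_DIRTY_ZSA | FD_DIRTY_STENCIL_REF,
 *                       BIT(EXAMPLE_GROUP_ZSA));
 *
 * so that fd_context_dirty(ctx, FD_DIRTY_ZSA) also sets that group bit in
 * ctx->gen_dirty for the per-gen emit loop.
 */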

/**
 * Add mapping between shader stage specific dirty bit and generation
 * specific dirty bit
 */
static inline void
fd_context_add_shader_map(struct fd_context *ctx, enum pipe_shader_type shader,
                          BITMASK_ENUM(fd_dirty_shader_state) dirty, uint32_t gen_dirty)
{
   u_foreach_bit (b, dirty) {
      ctx->gen_dirty_shader_map[shader][b] |= gen_dirty;
   }
   ctx->gen_all_dirty |= gen_dirty;
}
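
/* Illustrative sketch (an assumption; the group name is made up): the
 * per-stage analogue of fd_context_add_map(), e.g. mapping fragment texture
 * state to a gen-specific group:
 *
 *    fd_context_add_shader_map(ctx, PIPE_SHADER_FRAGMENT,
 *                              FD_DIRTY_SHADER_TEX,
 *                              BIT(EXAMPLE_GROUP_FS_TEX));
 */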

static inline struct pipe_scissor_state *
fd_context_get_scissor(struct fd_context *ctx) assert_dt
{
   return ctx->current_scissor;
}

void fd_context_switch_from(struct fd_context *ctx) assert_dt;
void fd_context_switch_to(struct fd_context *ctx,
                          struct fd_batch *batch) assert_dt;
struct fd_batch *fd_context_batch(struct fd_context *ctx) assert_dt;
struct fd_batch *fd_context_batch_locked(struct fd_context *ctx) assert_dt;
struct fd_batch *fd_context_batch_nondraw(struct fd_context *ctx) assert_dt;

void fd_context_setup_common_vbos(struct fd_context *ctx);
void fd_context_cleanup_common_vbos(struct fd_context *ctx);
void fd_emit_string(struct fd_ringbuffer *ring, const char *string, int len);
void fd_emit_string5(struct fd_ringbuffer *ring, const char *string, int len);
__attribute__((format(printf, 3, 4))) void
fd_cs_trace_msg(struct u_trace_context *utctx, void *cs, const char *fmt, ...);
__attribute__((format(printf, 3, 4))) void
fd_cs_trace_start(struct u_trace_context *utctx, void *cs, const char *fmt,
                  ...);
__attribute__((format(printf, 3, 4))) void
fd_cs_trace_end(struct u_trace_context *utctx, void *cs, const char *fmt, ...);

struct pipe_context *fd_context_init(struct fd_context *ctx,
                                     struct pipe_screen *pscreen,
                                     void *priv, unsigned flags);
struct pipe_context *fd_context_init_tc(struct pipe_context *pctx,
                                        unsigned flags);

void fd_context_destroy(struct pipe_context *pctx) assert_dt;

#ifdef __cplusplus
}
#endif

#endif /* FREEDRENO_CONTEXT_H_ */