/*
 * © Copyright 2018 Alyssa Rosenzweig
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */

#ifndef __BUILDER_H__
#define __BUILDER_H__

#define _LARGEFILE64_SOURCE 1
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <sys/mman.h>
#include "pan_afbc_cso.h"
#include "pan_blend_cso.h"
#include "pan_earlyzs.h"
#include "pan_encoder.h"
#include "pan_job.h"
#include "pan_resource.h"
#include "pan_texture.h"

#include "pipe/p_context.h"
#include "pipe/p_defines.h"
#include "pipe/p_screen.h"
#include "pipe/p_state.h"
#include "util/compiler.h"
#include "util/detect.h"
#include "util/format/u_formats.h"
#include "util/hash_table.h"
#include "util/simple_mtx.h"
#include "util/u_blitter.h"

#include "compiler/shader_enums.h"
#include "midgard/midgard_compile.h"

#include "pan_csf.h"

/* Set the bits of `bit` in `lval` when `cond` is true, clear them otherwise.
 *
 * The body is wrapped in do { } while (0) so that the expansion is exactly
 * one statement: it can be used as the body of an unbraced if/else without
 * capturing the caller's `else`, and the call site supplies the terminating
 * semicolon. Only one of the two assignments runs, so `lval` is evaluated
 * once per expansion.
 */
#define SET_BIT(lval, bit, cond) \
   do { \
      if (cond) \
         (lval) |= (bit); \
      else \
         (lval) &= ~(bit); \
   } while (0)

61 /* Dirty tracking flags. 3D is for general 3D state. Shader flags are
62 * per-stage. Renderer refers to Renderer State Descriptors. Vertex refers to
63 * vertex attributes/elements. */
64
65 enum pan_dirty_3d {
66 PAN_DIRTY_VIEWPORT = BITFIELD_BIT(0),
67 PAN_DIRTY_SCISSOR = BITFIELD_BIT(1),
68 PAN_DIRTY_VERTEX = BITFIELD_BIT(2),
69 PAN_DIRTY_PARAMS = BITFIELD_BIT(3),
70 PAN_DIRTY_DRAWID = BITFIELD_BIT(4),
71 PAN_DIRTY_TLS_SIZE = BITFIELD_BIT(5),
72 PAN_DIRTY_ZS = BITFIELD_BIT(6),
73 PAN_DIRTY_BLEND = BITFIELD_BIT(7),
74 PAN_DIRTY_MSAA = BITFIELD_BIT(8),
75 PAN_DIRTY_OQ = BITFIELD_BIT(9),
76 PAN_DIRTY_RASTERIZER = BITFIELD_BIT(10),
77 PAN_DIRTY_POINTS = BITFIELD_BIT(11),
78 PAN_DIRTY_SO = BITFIELD_BIT(12),
79 };
80
81 enum pan_dirty_shader {
82 PAN_DIRTY_STAGE_SHADER = BITFIELD_BIT(0),
83 PAN_DIRTY_STAGE_TEXTURE = BITFIELD_BIT(1),
84 PAN_DIRTY_STAGE_SAMPLER = BITFIELD_BIT(2),
85 PAN_DIRTY_STAGE_IMAGE = BITFIELD_BIT(3),
86 PAN_DIRTY_STAGE_CONST = BITFIELD_BIT(4),
87 PAN_DIRTY_STAGE_SSBO = BITFIELD_BIT(5),
88 };
89
90 struct panfrost_constant_buffer {
91 struct pipe_constant_buffer cb[PIPE_MAX_CONSTANT_BUFFERS];
92 uint32_t enabled_mask;
93 };
94
/* Driver-side representation of a query object. */
struct panfrost_query {
   /* Passthrough from Gallium */
   unsigned type;
   unsigned index;

   /* For computed queries. 64-bit to prevent overflow */
   struct {
      uint64_t start;
      uint64_t end;
   };

   /* Memory for the GPU to writeback the value of the query */
   struct pipe_resource *rsrc;

   /* Whether an occlusion query is for a MSAA framebuffer */
   bool msaa;
};

113 struct panfrost_streamout_target {
114 struct pipe_stream_output_target base;
115 uint32_t offset;
116 };
117
118 struct panfrost_streamout {
119 struct pipe_stream_output_target *targets[PIPE_MAX_SO_BUFFERS];
120 unsigned num_targets;
121 };
122
123 struct panfrost_context {
124 /* Gallium context */
125 struct pipe_context base;
126
127 /* Dirty global state */
128 enum pan_dirty_3d dirty;
129
130 /* Per shader stage dirty state */
131 enum pan_dirty_shader dirty_shader[PIPE_SHADER_TYPES];
132
133 /* Unowned pools, so manage yourself. */
134 struct panfrost_pool descs, shaders;
135
136 /* Sync obj used to keep track of in-flight jobs. */
137 uint32_t syncobj;
138
139 /* Set of 32 batches. When the set is full, the LRU entry (the batch
140 * with the smallest seqnum) is flushed to free a slot.
141 */
142 struct {
143 uint64_t seqnum;
144 struct panfrost_batch slots[PAN_MAX_BATCHES];
145
146 /** Set of active batches for faster traversal */
147 BITSET_DECLARE(active, PAN_MAX_BATCHES);
148 } batches;
149
150 /* Map from resources to panfrost_batches */
151 struct hash_table *writers;
152
153 /* Bound job batch */
154 struct panfrost_batch *batch;
155
156 /* Within a launch_grid call.. */
157 const struct pipe_grid_info *compute_grid;
158
159 struct pipe_framebuffer_state pipe_framebuffer;
160 struct panfrost_streamout streamout;
161
162 bool active_queries;
163 uint64_t prims_generated;
164 uint64_t tf_prims_generated;
165 uint64_t draw_calls;
166 struct panfrost_query *occlusion_query;
167
168 unsigned drawid;
169 unsigned vertex_count;
170 unsigned instance_count;
171 unsigned offset_start;
172 unsigned base_vertex;
173 unsigned base_instance;
174 enum mesa_prim active_prim;
175
176 /* If instancing is enabled, vertex count padded for instance; if
177 * it is disabled, just equal to plain vertex count */
178 unsigned padded_count;
179
180 struct panfrost_constant_buffer constant_buffer[PIPE_SHADER_TYPES];
181 struct panfrost_rasterizer *rasterizer;
182 struct panfrost_vertex_state *vertex;
183
184 struct panfrost_uncompiled_shader *uncompiled[PIPE_SHADER_TYPES];
185 struct panfrost_compiled_shader *prog[PIPE_SHADER_TYPES];
186
187 struct pipe_vertex_buffer vertex_buffers[PIPE_MAX_ATTRIBS];
188 uint32_t vb_mask;
189
190 /* Bound CL global buffers */
191 struct util_dynarray global_buffers;
192
193 struct pipe_shader_buffer ssbo[PIPE_SHADER_TYPES][PIPE_MAX_SHADER_BUFFERS];
194 uint32_t ssbo_mask[PIPE_SHADER_TYPES];
195
196 struct pipe_image_view images[PIPE_SHADER_TYPES][PIPE_MAX_SHADER_IMAGES];
197 uint32_t image_mask[PIPE_SHADER_TYPES];
198
199 struct panfrost_sampler_state *samplers[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS];
200 unsigned sampler_count[PIPE_SHADER_TYPES];
201 uint32_t valid_samplers[PIPE_SHADER_TYPES];
202
203 struct panfrost_sampler_view
204 *sampler_views[PIPE_SHADER_TYPES][PIPE_MAX_SHADER_SAMPLER_VIEWS];
205 unsigned sampler_view_count[PIPE_SHADER_TYPES];
206
207 struct blitter_context *blitter;
208
209 struct pan_afbc_shaders afbc_shaders;
210
211 struct panfrost_blend_state *blend;
212
213 /* On Valhall, does the current blend state use a blend shader for any
214 * output? We need this information in a hot path to decide if
215 * per-sample shading should be enabled.
216 */
217 bool valhall_has_blend_shader;
218
219 struct pipe_viewport_state pipe_viewport;
220 struct pipe_scissor_state scissor;
221 struct pipe_blend_color blend_color;
222 struct panfrost_zsa_state *depth_stencil;
223 struct pipe_stencil_ref stencil_ref;
224 uint16_t sample_mask;
225 unsigned min_samples;
226
227 struct panfrost_query *cond_query;
228 bool cond_cond;
229 enum pipe_render_cond_flag cond_mode;
230
231 bool is_noop;
232
233 /* Mask of active render targets */
234 uint8_t fb_rt_mask;
235
236 int in_sync_fd;
237 uint32_t in_sync_obj;
238
239 union {
240 struct panfrost_csf_context csf;
241 };
242 };
243
/* Corresponds to the CSO */

struct panfrost_rasterizer;

248 /* Linked varyings */
249 struct pan_linkage {
250 /* If the upload is owned by the CSO instead
251 * of the pool, the referenced BO. Else,
252 * NULL. */
253 struct panfrost_bo *bo;
254
255 /* Uploaded attribute descriptors */
256 mali_ptr producer, consumer;
257
258 /* Varyings buffers required */
259 uint32_t present;
260
261 /* Per-vertex stride for general varying buffer */
262 uint32_t stride;
263 };
264
/* System value infrastructure */
#define MAX_SYSVAL_COUNT 32

/* Allow 2D of sysval IDs, while allowing nonparametric sysvals to equal
 * their class for equal comparison.
 *
 * A packed sysval keeps its class (one of the PAN_SYSVAL_* enumerators) in
 * the low 16 bits and its parameter `no` in the bits above. With no == 0 the
 * packed value is therefore equal to the bare class enumerator. */

#define PAN_SYSVAL(type, no)    (((no) << 16) | PAN_SYSVAL_##type)
#define PAN_SYSVAL_TYPE(sysval) ((sysval) & 0xffff)
#define PAN_SYSVAL_ID(sysval)   ((sysval) >> 16)

/* Define some common types. We start at one for easy indexing of hash
 * tables internal to the compiler.
 *
 * The values are explicit; 6 is not assigned to any enumerator. */

enum {
   PAN_SYSVAL_VIEWPORT_SCALE = 1,
   PAN_SYSVAL_VIEWPORT_OFFSET = 2,
   PAN_SYSVAL_TEXTURE_SIZE = 3,
   PAN_SYSVAL_SSBO = 4,
   PAN_SYSVAL_NUM_WORK_GROUPS = 5,
   PAN_SYSVAL_SAMPLER = 7,
   PAN_SYSVAL_LOCAL_GROUP_SIZE = 8,
   PAN_SYSVAL_WORK_DIM = 9,
   PAN_SYSVAL_IMAGE_SIZE = 10,
   PAN_SYSVAL_SAMPLE_POSITIONS = 11,
   PAN_SYSVAL_MULTISAMPLED = 12,
   PAN_SYSVAL_RT_CONVERSION = 13,
   PAN_SYSVAL_VERTEX_INSTANCE_OFFSETS = 14,
   PAN_SYSVAL_DRAWID = 15,
   PAN_SYSVAL_BLEND_CONSTANTS = 16,
   PAN_SYSVAL_XFB = 17,
   PAN_SYSVAL_NUM_VERTICES = 18,
};

/* Packing of a texture-size sysval ID:
 *
 *    bits 0..6  texture index
 *    bits 7..8  dimension
 *    bit  9     set when the texture is an array
 *
 * PAN_TXS_SYSVAL_ID does not mask its arguments, so callers must pass values
 * that fit in the fields above.
 */
#define PAN_TXS_SYSVAL_ID(texidx, dim, is_array) \
   ((texidx) | ((dim) << 7) | ((is_array) ? (1 << 9) : 0))

#define PAN_SYSVAL_ID_TO_TXS_TEX_IDX(id)  ((id) & 0x7f)
#define PAN_SYSVAL_ID_TO_TXS_DIM(id)      (((id) >> 7) & 0x3)
/* Fully parenthesised so the result composes safely with any operator */
#define PAN_SYSVAL_ID_TO_TXS_IS_ARRAY(id) (!!((id) & (1 << 9)))

305 struct panfrost_sysvals {
306 /* The mapping of sysvals to uniforms, the count, and the off-by-one inverse */
307 unsigned sysvals[MAX_SYSVAL_COUNT];
308 unsigned sysval_count;
309 };
310
/* On Valhall, the driver gives the hardware a table of resource tables.
 * Resources are addressed as the index of the table together with the index of
 * the resource within the table. For simplicity, we put one type of resource
 * in each table and fix the numbering of the tables.
 *
 * This numbering is arbitrary.
 */
enum panfrost_resource_table {
   PAN_TABLE_UBO = 0,
   PAN_TABLE_ATTRIBUTE,
   PAN_TABLE_ATTRIBUTE_BUFFER,
   PAN_TABLE_SAMPLER,
   PAN_TABLE_TEXTURE,
   PAN_TABLE_IMAGE,
   PAN_TABLE_SSBO,

   /* Number of tables above; keep last */
   PAN_NUM_RESOURCE_TABLES
};

/* Number of 32-bit words reserved for a prepared Renderer State Descriptor
 * (used as the length of panfrost_compiled_shader::partial_rsd) */
#define RSD_WORDS 16

332 /* Variants bundle together to form the backing CSO, bundling multiple
333 * shaders with varying emulated features baked in
334 */
335 struct panfrost_fs_key {
336 /* Number of colour buffers if gl_FragColor is written */
337 unsigned nr_cbufs_for_fragcolor;
338
339 /* On Valhall, fixed_varying_mask of the linked vertex shader */
340 uint32_t fixed_varying_mask;
341
342 /* Midgard shaders that read the tilebuffer must be keyed for
343 * non-blendable formats
344 */
345 enum pipe_format rt_formats[8];
346
347 /* From rasterize state, to lower point sprites */
348 uint16_t sprite_coord_enable;
349
350 /* User clip plane lowering */
351 uint8_t clip_plane_enable;
352
353 bool line_smooth;
354 };
355
356 struct panfrost_shader_key {
357 union {
358 /* Vertex shaders do not use shader keys. However, we have a
359 * special "transform feedback" vertex program derived from a
360 * vertex shader. If vs_is_xfb is set on a vertex shader, this
361 * is a transform feedback shader, else it is a regular
362 * (unkeyed) vertex shader.
363 */
364 bool vs_is_xfb;
365
366 /* Fragment shaders use regular shader keys */
367 struct panfrost_fs_key fs;
368 };
369 };
370
371 struct panfrost_compiled_shader {
372 /* Respectively, shader binary and Renderer State Descriptor */
373 struct panfrost_pool_ref bin, state;
374
375 /* For fragment shaders, a prepared (but not uploaded RSD) */
376 uint32_t partial_rsd[RSD_WORDS];
377
378 struct pan_shader_info info;
379 struct panfrost_sysvals sysvals;
380
381 struct pan_earlyzs_lut earlyzs;
382
383 /* Linked varyings, for non-separable programs */
384 struct pan_linkage linkage;
385
386 struct pipe_stream_output_info stream_output;
387
388 struct panfrost_shader_key key;
389
390 /* Mask of state that dirties the sysvals */
391 unsigned dirty_3d, dirty_shader;
392 };
393
394 /* Shader CSO */
395 struct panfrost_uncompiled_shader {
396 /* NIR for the shader. For graphics, this will be non-NULL even for
397 * TGSI. For compute, this will be NULL after the shader is compiled,
398 * as we don't need any compute variants.
399 */
400 const nir_shader *nir;
401
402 /* A SHA1 of the serialized NIR for the disk cache. */
403 unsigned char nir_sha1[20];
404
405 /* Stream output information */
406 struct pipe_stream_output_info stream_output;
407
408 /** Lock for the variants array */
409 simple_mtx_t lock;
410
411 /* Array of panfrost_compiled_shader */
412 struct util_dynarray variants;
413
414 /* Compiled transform feedback program, if one is required */
415 struct panfrost_compiled_shader *xfb;
416
417 /* On vertex shaders, bit mask of special desktop-only varyings to link
418 * with the fragment shader. Used on Valhall to implement separable
419 * shaders for desktop GL.
420 */
421 uint32_t fixed_varying_mask;
422
423 /* If gl_FragColor was lowered, we need to optimize the stores later */
424 bool fragcolor_lowered;
425 };
426
427 /* The binary artefacts of compiling a shader. This differs from
428 * panfrost_compiled_shader, which adds extra metadata beyond compiling but
429 * throws away information not needed after the initial compile.
430 *
431 * This structure is serialized for the shader disk cache.
432 */
433 struct panfrost_shader_binary {
434 /* Collected information about the compiled shader */
435 struct pan_shader_info info;
436 struct panfrost_sysvals sysvals;
437
438 /* The binary itself */
439 struct util_dynarray binary;
440 };
441
/* Shader disk cache entry points; the definitions live outside this header.
 * A cache entry is identified by the (uncompiled shader, key) pair passed to
 * both store and retrieve. */
void
panfrost_disk_cache_store(struct disk_cache *cache,
                          const struct panfrost_uncompiled_shader *uncompiled,
                          const struct panfrost_shader_key *key,
                          const struct panfrost_shader_binary *binary);

/* NOTE(review): presumably returns true on a cache hit, with *binary filled
 * in -- confirm against the definition */
bool panfrost_disk_cache_retrieve(
   struct disk_cache *cache,
   const struct panfrost_uncompiled_shader *uncompiled,
   const struct panfrost_shader_key *key,
   struct panfrost_shader_binary *binary);

void panfrost_disk_cache_init(struct panfrost_screen *screen);

456 bool panfrost_nir_remove_fragcolor_stores(nir_shader *s, unsigned nr_cbufs);
457
458 bool panfrost_nir_lower_sysvals(nir_shader *s, unsigned arch,
459 struct panfrost_sysvals *sysvals);
460
461 bool panfrost_nir_lower_res_indices(nir_shader *shader,
462 struct panfrost_compile_inputs *inputs);
463
/** (Vertex buffer index, divisor) tuple that will become an Attribute Buffer
 * Descriptor at draw-time on Midgard
 */
struct pan_vertex_buffer {
   unsigned vbi;
   unsigned divisor;
};

/* NOTE(review): presumably returns the index in `buffers` of the entry
 * matching (vbi, divisor), appending one and bumping *nr_bufs when none
 * exists yet -- confirm against the definition */
unsigned pan_assign_vertex_buffer(struct pan_vertex_buffer *buffers,
                                  unsigned *nr_bufs, unsigned vbi,
                                  unsigned divisor);

/* CSO types that this header only refers to through pointers */
struct panfrost_zsa_state;
struct panfrost_sampler_state;
struct panfrost_sampler_view;
struct panfrost_vertex_state;

/* Convert a Gallium context pointer to the driver context that contains it.
 * This is a plain pointer cast, valid because `base` is the first member of
 * struct panfrost_context. */
static inline struct panfrost_context *
pan_context(struct pipe_context *pcontext)
{
   return (struct panfrost_context *)pcontext;
}

/* Convert a Gallium stream output target pointer to the driver wrapper that
 * contains it. This is a plain pointer cast, valid because `base` is the
 * first member of struct panfrost_streamout_target. */
static inline struct panfrost_streamout_target *
pan_so_target(struct pipe_stream_output_target *target)
{
   return (struct panfrost_streamout_target *)target;
}

493 struct pipe_context *panfrost_create_context(struct pipe_screen *screen,
494 void *priv, unsigned flags);
495
496 bool panfrost_writes_point_size(struct panfrost_context *ctx);
497
498 struct panfrost_ptr panfrost_vertex_tiler_job(struct panfrost_context *ctx,
499 bool is_tiler);
500
501 void panfrost_flush(struct pipe_context *pipe, struct pipe_fence_handle **fence,
502 unsigned flags);
503
504 bool panfrost_render_condition_check(struct panfrost_context *ctx);
505
506 void panfrost_update_shader_variant(struct panfrost_context *ctx,
507 enum pipe_shader_type type);
508
509 void panfrost_analyze_sysvals(struct panfrost_compiled_shader *ss);
510
511 mali_ptr
512 panfrost_get_index_buffer(struct panfrost_batch *batch,
513 const struct pipe_draw_info *info,
514 const struct pipe_draw_start_count_bias *draw);
515
516 mali_ptr
517 panfrost_get_index_buffer_bounded(struct panfrost_batch *batch,
518 const struct pipe_draw_info *info,
519 const struct pipe_draw_start_count_bias *draw,
520 unsigned *min_index, unsigned *max_index);
521
522 /* Instancing */
523
524 mali_ptr panfrost_vertex_buffer_address(struct panfrost_context *ctx,
525 unsigned i);
526
527 void panfrost_shader_context_init(struct pipe_context *pctx);
528
529 static inline void
panfrost_dirty_state_all(struct panfrost_context * ctx)530 panfrost_dirty_state_all(struct panfrost_context *ctx)
531 {
532 ctx->dirty = ~0;
533
534 for (unsigned i = 0; i < PIPE_SHADER_TYPES; ++i)
535 ctx->dirty_shader[i] = ~0;
536 }
537
538 static inline void
panfrost_clean_state_3d(struct panfrost_context * ctx)539 panfrost_clean_state_3d(struct panfrost_context *ctx)
540 {
541 ctx->dirty = 0;
542
543 for (unsigned i = 0; i < PIPE_SHADER_TYPES; ++i) {
544 if (i != PIPE_SHADER_COMPUTE)
545 ctx->dirty_shader[i] = 0;
546 }
547 }
548
549 void panfrost_set_batch_masks_blend(struct panfrost_batch *batch);
550
551 void panfrost_set_batch_masks_zs(struct panfrost_batch *batch);
552
553 void panfrost_track_image_access(struct panfrost_batch *batch,
554 enum pipe_shader_type stage,
555 struct pipe_image_view *image);
556
557 void panfrost_context_reinit(struct panfrost_context *ctx);
558
559 #endif
560