/*
 * Copyright © 2014 Rob Clark <[email protected]>
 * SPDX-License-Identifier: MIT
 *
 * Authors:
 *    Rob Clark <[email protected]>
 */

#include "pipe/p_state.h"
#include "util/format/u_format.h"
#include "util/u_helpers.h"
#include "util/u_memory.h"
#include "util/u_string.h"
#include "util/u_viewport.h"

#include "freedreno_query_hw.h"
#include "freedreno_resource.h"

#include "fd4_blend.h"
#include "fd4_context.h"
#include "fd4_emit.h"
#include "fd4_format.h"
#include "fd4_image.h"
#include "fd4_program.h"
#include "fd4_rasterizer.h"
#include "fd4_texture.h"
#include "fd4_zsa.h"

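/* ir3_const.h is a template header: the driver provides a handful of
 * hooks (is_stateobj(), emit_const_ptrs(), and the two hooks #define'd
 * below), and it builds the generic ir3 const-state emission
 * (ir3_emit_vs_consts()/ir3_emit_fs_consts()/etc) on top of them.
 */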
#define emit_const_user fd4_emit_const_user
#define emit_const_bo fd4_emit_const_bo
#include "ir3_const.h"

/* regid:          base const register
 * prsc or dwords: buffer containing constant values
 * sizedwords:     size of const value buffer
 */
static void
fd4_emit_const_user(struct fd_ringbuffer *ring,
                    const struct ir3_shader_variant *v, uint32_t regid,
                    uint32_t sizedwords, const uint32_t *dwords)
{
   emit_const_asserts(ring, v, regid, sizedwords);

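   /* SS4_DIRECT: the payload dwords follow inline in the packet. DST_OFF
    * and NUM_UNIT count in vec4 units, hence the divide-by-4 (alignment
    * is checked by emit_const_asserts() above).
    */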
   OUT_PKT3(ring, CP_LOAD_STATE4, 2 + sizedwords);
   OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(regid / 4) |
                     CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) |
                     CP_LOAD_STATE4_0_STATE_BLOCK(fd4_stage2shadersb(v->type)) |
                     CP_LOAD_STATE4_0_NUM_UNIT(sizedwords / 4));
   OUT_RING(ring, CP_LOAD_STATE4_1_EXT_SRC_ADDR(0) |
                     CP_LOAD_STATE4_1_STATE_TYPE(ST4_CONSTANTS));
   for (int i = 0; i < sizedwords; i++)
      OUT_RING(ring, dwords[i]);
}

static void
fd4_emit_const_bo(struct fd_ringbuffer *ring,
                  const struct ir3_shader_variant *v, uint32_t regid,
                  uint32_t offset, uint32_t sizedwords, struct fd_bo *bo)
{
   uint32_t dst_off = regid / 4;
   assert(dst_off % 4 == 0);
   uint32_t num_unit = sizedwords / 4;
   assert(num_unit % 4 == 0);

   emit_const_asserts(ring, v, regid, sizedwords);

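   /* SS4_INDIRECT: the CP fetches the payload from the BO via the reloc
    * below instead of from inline dwords, so the packet itself is only
    * two dwords long.
    */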
   OUT_PKT3(ring, CP_LOAD_STATE4, 2);
   OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(dst_off) |
                     CP_LOAD_STATE4_0_STATE_SRC(SS4_INDIRECT) |
                     CP_LOAD_STATE4_0_STATE_BLOCK(fd4_stage2shadersb(v->type)) |
                     CP_LOAD_STATE4_0_NUM_UNIT(num_unit));
   OUT_RELOC(ring, bo, offset, CP_LOAD_STATE4_1_STATE_TYPE(ST4_CONSTANTS), 0);
}

static void
fd4_emit_const_ptrs(struct fd_ringbuffer *ring, gl_shader_stage type,
                    uint32_t regid, uint32_t num, struct fd_bo **bos,
                    uint32_t *offsets)
{
   uint32_t anum = align(num, 4);
   uint32_t i;

   assert((regid % 4) == 0);

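   /* This emits a table of buffer base addresses (used for UBO pointers)
    * into const registers, padded to a vec4 boundary: unbound slots get a
    * recognizable 0xbad00000 canary, alignment padding gets 0xffffffff.
    */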
   OUT_PKT3(ring, CP_LOAD_STATE4, 2 + anum);
   OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(regid / 4) |
                     CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) |
                     CP_LOAD_STATE4_0_STATE_BLOCK(fd4_stage2shadersb(type)) |
                     CP_LOAD_STATE4_0_NUM_UNIT(anum / 4));
   OUT_RING(ring, CP_LOAD_STATE4_1_EXT_SRC_ADDR(0) |
                     CP_LOAD_STATE4_1_STATE_TYPE(ST4_CONSTANTS));

   for (i = 0; i < num; i++) {
      if (bos[i]) {
         OUT_RELOC(ring, bos[i], offsets[i], 0, 0);
      } else {
         OUT_RING(ring, 0xbad00000 | (i << 16));
      }
   }

   for (; i < anum; i++)
      OUT_RING(ring, 0xffffffff);
}

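/* Consumed by ir3_const.h (which forward-declares it): a4xx emits all
 * const state directly into the main ring, never into separate stateobj
 * ringbuffers, so this is trivially false.
 */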
static bool
is_stateobj(struct fd_ringbuffer *ring)
{
   return false;
}

static void
emit_const_ptrs(struct fd_ringbuffer *ring, const struct ir3_shader_variant *v,
                uint32_t dst_offset, uint32_t num, struct fd_bo **bos,
                uint32_t *offsets)
{
   /* TODO inline this */
   assert(dst_offset + num <= v->constlen * 4);
   fd4_emit_const_ptrs(ring, v->type, dst_offset, num, bos, offsets);
}

void
fd4_emit_cs_consts(const struct ir3_shader_variant *v,
                   struct fd_ringbuffer *ring, struct fd_context *ctx,
                   const struct pipe_grid_info *info)
{
   ir3_emit_cs_consts(v, ring, ctx, info);
}

static void
emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring,
              enum a4xx_state_block sb, struct fd_texture_stateobj *tex,
              const struct ir3_shader_variant *v)
{
   static const uint32_t bcolor_reg[] = {
      [SB4_VS_TEX] = REG_A4XX_TPL1_TP_VS_BORDER_COLOR_BASE_ADDR,
      [SB4_FS_TEX] = REG_A4XX_TPL1_TP_FS_BORDER_COLOR_BASE_ADDR,
      [SB4_CS_TEX] = REG_A4XX_TPL1_TP_CS_BORDER_COLOR_BASE_ADDR,
   };
   struct fd4_context *fd4_ctx = fd4_context(ctx);
   bool needs_border = false;
   unsigned i;

   if (tex->num_samplers > 0 || tex->num_textures > 0) {
      int num_samplers = tex->num_samplers;

      /* We want to always make sure that there's at least one sampler if
       * there are going to be texture accesses. Gallium might not upload a
       * sampler for e.g. buffer textures.
       */
      if (num_samplers == 0)
         num_samplers++;

      /* not sure if this is an a420.0 workaround, but we seem
       * to need to emit these in pairs.. emit a final dummy
       * entry if odd # of samplers:
       */
      num_samplers = align(num_samplers, 2);

      /* output sampler state: */
      OUT_PKT3(ring, CP_LOAD_STATE4, 2 + (2 * num_samplers));
      OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(0) |
                        CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) |
                        CP_LOAD_STATE4_0_STATE_BLOCK(sb) |
                        CP_LOAD_STATE4_0_NUM_UNIT(num_samplers));
      OUT_RING(ring, CP_LOAD_STATE4_1_STATE_TYPE(ST4_SHADER) |
                        CP_LOAD_STATE4_1_EXT_SRC_ADDR(0));
      for (i = 0; i < tex->num_samplers; i++) {
         static const struct fd4_sampler_stateobj dummy_sampler = {};
         const struct fd4_sampler_stateobj *sampler =
            tex->samplers[i] ? fd4_sampler_stateobj(tex->samplers[i])
                             : &dummy_sampler;
         OUT_RING(ring, sampler->texsamp0);
         OUT_RING(ring, sampler->texsamp1);

         needs_border |= sampler->needs_border;
      }

      for (; i < num_samplers; i++) {
         OUT_RING(ring, 0x00000000);
         OUT_RING(ring, 0x00000000);
      }
   }

   if (tex->num_textures > 0) {
      unsigned num_textures =
         tex->num_textures + v->astc_srgb.count + v->tg4.count;

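      /* In addition to the app's own textures, extra texture states are
       * appended for two shader workarounds: ASTC+sRGB views re-emitted
       * with the sRGB bit cleared (the shader does the sRGB decode), and
       * tg4 views re-emitted with an identity swizzle (the shader applies
       * the real swizzle after the gather).
       */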
      /* emit texture state: */
      OUT_PKT3(ring, CP_LOAD_STATE4, 2 + (8 * num_textures));
      OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(0) |
                        CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) |
                        CP_LOAD_STATE4_0_STATE_BLOCK(sb) |
                        CP_LOAD_STATE4_0_NUM_UNIT(num_textures));
      OUT_RING(ring, CP_LOAD_STATE4_1_STATE_TYPE(ST4_CONSTANTS) |
                        CP_LOAD_STATE4_1_EXT_SRC_ADDR(0));
      for (i = 0; i < tex->num_textures; i++) {
         static const struct fd4_pipe_sampler_view dummy_view = {};
         const struct fd4_pipe_sampler_view *view =
            tex->textures[i] ? fd4_pipe_sampler_view(tex->textures[i])
                             : &dummy_view;

         OUT_RING(ring, view->texconst0);
         OUT_RING(ring, view->texconst1);
         OUT_RING(ring, view->texconst2);
         OUT_RING(ring, view->texconst3);
         if (view->base.texture) {
            struct fd_resource *rsc = fd_resource(view->base.texture);
            if (view->base.format == PIPE_FORMAT_X32_S8X24_UINT)
               rsc = rsc->stencil;
            OUT_RELOC(ring, rsc->bo, view->offset, view->texconst4, 0);
         } else {
            OUT_RING(ring, 0x00000000);
         }
         OUT_RING(ring, 0x00000000);
         OUT_RING(ring, 0x00000000);
         OUT_RING(ring, 0x00000000);
      }

      for (i = 0; i < v->astc_srgb.count; i++) {
         static const struct fd4_pipe_sampler_view dummy_view = {};
         const struct fd4_pipe_sampler_view *view;
         unsigned idx = v->astc_srgb.orig_idx[i];

         view = tex->textures[idx] ? fd4_pipe_sampler_view(tex->textures[idx])
                                   : &dummy_view;

         assert(view->texconst0 & A4XX_TEX_CONST_0_SRGB);

         OUT_RING(ring, view->texconst0 & ~A4XX_TEX_CONST_0_SRGB);
         OUT_RING(ring, view->texconst1);
         OUT_RING(ring, view->texconst2);
         OUT_RING(ring, view->texconst3);
         if (view->base.texture) {
            struct fd_resource *rsc = fd_resource(view->base.texture);
            OUT_RELOC(ring, rsc->bo, view->offset, view->texconst4, 0);
         } else {
            OUT_RING(ring, 0x00000000);
         }
         OUT_RING(ring, 0x00000000);
         OUT_RING(ring, 0x00000000);
         OUT_RING(ring, 0x00000000);
      }

      for (i = 0; i < v->tg4.count; i++) {
         static const struct fd4_pipe_sampler_view dummy_view = {};
         const struct fd4_pipe_sampler_view *view;
         unsigned idx = v->tg4.orig_idx[i];

         view = tex->textures[idx] ? fd4_pipe_sampler_view(tex->textures[idx])
                                   : &dummy_view;

         unsigned texconst0 = view->texconst0 & ~(0xfff << 4);
         texconst0 |= A4XX_TEX_CONST_0_SWIZ_X(A4XX_TEX_X) |
                      A4XX_TEX_CONST_0_SWIZ_Y(A4XX_TEX_Y) |
                      A4XX_TEX_CONST_0_SWIZ_Z(A4XX_TEX_Z) |
                      A4XX_TEX_CONST_0_SWIZ_W(A4XX_TEX_W);

         /* Remap integer formats as unorm (will be fixed up in shader) */
         if (util_format_is_pure_integer(view->base.format)) {
            texconst0 &= ~A4XX_TEX_CONST_0_FMT__MASK;
            switch (fd4_pipe2tex(view->base.format)) {
            case TFMT4_8_8_8_8_UINT:
            case TFMT4_8_8_8_8_SINT:
               texconst0 |= A4XX_TEX_CONST_0_FMT(TFMT4_8_8_8_8_UNORM);
               break;
            case TFMT4_8_8_UINT:
            case TFMT4_8_8_SINT:
               texconst0 |= A4XX_TEX_CONST_0_FMT(TFMT4_8_8_UNORM);
               break;
            case TFMT4_8_UINT:
            case TFMT4_8_SINT:
               texconst0 |= A4XX_TEX_CONST_0_FMT(TFMT4_8_UNORM);
               break;

            case TFMT4_16_16_16_16_UINT:
            case TFMT4_16_16_16_16_SINT:
               texconst0 |= A4XX_TEX_CONST_0_FMT(TFMT4_16_16_16_16_UNORM);
               break;
            case TFMT4_16_16_UINT:
            case TFMT4_16_16_SINT:
               texconst0 |= A4XX_TEX_CONST_0_FMT(TFMT4_16_16_UNORM);
               break;
            case TFMT4_16_UINT:
            case TFMT4_16_SINT:
               texconst0 |= A4XX_TEX_CONST_0_FMT(TFMT4_16_UNORM);
               break;

            case TFMT4_32_32_32_32_UINT:
            case TFMT4_32_32_32_32_SINT:
               texconst0 |= A4XX_TEX_CONST_0_FMT(TFMT4_32_32_32_32_FLOAT);
               break;
            case TFMT4_32_32_UINT:
            case TFMT4_32_32_SINT:
               texconst0 |= A4XX_TEX_CONST_0_FMT(TFMT4_32_32_FLOAT);
               break;
            case TFMT4_32_UINT:
            case TFMT4_32_SINT:
               texconst0 |= A4XX_TEX_CONST_0_FMT(TFMT4_32_FLOAT);
               break;

            case TFMT4_10_10_10_2_UINT:
               texconst0 |= A4XX_TEX_CONST_0_FMT(TFMT4_10_10_10_2_UNORM);
               break;

            default:
               assert(0);
            }
         }

         OUT_RING(ring, texconst0);
         OUT_RING(ring, view->texconst1);
         OUT_RING(ring, view->texconst2);
         OUT_RING(ring, view->texconst3);
         if (view->base.texture) {
            struct fd_resource *rsc = fd_resource(view->base.texture);
            OUT_RELOC(ring, rsc->bo, view->offset, view->texconst4, 0);
         } else {
            OUT_RING(ring, 0x00000000);
         }
         OUT_RING(ring, 0x00000000);
         OUT_RING(ring, 0x00000000);
         OUT_RING(ring, 0x00000000);
      }
   } else {
      assert(v->astc_srgb.count == 0);
      assert(v->tg4.count == 0);
   }

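   /* Border colors don't live in the sampler state itself on a4xx; the TP
    * reads them from a buffer pointed at by the per-stage
    * TPL1_TP_*_BORDER_COLOR_BASE_ADDR register, so upload them on demand:
    */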
   if (needs_border) {
      unsigned off;
      void *ptr;

      u_upload_alloc(fd4_ctx->border_color_uploader, 0,
                     BORDER_COLOR_UPLOAD_SIZE, BORDER_COLOR_UPLOAD_SIZE, &off,
                     &fd4_ctx->border_color_buf, &ptr);

      fd_setup_border_colors(tex, ptr, 0);
      OUT_PKT0(ring, bcolor_reg[sb], 1);
      OUT_RELOC(ring, fd_resource(fd4_ctx->border_color_buf)->bo, off, 0, 0);

      u_upload_unmap(fd4_ctx->border_color_uploader);
   }
}

/* emit texture state for mem->gmem restore operation.. eventually it would
 * be good to get rid of this and use normal CSO/etc state for more of these
 * special cases..
 */
void
fd4_emit_gmem_restore_tex(struct fd_ringbuffer *ring, unsigned nr_bufs,
                          struct pipe_surface **bufs)
{
   unsigned char mrt_comp[A4XX_MAX_RENDER_TARGETS];
   int i;

   for (i = 0; i < A4XX_MAX_RENDER_TARGETS; i++) {
      mrt_comp[i] = (i < nr_bufs) ? 0xf : 0;
   }

   /* output sampler state: */
   OUT_PKT3(ring, CP_LOAD_STATE4, 2 + (2 * nr_bufs));
   OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(0) |
                     CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) |
                     CP_LOAD_STATE4_0_STATE_BLOCK(SB4_FS_TEX) |
                     CP_LOAD_STATE4_0_NUM_UNIT(nr_bufs));
   OUT_RING(ring, CP_LOAD_STATE4_1_STATE_TYPE(ST4_SHADER) |
                     CP_LOAD_STATE4_1_EXT_SRC_ADDR(0));
   for (i = 0; i < nr_bufs; i++) {
      OUT_RING(ring, A4XX_TEX_SAMP_0_XY_MAG(A4XX_TEX_NEAREST) |
                        A4XX_TEX_SAMP_0_XY_MIN(A4XX_TEX_NEAREST) |
                        A4XX_TEX_SAMP_0_WRAP_S(A4XX_TEX_CLAMP_TO_EDGE) |
                        A4XX_TEX_SAMP_0_WRAP_T(A4XX_TEX_CLAMP_TO_EDGE) |
                        A4XX_TEX_SAMP_0_WRAP_R(A4XX_TEX_REPEAT));
      OUT_RING(ring, 0x00000000);
   }

   /* emit texture state: */
   OUT_PKT3(ring, CP_LOAD_STATE4, 2 + (8 * nr_bufs));
   OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(0) |
                     CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) |
                     CP_LOAD_STATE4_0_STATE_BLOCK(SB4_FS_TEX) |
                     CP_LOAD_STATE4_0_NUM_UNIT(nr_bufs));
   OUT_RING(ring, CP_LOAD_STATE4_1_STATE_TYPE(ST4_CONSTANTS) |
                     CP_LOAD_STATE4_1_EXT_SRC_ADDR(0));
   for (i = 0; i < nr_bufs; i++) {
      if (bufs[i]) {
         struct fd_resource *rsc = fd_resource(bufs[i]->texture);
         enum pipe_format format = fd_gmem_restore_format(bufs[i]->format);

         /* The restore blit_zs shader expects stencil in sampler 0,
          * and depth in sampler 1
          */
         if (rsc->stencil && (i == 0)) {
            rsc = rsc->stencil;
            format = fd_gmem_restore_format(rsc->b.b.format);
         }

         /* note: PIPE_BUFFER disallowed for surfaces */
         unsigned lvl = bufs[i]->u.tex.level;
         unsigned offset =
            fd_resource_offset(rsc, lvl, bufs[i]->u.tex.first_layer);

         /* z32 restore is accomplished using depth write. If there is
          * no stencil component (ie. PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)
          * then no render target:
          *
          * (The same applies for z32_s8x24, since for stencil sampler
          * state the above 'if' will replace 'format' with s8)
          */
         if ((format == PIPE_FORMAT_Z32_FLOAT) ||
             (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT))
            mrt_comp[i] = 0;

         assert(bufs[i]->u.tex.first_layer == bufs[i]->u.tex.last_layer);

         OUT_RING(ring, A4XX_TEX_CONST_0_FMT(fd4_pipe2tex(format)) |
                           A4XX_TEX_CONST_0_TYPE(A4XX_TEX_2D) |
                           fd4_tex_swiz(format, PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y,
                                        PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W));
         OUT_RING(ring, A4XX_TEX_CONST_1_WIDTH(bufs[i]->width) |
                           A4XX_TEX_CONST_1_HEIGHT(bufs[i]->height));
         OUT_RING(ring, A4XX_TEX_CONST_2_PITCH(fd_resource_pitch(rsc, lvl)));
         OUT_RING(ring, 0x00000000);
         OUT_RELOC(ring, rsc->bo, offset, 0, 0);
         OUT_RING(ring, 0x00000000);
         OUT_RING(ring, 0x00000000);
         OUT_RING(ring, 0x00000000);
      } else {
         OUT_RING(ring, A4XX_TEX_CONST_0_FMT(0) |
                           A4XX_TEX_CONST_0_TYPE(A4XX_TEX_2D) |
                           A4XX_TEX_CONST_0_SWIZ_X(A4XX_TEX_ONE) |
                           A4XX_TEX_CONST_0_SWIZ_Y(A4XX_TEX_ONE) |
                           A4XX_TEX_CONST_0_SWIZ_Z(A4XX_TEX_ONE) |
                           A4XX_TEX_CONST_0_SWIZ_W(A4XX_TEX_ONE));
         OUT_RING(ring, A4XX_TEX_CONST_1_WIDTH(0) | A4XX_TEX_CONST_1_HEIGHT(0));
         OUT_RING(ring, A4XX_TEX_CONST_2_PITCH(0));
         OUT_RING(ring, 0x00000000);
         OUT_RING(ring, 0x00000000);
         OUT_RING(ring, 0x00000000);
         OUT_RING(ring, 0x00000000);
         OUT_RING(ring, 0x00000000);
      }
   }

   OUT_PKT0(ring, REG_A4XX_RB_RENDER_COMPONENTS, 1);
   OUT_RING(ring, A4XX_RB_RENDER_COMPONENTS_RT0(mrt_comp[0]) |
                     A4XX_RB_RENDER_COMPONENTS_RT1(mrt_comp[1]) |
                     A4XX_RB_RENDER_COMPONENTS_RT2(mrt_comp[2]) |
                     A4XX_RB_RENDER_COMPONENTS_RT3(mrt_comp[3]) |
                     A4XX_RB_RENDER_COMPONENTS_RT4(mrt_comp[4]) |
                     A4XX_RB_RENDER_COMPONENTS_RT5(mrt_comp[5]) |
                     A4XX_RB_RENDER_COMPONENTS_RT6(mrt_comp[6]) |
                     A4XX_RB_RENDER_COMPONENTS_RT7(mrt_comp[7]));
}

static void
emit_ssbos(struct fd_context *ctx, struct fd_ringbuffer *ring,
           enum a4xx_state_block sb, struct fd_shaderbuf_stateobj *so)
{
   unsigned count = util_last_bit(so->enabled_mask);

   if (count == 0)
      return;

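   /* SSBO state is emitted as two CP_LOAD_STATE4 packets: STATE_TYPE 0
    * carries the buffer addresses (4 dwords per buffer), STATE_TYPE 1 the
    * sizes, encoded dword-granular in the SSBO_1 width/height fields.
    */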
   OUT_PKT3(ring, CP_LOAD_STATE4, 2 + (4 * count));
   OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(0) |
                     CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) |
                     CP_LOAD_STATE4_0_STATE_BLOCK(sb) |
                     CP_LOAD_STATE4_0_NUM_UNIT(count));
   OUT_RING(ring, CP_LOAD_STATE4_1_STATE_TYPE(0) |
                     CP_LOAD_STATE4_1_EXT_SRC_ADDR(0));
   for (unsigned i = 0; i < count; i++) {
      struct pipe_shader_buffer *buf = &so->sb[i];
      if (buf->buffer) {
         struct fd_resource *rsc = fd_resource(buf->buffer);
         OUT_RELOC(ring, rsc->bo, buf->buffer_offset, 0, 0);
      } else {
         OUT_RING(ring, 0x00000000);
      }
      OUT_RING(ring, 0x00000000);
      OUT_RING(ring, 0x00000000);
      OUT_RING(ring, 0x00000000);
   }

   OUT_PKT3(ring, CP_LOAD_STATE4, 2 + (2 * count));
   OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(0) |
                     CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) |
                     CP_LOAD_STATE4_0_STATE_BLOCK(sb) |
                     CP_LOAD_STATE4_0_NUM_UNIT(count));
   OUT_RING(ring, CP_LOAD_STATE4_1_STATE_TYPE(1) |
                     CP_LOAD_STATE4_1_EXT_SRC_ADDR(0));
   for (unsigned i = 0; i < count; i++) {
      struct pipe_shader_buffer *buf = &so->sb[i];
      unsigned sz = buf->buffer_size;

      /* width is in dwords, overflows into height: */
      sz /= 4;

      OUT_RING(ring, A4XX_SSBO_1_0_WIDTH(sz));
      OUT_RING(ring, A4XX_SSBO_1_1_HEIGHT(sz >> 16));
   }
}

void
fd4_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd4_emit *emit)
{
   int32_t i, j, last = -1;
   uint32_t total_in = 0;
   const struct fd_vertex_state *vtx = emit->vtx;
   const struct ir3_shader_variant *vp = fd4_emit_get_vp(emit);
   unsigned vertex_regid = regid(63, 0);
   unsigned instance_regid = regid(63, 0);
   unsigned vtxcnt_regid = regid(63, 0);

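   /* regid(63, 0) (r63.x) is the ir3 "unused" sentinel; the sysval regids
    * above keep that value unless the shader actually reads them, which
    * also feeds into the SWITCHNEXT decisions below.
    */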
   /* Note that sysvals come *after* normal inputs: */
   for (i = 0; i < vp->inputs_count; i++) {
      if (!vp->inputs[i].compmask)
         continue;
      if (vp->inputs[i].sysval) {
         switch (vp->inputs[i].slot) {
         case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE:
            vertex_regid = vp->inputs[i].regid;
            break;
         case SYSTEM_VALUE_INSTANCE_ID:
            instance_regid = vp->inputs[i].regid;
            break;
         case SYSTEM_VALUE_VERTEX_CNT:
            vtxcnt_regid = vp->inputs[i].regid;
            break;
         default:
            unreachable("invalid system value");
            break;
         }
      } else if (i < vtx->vtx->num_elements) {
         last = i;
      }
   }

   for (i = 0, j = 0; i <= last; i++) {
      assert(!vp->inputs[i].sysval);
      if (vp->inputs[i].compmask) {
         struct pipe_vertex_element *elem = &vtx->vtx->pipe[i];
         const struct pipe_vertex_buffer *vb =
            &vtx->vertexbuf.vb[elem->vertex_buffer_index];
         struct fd_resource *rsc = fd_resource(vb->buffer.resource);
         enum pipe_format pfmt = elem->src_format;
         enum a4xx_vtx_fmt fmt = fd4_pipe2vtx(pfmt);
         bool switchnext = (i != last) || (vertex_regid != regid(63, 0)) ||
                           (instance_regid != regid(63, 0)) ||
                           (vtxcnt_regid != regid(63, 0));
         bool isint = util_format_is_pure_integer(pfmt);
         uint32_t fs = util_format_get_blocksize(pfmt);
         uint32_t off = vb->buffer_offset + elem->src_offset;
         uint32_t size = vb->buffer.resource->width0 - off;
         assert(fmt != VFMT4_NONE);

         OUT_PKT0(ring, REG_A4XX_VFD_FETCH(j), 4);
         OUT_RING(ring, A4XX_VFD_FETCH_INSTR_0_FETCHSIZE(fs - 1) |
                           A4XX_VFD_FETCH_INSTR_0_BUFSTRIDE(elem->src_stride) |
                           COND(elem->instance_divisor,
                                A4XX_VFD_FETCH_INSTR_0_INSTANCED) |
                           COND(switchnext, A4XX_VFD_FETCH_INSTR_0_SWITCHNEXT));
         OUT_RELOC(ring, rsc->bo, off, 0, 0);
         OUT_RING(ring, A4XX_VFD_FETCH_INSTR_2_SIZE(size));
         OUT_RING(ring, A4XX_VFD_FETCH_INSTR_3_STEPRATE(
                           MAX2(1, elem->instance_divisor)));

         OUT_PKT0(ring, REG_A4XX_VFD_DECODE_INSTR(j), 1);
         OUT_RING(ring,
                  A4XX_VFD_DECODE_INSTR_CONSTFILL |
                     A4XX_VFD_DECODE_INSTR_WRITEMASK(vp->inputs[i].compmask) |
                     A4XX_VFD_DECODE_INSTR_FORMAT(fmt) |
                     A4XX_VFD_DECODE_INSTR_SWAP(fd4_pipe2swap(pfmt)) |
                     A4XX_VFD_DECODE_INSTR_REGID(vp->inputs[i].regid) |
                     A4XX_VFD_DECODE_INSTR_SHIFTCNT(fs) |
                     A4XX_VFD_DECODE_INSTR_LASTCOMPVALID |
                     COND(isint, A4XX_VFD_DECODE_INSTR_INT) |
                     COND(switchnext, A4XX_VFD_DECODE_INSTR_SWITCHNEXT));

         total_in += util_bitcount(vp->inputs[i].compmask);
         j++;
      }
   }

   /* hw doesn't like to be configured for zero vbo's, it seems: */
   if (last < 0) {
      /* just recycle the shader bo, we just need to point to *something*
       * valid:
       */
      struct fd_bo *dummy_vbo = vp->bo;
      bool switchnext = (vertex_regid != regid(63, 0)) ||
                        (instance_regid != regid(63, 0)) ||
                        (vtxcnt_regid != regid(63, 0));

      OUT_PKT0(ring, REG_A4XX_VFD_FETCH(0), 4);
      OUT_RING(ring, A4XX_VFD_FETCH_INSTR_0_FETCHSIZE(0) |
                        A4XX_VFD_FETCH_INSTR_0_BUFSTRIDE(0) |
                        COND(switchnext, A4XX_VFD_FETCH_INSTR_0_SWITCHNEXT));
      OUT_RELOC(ring, dummy_vbo, 0, 0, 0);
      OUT_RING(ring, A4XX_VFD_FETCH_INSTR_2_SIZE(1));
      OUT_RING(ring, A4XX_VFD_FETCH_INSTR_3_STEPRATE(1));

      OUT_PKT0(ring, REG_A4XX_VFD_DECODE_INSTR(0), 1);
      OUT_RING(ring, A4XX_VFD_DECODE_INSTR_CONSTFILL |
                        A4XX_VFD_DECODE_INSTR_WRITEMASK(0x1) |
                        A4XX_VFD_DECODE_INSTR_FORMAT(VFMT4_8_UNORM) |
                        A4XX_VFD_DECODE_INSTR_SWAP(XYZW) |
                        A4XX_VFD_DECODE_INSTR_REGID(regid(0, 0)) |
                        A4XX_VFD_DECODE_INSTR_SHIFTCNT(1) |
                        A4XX_VFD_DECODE_INSTR_LASTCOMPVALID |
                        COND(switchnext, A4XX_VFD_DECODE_INSTR_SWITCHNEXT));

      total_in = 1;
      j = 1;
   }

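   /* j counts the VFD fetch/decode slots actually programmed above, which
    * becomes the STRMDECINSTRCNT/STRMFETCHINSTRCNT counts below.
    */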
   OUT_PKT0(ring, REG_A4XX_VFD_CONTROL_0, 5);
   OUT_RING(ring, A4XX_VFD_CONTROL_0_TOTALATTRTOVS(total_in) |
                     0xa0000 | /* XXX */
                     A4XX_VFD_CONTROL_0_STRMDECINSTRCNT(j) |
                     A4XX_VFD_CONTROL_0_STRMFETCHINSTRCNT(j));
   OUT_RING(ring, A4XX_VFD_CONTROL_1_MAXSTORAGE(129) | // XXX
                     A4XX_VFD_CONTROL_1_REGID4VTX(vertex_regid) |
                     A4XX_VFD_CONTROL_1_REGID4INST(instance_regid));
   OUT_RING(ring, 0x00000000); /* XXX VFD_CONTROL_2 */
   OUT_RING(ring, A4XX_VFD_CONTROL_3_REGID_VTXCNT(vtxcnt_regid));
   OUT_RING(ring, 0x00000000); /* XXX VFD_CONTROL_4 */

   /* cache invalidate, otherwise vertex fetch could see
    * stale vbo contents:
    */
   OUT_PKT0(ring, REG_A4XX_UCHE_INVALIDATE0, 2);
   OUT_RING(ring, 0x00000000);
   OUT_RING(ring, 0x00000012);
}

void
fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
               struct fd4_emit *emit)
{
   const struct ir3_shader_variant *vp = fd4_emit_get_vp(emit);
   const struct ir3_shader_variant *fp = fd4_emit_get_fp(emit);
   const enum fd_dirty_3d_state dirty = emit->dirty;

   emit_marker(ring, 5);

   if ((dirty & FD_DIRTY_FRAMEBUFFER) && !emit->binning_pass) {
      struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer;
      unsigned char mrt_comp[A4XX_MAX_RENDER_TARGETS] = {0};

      for (unsigned i = 0; i < A4XX_MAX_RENDER_TARGETS; i++) {
         mrt_comp[i] = ((i < pfb->nr_cbufs) && pfb->cbufs[i]) ? 0xf : 0;
      }

      OUT_PKT0(ring, REG_A4XX_RB_RENDER_COMPONENTS, 1);
      OUT_RING(ring, A4XX_RB_RENDER_COMPONENTS_RT0(mrt_comp[0]) |
                        A4XX_RB_RENDER_COMPONENTS_RT1(mrt_comp[1]) |
                        A4XX_RB_RENDER_COMPONENTS_RT2(mrt_comp[2]) |
                        A4XX_RB_RENDER_COMPONENTS_RT3(mrt_comp[3]) |
                        A4XX_RB_RENDER_COMPONENTS_RT4(mrt_comp[4]) |
                        A4XX_RB_RENDER_COMPONENTS_RT5(mrt_comp[5]) |
                        A4XX_RB_RENDER_COMPONENTS_RT6(mrt_comp[6]) |
                        A4XX_RB_RENDER_COMPONENTS_RT7(mrt_comp[7]));
   }

   if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_FRAMEBUFFER)) {
      struct fd4_zsa_stateobj *zsa = fd4_zsa_stateobj(ctx->zsa);
      struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer;
      uint32_t rb_alpha_control = zsa->rb_alpha_control;

      if (util_format_is_pure_integer(pipe_surface_format(pfb->cbufs[0])))
         rb_alpha_control &= ~A4XX_RB_ALPHA_CONTROL_ALPHA_TEST;

      OUT_PKT0(ring, REG_A4XX_RB_ALPHA_CONTROL, 1);
      OUT_RING(ring, rb_alpha_control);

      OUT_PKT0(ring, REG_A4XX_RB_STENCIL_CONTROL, 2);
      OUT_RING(ring, zsa->rb_stencil_control);
      OUT_RING(ring, zsa->rb_stencil_control2);
   }

   if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_STENCIL_REF)) {
      struct fd4_zsa_stateobj *zsa = fd4_zsa_stateobj(ctx->zsa);
      struct pipe_stencil_ref *sr = &ctx->stencil_ref;

      OUT_PKT0(ring, REG_A4XX_RB_STENCILREFMASK, 2);
      OUT_RING(ring, zsa->rb_stencilrefmask |
                        A4XX_RB_STENCILREFMASK_STENCILREF(sr->ref_value[0]));
      OUT_RING(ring, zsa->rb_stencilrefmask_bf |
                        A4XX_RB_STENCILREFMASK_BF_STENCILREF(sr->ref_value[1]));
   }

   if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_RASTERIZER | FD_DIRTY_PROG)) {
      struct fd4_zsa_stateobj *zsa = fd4_zsa_stateobj(ctx->zsa);
      bool fragz = fp->no_earlyz || fp->has_kill || fp->writes_pos;
      bool latez = !fp->fs.early_fragment_tests && fragz;
      bool clamp = !ctx->rasterizer->depth_clip_near;

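      /* "fragz" means the FS itself can affect Z or discard, so early-z
       * must be disabled (latez) unless the shader explicitly opted into
       * early fragment tests:
       */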
      OUT_PKT0(ring, REG_A4XX_RB_DEPTH_CONTROL, 1);
      OUT_RING(ring, zsa->rb_depth_control |
                        COND(clamp, A4XX_RB_DEPTH_CONTROL_Z_CLAMP_ENABLE) |
                        COND(latez, A4XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE) |
                        COND(fragz && fp->fragcoord_compmask != 0,
                             A4XX_RB_DEPTH_CONTROL_FORCE_FRAGZ_TO_FS));

      /* maybe this register/bitfield needs a better name.. this
       * appears to be just disabling early-z
       */
      OUT_PKT0(ring, REG_A4XX_GRAS_ALPHA_CONTROL, 1);
      OUT_RING(ring, zsa->gras_alpha_control |
                        COND(latez, A4XX_GRAS_ALPHA_CONTROL_ALPHA_TEST_ENABLE) |
                        COND(fragz && fp->fragcoord_compmask != 0,
                             A4XX_GRAS_ALPHA_CONTROL_FORCE_FRAGZ_TO_FS));
   }

   if (dirty & FD_DIRTY_RASTERIZER) {
      struct fd4_rasterizer_stateobj *rasterizer =
         fd4_rasterizer_stateobj(ctx->rasterizer);

      OUT_PKT0(ring, REG_A4XX_GRAS_SU_MODE_CONTROL, 1);
      OUT_RING(ring, rasterizer->gras_su_mode_control |
                        A4XX_GRAS_SU_MODE_CONTROL_RENDERING_PASS);

      OUT_PKT0(ring, REG_A4XX_GRAS_SU_POINT_MINMAX, 2);
      OUT_RING(ring, rasterizer->gras_su_point_minmax);
      OUT_RING(ring, rasterizer->gras_su_point_size);

      OUT_PKT0(ring, REG_A4XX_GRAS_SU_POLY_OFFSET_SCALE, 3);
      OUT_RING(ring, rasterizer->gras_su_poly_offset_scale);
      OUT_RING(ring, rasterizer->gras_su_poly_offset_offset);
      OUT_RING(ring, rasterizer->gras_su_poly_offset_clamp);

      OUT_PKT0(ring, REG_A4XX_GRAS_CL_CLIP_CNTL, 1);
      OUT_RING(ring, rasterizer->gras_cl_clip_cntl);
   }

   /* NOTE: since primitive_restart is not actually part of any
    * state object, we need to make sure that we always emit
    * PRIM_VTX_CNTL.. either that or be more clever and detect
    * when it changes.
    */
   if (emit->info) {
      const struct pipe_draw_info *info = emit->info;
      struct fd4_rasterizer_stateobj *rast =
         fd4_rasterizer_stateobj(ctx->rasterizer);
      uint32_t val = rast->pc_prim_vtx_cntl;

      if (info->index_size && info->primitive_restart)
         val |= A4XX_PC_PRIM_VTX_CNTL_PRIMITIVE_RESTART;

      val |= COND(vp->writes_psize, A4XX_PC_PRIM_VTX_CNTL_PSIZE);

      if (fp->total_in > 0) {
         uint32_t varout = align(fp->total_in, 16) / 16;
         if (varout > 1)
            varout = align(varout, 2);
         val |= A4XX_PC_PRIM_VTX_CNTL_VAROUT(varout);
      }

      OUT_PKT0(ring, REG_A4XX_PC_PRIM_VTX_CNTL, 2);
      OUT_RING(ring, val);
      OUT_RING(ring, rast->pc_prim_vtx_cntl2);
   }

   /* NOTE: scissor enabled bit is part of rasterizer state: */
   if (dirty & (FD_DIRTY_SCISSOR | FD_DIRTY_RASTERIZER)) {
      struct pipe_scissor_state *scissor = fd_context_get_scissor(ctx);

      OUT_PKT0(ring, REG_A4XX_GRAS_SC_WINDOW_SCISSOR_BR, 2);
      OUT_RING(ring, A4XX_GRAS_SC_WINDOW_SCISSOR_BR_X(scissor->maxx) |
                        A4XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(scissor->maxy));
      OUT_RING(ring, A4XX_GRAS_SC_WINDOW_SCISSOR_TL_X(scissor->minx) |
                        A4XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(scissor->miny));

      ctx->batch->max_scissor.minx =
         MIN2(ctx->batch->max_scissor.minx, scissor->minx);
      ctx->batch->max_scissor.miny =
         MIN2(ctx->batch->max_scissor.miny, scissor->miny);
      ctx->batch->max_scissor.maxx =
         MAX2(ctx->batch->max_scissor.maxx, scissor->maxx);
      ctx->batch->max_scissor.maxy =
         MAX2(ctx->batch->max_scissor.maxy, scissor->maxy);
   }

   if (dirty & FD_DIRTY_VIEWPORT) {
      struct pipe_viewport_state *vp = &ctx->viewport[0];

      fd_wfi(ctx->batch, ring);

      OUT_PKT0(ring, REG_A4XX_GRAS_CL_VPORT_XOFFSET_0, 6);
      OUT_RING(ring, A4XX_GRAS_CL_VPORT_XOFFSET_0(vp->translate[0]));
      OUT_RING(ring, A4XX_GRAS_CL_VPORT_XSCALE_0(vp->scale[0]));
      OUT_RING(ring, A4XX_GRAS_CL_VPORT_YOFFSET_0(vp->translate[1]));
      OUT_RING(ring, A4XX_GRAS_CL_VPORT_YSCALE_0(vp->scale[1]));
      OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZOFFSET_0(vp->translate[2]));
      OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZSCALE_0(vp->scale[2]));
   }

   if (dirty &
       (FD_DIRTY_VIEWPORT | FD_DIRTY_RASTERIZER | FD_DIRTY_FRAMEBUFFER)) {
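      /* Compute the viewport depth clamp range and convert it to the
       * zsbuf's depth precision: raw float bits for z32, otherwise
       * fixed-point with 16 or 24 bits:
       */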
      float zmin, zmax;
      int depth = 24;
      if (ctx->batch->framebuffer.zsbuf) {
         depth = util_format_get_component_bits(
            pipe_surface_format(ctx->batch->framebuffer.zsbuf),
            UTIL_FORMAT_COLORSPACE_ZS, 0);
      }
      util_viewport_zmin_zmax(&ctx->viewport[0], ctx->rasterizer->clip_halfz,
                              &zmin, &zmax);

      OUT_PKT0(ring, REG_A4XX_RB_VPORT_Z_CLAMP(0), 2);
      if (depth == 32) {
         OUT_RING(ring, fui(zmin));
         OUT_RING(ring, fui(zmax));
      } else if (depth == 16) {
         OUT_RING(ring, (uint32_t)(zmin * 0xffff));
         OUT_RING(ring, (uint32_t)(zmax * 0xffff));
      } else {
         OUT_RING(ring, (uint32_t)(zmin * 0xffffff));
         OUT_RING(ring, (uint32_t)(zmax * 0xffffff));
      }
   }

   if (dirty & (FD_DIRTY_PROG | FD_DIRTY_FRAMEBUFFER)) {
      struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer;
      unsigned n = pfb->nr_cbufs;
      /* if we have depth/stencil, we need at least one MRT: */
      if (pfb->zsbuf)
         n = MAX2(1, n);
      fd4_program_emit(ring, emit, n, pfb->cbufs);
   }

   if (!emit->skip_consts) { /* evil hack to deal sanely with clear path */
      ir3_emit_vs_consts(vp, ring, ctx, emit->info, emit->indirect, emit->draw);
      if (!emit->binning_pass)
         ir3_emit_fs_consts(fp, ring, ctx);
   }

   if (dirty & FD_DIRTY_BLEND) {
      struct fd4_blend_stateobj *blend = fd4_blend_stateobj(ctx->blend);
      uint32_t i;

      for (i = 0; i < A4XX_MAX_RENDER_TARGETS; i++) {
         enum pipe_format format =
            pipe_surface_format(ctx->batch->framebuffer.cbufs[i]);
         bool is_int = util_format_is_pure_integer(format);
         bool has_alpha = util_format_has_alpha(format);
         uint32_t control = blend->rb_mrt[i].control;

         if (is_int) {
            control &= A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK;
            control |= A4XX_RB_MRT_CONTROL_ROP_CODE(ROP_COPY);
         }

         if (!has_alpha) {
            control &= ~A4XX_RB_MRT_CONTROL_BLEND2;
         }

         OUT_PKT0(ring, REG_A4XX_RB_MRT_CONTROL(i), 1);
         OUT_RING(ring, control);

         OUT_PKT0(ring, REG_A4XX_RB_MRT_BLEND_CONTROL(i), 1);
         OUT_RING(ring, blend->rb_mrt[i].blend_control);
      }

      OUT_PKT0(ring, REG_A4XX_RB_FS_OUTPUT, 1);
      OUT_RING(ring,
               blend->rb_fs_output | A4XX_RB_FS_OUTPUT_SAMPLE_MASK(0xffff));
   }

   if (dirty & FD_DIRTY_BLEND_COLOR) {
      struct pipe_blend_color *bcolor = &ctx->blend_color;

      OUT_PKT0(ring, REG_A4XX_RB_BLEND_RED, 8);
      OUT_RING(ring, A4XX_RB_BLEND_RED_FLOAT(bcolor->color[0]) |
                        A4XX_RB_BLEND_RED_UINT(CLAMP(bcolor->color[0], 0.f, 1.f) * 0xff) |
                        A4XX_RB_BLEND_RED_SINT(CLAMP(bcolor->color[0], -1.f, 1.f) * 0x7f));
      OUT_RING(ring, A4XX_RB_BLEND_RED_F32(bcolor->color[0]));
      OUT_RING(ring, A4XX_RB_BLEND_GREEN_FLOAT(bcolor->color[1]) |
                        A4XX_RB_BLEND_GREEN_UINT(CLAMP(bcolor->color[1], 0.f, 1.f) * 0xff) |
                        A4XX_RB_BLEND_GREEN_SINT(CLAMP(bcolor->color[1], -1.f, 1.f) * 0x7f));
      OUT_RING(ring, A4XX_RB_BLEND_GREEN_F32(bcolor->color[1]));
      OUT_RING(ring, A4XX_RB_BLEND_BLUE_FLOAT(bcolor->color[2]) |
                        A4XX_RB_BLEND_BLUE_UINT(CLAMP(bcolor->color[2], 0.f, 1.f) * 0xff) |
                        A4XX_RB_BLEND_BLUE_SINT(CLAMP(bcolor->color[2], -1.f, 1.f) * 0x7f));
      OUT_RING(ring, A4XX_RB_BLEND_BLUE_F32(bcolor->color[2]));
      OUT_RING(ring, A4XX_RB_BLEND_ALPHA_FLOAT(bcolor->color[3]) |
                        A4XX_RB_BLEND_ALPHA_UINT(CLAMP(bcolor->color[3], 0.f, 1.f) * 0xff) |
                        A4XX_RB_BLEND_ALPHA_SINT(CLAMP(bcolor->color[3], -1.f, 1.f) * 0x7f));
      OUT_RING(ring, A4XX_RB_BLEND_ALPHA_F32(bcolor->color[3]));
   }

   if (ctx->dirty_shader[PIPE_SHADER_VERTEX] & FD_DIRTY_SHADER_TEX)
      emit_textures(ctx, ring, SB4_VS_TEX, &ctx->tex[PIPE_SHADER_VERTEX], vp);

   if (ctx->dirty_shader[PIPE_SHADER_FRAGMENT] & FD_DIRTY_SHADER_TEX)
      emit_textures(ctx, ring, SB4_FS_TEX, &ctx->tex[PIPE_SHADER_FRAGMENT], fp);

   if (!emit->binning_pass) {
      if (ctx->dirty_shader[PIPE_SHADER_FRAGMENT] & FD_DIRTY_SHADER_SSBO)
         emit_ssbos(ctx, ring, SB4_SSBO, &ctx->shaderbuf[PIPE_SHADER_FRAGMENT]);

      if (ctx->dirty_shader[PIPE_SHADER_FRAGMENT] & FD_DIRTY_SHADER_IMAGE)
         fd4_emit_images(ctx, ring, PIPE_SHADER_FRAGMENT, fp);
   }
}

void
fd4_emit_cs_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
                  struct ir3_shader_variant *cp)
{
   enum fd_dirty_shader_state dirty = ctx->dirty_shader[PIPE_SHADER_COMPUTE];
   unsigned num_textures = ctx->tex[PIPE_SHADER_COMPUTE].num_textures +
                           cp->astc_srgb.count + cp->tg4.count;

   if (dirty & FD_DIRTY_SHADER_TEX) {
      emit_textures(ctx, ring, SB4_CS_TEX, &ctx->tex[PIPE_SHADER_COMPUTE], cp);

      OUT_PKT0(ring, REG_A4XX_TPL1_TP_TEX_COUNT, 1);
      OUT_RING(ring, 0);
   }

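   /* When compute images are bound, the magic count 0x80 appears to be
    * required instead of the real texture count (image access borrows
    * the texture path on a4xx):
    */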
   OUT_PKT0(ring, REG_A4XX_TPL1_TP_FS_TEX_COUNT, 1);
   OUT_RING(ring, A4XX_TPL1_TP_FS_TEX_COUNT_CS(
                     ctx->shaderimg[PIPE_SHADER_COMPUTE].enabled_mask
                        ? 0x80 : num_textures));

   if (dirty & FD_DIRTY_SHADER_SSBO)
      emit_ssbos(ctx, ring, SB4_CS_SSBO, &ctx->shaderbuf[PIPE_SHADER_COMPUTE]);

   if (dirty & FD_DIRTY_SHADER_IMAGE)
      fd4_emit_images(ctx, ring, PIPE_SHADER_COMPUTE, cp);
}

/* emit setup at the start of a new cmdstream buffer (don't rely on previous
 * state, there could have been a context switch between ioctls):
 */
void
fd4_emit_restore(struct fd_batch *batch, struct fd_ringbuffer *ring)
{
   struct fd_context *ctx = batch->ctx;
   struct fd4_context *fd4_ctx = fd4_context(ctx);

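   /* Mostly golden register values, apparently captured from the blob
    * driver; the REG_A4XX_UNKNOWN_* writes are exactly that, unknown but
    * seemingly required:
    */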
   OUT_PKT0(ring, REG_A4XX_RBBM_PERFCTR_CTL, 1);
   OUT_RING(ring, 0x00000001);

   OUT_PKT0(ring, REG_A4XX_GRAS_DEBUG_ECO_CONTROL, 1);
   OUT_RING(ring, 0x00000000);

   OUT_PKT0(ring, REG_A4XX_SP_MODE_CONTROL, 1);
   OUT_RING(ring, 0x0000001e);

   OUT_PKT0(ring, REG_A4XX_TPL1_TP_MODE_CONTROL, 1);
   OUT_RING(ring, 0x0000003a);

   OUT_PKT0(ring, REG_A4XX_UNKNOWN_0D01, 1);
   OUT_RING(ring, 0x00000001);

   OUT_PKT0(ring, REG_A4XX_UNKNOWN_0E42, 1);
   OUT_RING(ring, 0x00000000);

   OUT_PKT0(ring, REG_A4XX_UCHE_CACHE_WAYS_VFD, 1);
   OUT_RING(ring, 0x00000007);

   OUT_PKT0(ring, REG_A4XX_UCHE_CACHE_MODE_CONTROL, 1);
   OUT_RING(ring, 0x00000000);

   OUT_PKT0(ring, REG_A4XX_UCHE_INVALIDATE0, 2);
   OUT_RING(ring, 0x00000000);
   OUT_RING(ring, 0x00000012);

   OUT_PKT0(ring, REG_A4XX_HLSQ_MODE_CONTROL, 1);
   OUT_RING(ring, 0x00000003);

   OUT_PKT0(ring, REG_A4XX_UNKNOWN_0CC5, 1);
   OUT_RING(ring, 0x00000006);

   OUT_PKT0(ring, REG_A4XX_UNKNOWN_0CC6, 1);
   OUT_RING(ring, 0x00000000);

   OUT_PKT0(ring, REG_A4XX_UNKNOWN_0EC2, 1);
   OUT_RING(ring, 0x00040000);

   OUT_PKT0(ring, REG_A4XX_UNKNOWN_2001, 1);
   OUT_RING(ring, 0x00000000);

   OUT_PKT3(ring, CP_INVALIDATE_STATE, 1);
   OUT_RING(ring, 0x00001000);

   OUT_PKT0(ring, REG_A4XX_UNKNOWN_20EF, 1);
   OUT_RING(ring, 0x00000000);

   OUT_PKT0(ring, REG_A4XX_RB_BLEND_RED, 4);
   OUT_RING(ring, A4XX_RB_BLEND_RED_UINT(0) | A4XX_RB_BLEND_RED_FLOAT(0.0f));
   OUT_RING(ring, A4XX_RB_BLEND_GREEN_UINT(0) | A4XX_RB_BLEND_GREEN_FLOAT(0.0f));
   OUT_RING(ring, A4XX_RB_BLEND_BLUE_UINT(0) | A4XX_RB_BLEND_BLUE_FLOAT(0.0f));
   OUT_RING(ring,
            A4XX_RB_BLEND_ALPHA_UINT(0x7fff) | A4XX_RB_BLEND_ALPHA_FLOAT(1.0f));

   OUT_PKT0(ring, REG_A4XX_UNKNOWN_2152, 1);
   OUT_RING(ring, 0x00000000);

   OUT_PKT0(ring, REG_A4XX_UNKNOWN_2153, 1);
   OUT_RING(ring, 0x00000000);

   OUT_PKT0(ring, REG_A4XX_UNKNOWN_2154, 1);
   OUT_RING(ring, 0x00000000);

   OUT_PKT0(ring, REG_A4XX_UNKNOWN_2155, 1);
   OUT_RING(ring, 0x00000000);

   OUT_PKT0(ring, REG_A4XX_UNKNOWN_2156, 1);
   OUT_RING(ring, 0x00000000);

   OUT_PKT0(ring, REG_A4XX_UNKNOWN_2157, 1);
   OUT_RING(ring, 0x00000000);

   OUT_PKT0(ring, REG_A4XX_UNKNOWN_21C3, 1);
   OUT_RING(ring, 0x0000001d);

   OUT_PKT0(ring, REG_A4XX_PC_GS_PARAM, 1);
   OUT_RING(ring, 0x00000000);

   OUT_PKT0(ring, REG_A4XX_UNKNOWN_21E6, 1);
   OUT_RING(ring, 0x00000001);

   OUT_PKT0(ring, REG_A4XX_PC_HS_PARAM, 1);
   OUT_RING(ring, 0x00000000);

   OUT_PKT0(ring, REG_A4XX_UNKNOWN_22D7, 1);
   OUT_RING(ring, 0x00000000);

   OUT_PKT0(ring, REG_A4XX_TPL1_TP_TEX_OFFSET, 1);
   OUT_RING(ring, 0x00000000);

   OUT_PKT0(ring, REG_A4XX_TPL1_TP_TEX_COUNT, 1);
   OUT_RING(ring, A4XX_TPL1_TP_TEX_COUNT_VS(16) | A4XX_TPL1_TP_TEX_COUNT_HS(0) |
                     A4XX_TPL1_TP_TEX_COUNT_DS(0) |
                     A4XX_TPL1_TP_TEX_COUNT_GS(0));

   OUT_PKT0(ring, REG_A4XX_TPL1_TP_FS_TEX_COUNT, 1);
   OUT_RING(ring, 16);

   /* we don't use this yet.. probably best to disable.. */
   OUT_PKT3(ring, CP_SET_DRAW_STATE, 2);
   OUT_RING(ring, CP_SET_DRAW_STATE__0_COUNT(0) |
                     CP_SET_DRAW_STATE__0_DISABLE_ALL_GROUPS |
                     CP_SET_DRAW_STATE__0_GROUP_ID(0));
   OUT_RING(ring, CP_SET_DRAW_STATE__1_ADDR_LO(0));

   OUT_PKT0(ring, REG_A4XX_SP_VS_PVT_MEM_PARAM, 2);
   OUT_RING(ring, 0x08000001);                    /* SP_VS_PVT_MEM_PARAM */
   OUT_RELOC(ring, fd4_ctx->vs_pvt_mem, 0, 0, 0); /* SP_VS_PVT_MEM_ADDR */

   OUT_PKT0(ring, REG_A4XX_SP_FS_PVT_MEM_PARAM, 2);
   OUT_RING(ring, 0x08000001);                    /* SP_FS_PVT_MEM_PARAM */
   OUT_RELOC(ring, fd4_ctx->fs_pvt_mem, 0, 0, 0); /* SP_FS_PVT_MEM_ADDR */

   OUT_PKT0(ring, REG_A4XX_GRAS_SC_CONTROL, 1);
   OUT_RING(ring, A4XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
                     A4XX_GRAS_SC_CONTROL_MSAA_DISABLE |
                     A4XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
                     A4XX_GRAS_SC_CONTROL_RASTER_MODE(0));

   OUT_PKT0(ring, REG_A4XX_RB_MSAA_CONTROL, 1);
   OUT_RING(ring, A4XX_RB_MSAA_CONTROL_DISABLE |
                     A4XX_RB_MSAA_CONTROL_SAMPLES(MSAA_ONE));

   OUT_PKT0(ring, REG_A4XX_GRAS_CL_GB_CLIP_ADJ, 1);
   OUT_RING(ring, A4XX_GRAS_CL_GB_CLIP_ADJ_HORZ(0) |
                     A4XX_GRAS_CL_GB_CLIP_ADJ_VERT(0));

   OUT_PKT0(ring, REG_A4XX_RB_ALPHA_CONTROL, 1);
   OUT_RING(ring, A4XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC(FUNC_ALWAYS));

   OUT_PKT0(ring, REG_A4XX_RB_FS_OUTPUT, 1);
   OUT_RING(ring, A4XX_RB_FS_OUTPUT_SAMPLE_MASK(0xffff));

   OUT_PKT0(ring, REG_A4XX_GRAS_ALPHA_CONTROL, 1);
   OUT_RING(ring, 0x0);

   fd_hw_query_enable(batch, ring);
}

static void
fd4_mem_to_mem(struct fd_ringbuffer *ring, struct pipe_resource *dst,
               unsigned dst_off, struct pipe_resource *src, unsigned src_off,
               unsigned sizedwords)
{
   struct fd_bo *src_bo = fd_resource(src)->bo;
   struct fd_bo *dst_bo = fd_resource(dst)->bo;
   unsigned i;

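   /* As used here, each CP_MEM_TO_MEM packet copies a single dword, so a
    * sizedwords-long copy is a loop of packets with both offsets advancing
    * by 4 bytes per iteration:
    */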
   for (i = 0; i < sizedwords; i++) {
      OUT_PKT3(ring, CP_MEM_TO_MEM, 3);
      OUT_RING(ring, 0x00000000);
      OUT_RELOC(ring, dst_bo, dst_off, 0, 0);
      OUT_RELOC(ring, src_bo, src_off, 0, 0);

      dst_off += 4;
      src_off += 4;
   }
}

void
fd4_emit_init_screen(struct pipe_screen *pscreen)
{
   struct fd_screen *screen = fd_screen(pscreen);

   screen->emit_ib = fd4_emit_ib;
   screen->mem_to_mem = fd4_mem_to_mem;
}

void
fd4_emit_init(struct pipe_context *pctx)
{
}