1 /*
2 * Copyright © 2015 Broadcom
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "nir/pipe_nir.h"
25 #include "util/format/u_format.h"
26 #include "util/u_surface.h"
27 #include "util/u_blitter.h"
28 #include "compiler/nir/nir_builder.h"
29 #include "vc4_context.h"
30
31 static struct pipe_surface *
vc4_get_blit_surface(struct pipe_context * pctx,struct pipe_resource * prsc,unsigned level,unsigned layer)32 vc4_get_blit_surface(struct pipe_context *pctx,
33 struct pipe_resource *prsc, unsigned level,
34 unsigned layer)
35 {
36 struct pipe_surface tmpl;
37
38 memset(&tmpl, 0, sizeof(tmpl));
39 tmpl.format = prsc->format;
40 tmpl.u.tex.level = level;
41 tmpl.u.tex.first_layer = tmpl.u.tex.last_layer = layer;
42
43 return pctx->create_surface(pctx, prsc, &tmpl);
44 }
45
/* Returns true if @size is not a multiple of @tile_size.  @tile_size must
 * be a power of two (the hardware tile dimensions are).
 */
static bool
is_tile_unaligned(unsigned size, unsigned tile_size)
{
        return (size & (tile_size - 1)) != 0;
}
51
/**
 * Attempts the blit as a pure tile-buffer operation: the source surface is
 * attached to a job as the tile buffer's load source and the destination as
 * its store target, so the RCL copies tiles with no actual rendering.
 *
 * Only 1:1, unscissored, tile-aligned, same-format blits qualify.  On
 * success the handled bits are cleared from info->mask; otherwise the mask
 * is left untouched so a later path in vc4_blit() can handle it.
 */
static void
vc4_tile_blit(struct pipe_context *pctx, struct pipe_blit_info *info)
{
        struct vc4_context *vc4 = vc4_context(pctx);
        bool msaa = (info->src.resource->nr_samples > 1 ||
                     info->dst.resource->nr_samples > 1);
        /* MSAA tile buffers cover 32x32 pixels; single-sampled, 64x64. */
        int tile_width = msaa ? 32 : 64;
        int tile_height = msaa ? 32 : 64;

        if (!info->mask)
                return;

        bool is_color_blit = info->mask & PIPE_MASK_RGBA;
        bool is_depth_blit = info->mask & PIPE_MASK_Z;
        bool is_stencil_blit = info->mask & PIPE_MASK_S;

        /* Either we receive a depth/stencil blit, or color blit, but not both.
         */
        assert ((is_color_blit && !(is_depth_blit || is_stencil_blit)) ||
                (!is_color_blit && (is_depth_blit || is_stencil_blit)));

        /* A scissor can't be expressed as whole-tile loads/stores. */
        if (info->scissor_enable)
                return;

        /* The tile buffer path can't scale, translate, or copy multiple
         * layers at once.
         */
        if (info->dst.box.x != info->src.box.x ||
            info->dst.box.y != info->src.box.y ||
            info->dst.box.width != info->src.box.width ||
            info->dst.box.height != info->src.box.height ||
            info->dst.box.depth != info->src.box.depth ||
            info->dst.box.depth != 1) {
                return;
        }

        /* The blit kind must match the format kind: the tile buffer binding
         * (color vs Z/S) is chosen from the mask below.
         */
        if (is_color_blit &&
            util_format_is_depth_or_stencil(info->dst.format))
                return;

        if ((is_depth_blit || is_stencil_blit) &&
            !util_format_is_depth_or_stencil(info->dst.format))
                return;

        /* The rect must start on a tile boundary and cover whole tiles,
         * except that partial tiles are fine where the rect reaches the
         * destination surface's right/bottom edge.
         */
        int dst_surface_width = u_minify(info->dst.resource->width0,
                                         info->dst.level);
        int dst_surface_height = u_minify(info->dst.resource->height0,
                                         info->dst.level);
        if (is_tile_unaligned(info->dst.box.x, tile_width) ||
            is_tile_unaligned(info->dst.box.y, tile_height) ||
            (is_tile_unaligned(info->dst.box.width, tile_width) &&
             info->dst.box.x + info->dst.box.width != dst_surface_width) ||
            (is_tile_unaligned(info->dst.box.height, tile_height) &&
             info->dst.box.y + info->dst.box.height != dst_surface_height)) {
                return;
        }

        /* VC4_PACKET_LOAD_TILE_BUFFER_GENERAL uses the
         * VC4_PACKET_TILE_RENDERING_MODE_CONFIG's width (determined by our
         * destination surface) to determine the stride.  This may be wrong
         * when reading from texture miplevels > 0, which are stored in
         * POT-sized areas.  For MSAA, the tile addresses are computed
         * explicitly by the RCL, but still use the destination width to
         * determine the stride (which could be fixed by explicitly supplying
         * it in the ABI).
         */
        struct vc4_resource *rsc = vc4_resource(info->src.resource);

        uint32_t stride;

        /* Recompute the stride the hardware will infer from the destination
         * width, and bail if it doesn't match the source slice's actual
         * stride.
         */
        if (info->src.resource->nr_samples > 1)
                stride = align(dst_surface_width, 32) * 4 * rsc->cpp;
        else if (rsc->slices[info->src.level].tiling == VC4_TILING_FORMAT_T)
                stride = align(dst_surface_width * rsc->cpp, 128);
        else
                stride = align(dst_surface_width * rsc->cpp, 16);

        if (stride != rsc->slices[info->src.level].stride)
                return;

        /* The tile buffer can't convert formats on load/store. */
        if (info->dst.resource->format != info->src.resource->format)
                return;

        /* Flip to true for local debugging of which blits hit this path. */
        if (false) {
                fprintf(stderr, "RCL blit from %d,%d to %d,%d (%d,%d)\n",
                        info->src.box.x,
                        info->src.box.y,
                        info->dst.box.x,
                        info->dst.box.y,
                        info->dst.box.width,
                        info->dst.box.height);
        }

        struct pipe_surface *dst_surf =
                vc4_get_blit_surface(pctx, info->dst.resource, info->dst.level,
                                     info->dst.box.z);
        struct pipe_surface *src_surf =
                vc4_get_blit_surface(pctx, info->src.resource, info->src.level,
                                     info->src.box.z);

        /* Make sure any pending writes to the source land before we read it
         * back through the tile buffer.
         */
        vc4_flush_jobs_reading_resource(vc4, info->src.resource);

        /* Bind the source as the job's tile-buffer load surface (color or
         * Z/S slot depending on the blit kind).
         */
        struct vc4_job *job;
        if (is_color_blit) {
                job = vc4_get_job(vc4, dst_surf, NULL);
                pipe_surface_reference(&job->color_read, src_surf);
        } else {
                job = vc4_get_job(vc4, NULL, dst_surf);
                pipe_surface_reference(&job->zs_read, src_surf);
        }

        /* Restrict the job to the tiles covered by the blit rect. */
        job->draw_min_x = info->dst.box.x;
        job->draw_min_y = info->dst.box.y;
        job->draw_max_x = info->dst.box.x + info->dst.box.width;
        job->draw_max_y = info->dst.box.y + info->dst.box.height;
        job->draw_width = dst_surf->width;
        job->draw_height = dst_surf->height;

        job->tile_width = tile_width;
        job->tile_height = tile_height;
        job->msaa = msaa;
        job->needs_flush = true;

        /* Mark which buffers the RCL must store out, and drop those bits
         * from the caller's mask so later blit paths skip them.
         */
        if (is_color_blit) {
                job->resolve |= PIPE_CLEAR_COLOR;
                info->mask &= ~PIPE_MASK_RGBA;
        }

        if (is_depth_blit) {
                job->resolve |= PIPE_CLEAR_DEPTH;
                info->mask &= ~PIPE_MASK_Z;
        }

        if (is_stencil_blit) {
                job->resolve |= PIPE_CLEAR_STENCIL;
                info->mask &= ~PIPE_MASK_S;
        }

        vc4_job_submit(vc4, job);

        pipe_surface_reference(&dst_surf, NULL);
        pipe_surface_reference(&src_surf, NULL);
}
192
/**
 * Saves all of the context state that util_blitter will clobber, so it can
 * be restored after the blit draw.  Must be called immediately before any
 * util_blitter_blit*/util_blitter_custom_shader() use.
 */
void
vc4_blitter_save(struct vc4_context *vc4)
{
        util_blitter_save_fragment_constant_buffer_slot(vc4->blitter,
                        vc4->constbuf[PIPE_SHADER_FRAGMENT].cb);
        util_blitter_save_vertex_buffers(vc4->blitter, vc4->vertexbuf.vb,
                                         vc4->vertexbuf.count);
        util_blitter_save_vertex_elements(vc4->blitter, vc4->vtx);
        util_blitter_save_vertex_shader(vc4->blitter, vc4->prog.bind_vs);
        util_blitter_save_rasterizer(vc4->blitter, vc4->rasterizer);
        util_blitter_save_viewport(vc4->blitter, &vc4->viewport);
        util_blitter_save_scissor(vc4->blitter, &vc4->scissor);
        util_blitter_save_fragment_shader(vc4->blitter, vc4->prog.bind_fs);
        util_blitter_save_blend(vc4->blitter, vc4->blend);
        util_blitter_save_depth_stencil_alpha(vc4->blitter, vc4->zsa);
        util_blitter_save_stencil_ref(vc4->blitter, &vc4->stencil_ref);
        util_blitter_save_sample_mask(vc4->blitter, vc4->sample_mask, 0);
        util_blitter_save_framebuffer(vc4->blitter, &vc4->framebuffer);
        util_blitter_save_fragment_sampler_states(vc4->blitter,
                        vc4->fragtex.num_samplers,
                        (void **)vc4->fragtex.samplers);
        util_blitter_save_fragment_sampler_views(vc4->blitter,
                        vc4->fragtex.num_textures, vc4->fragtex.textures);
}
217
vc4_get_yuv_vs(struct pipe_context * pctx)218 static void *vc4_get_yuv_vs(struct pipe_context *pctx)
219 {
220 struct vc4_context *vc4 = vc4_context(pctx);
221 struct pipe_screen *pscreen = pctx->screen;
222
223 if (vc4->yuv_linear_blit_vs)
224 return vc4->yuv_linear_blit_vs;
225
226 const struct nir_shader_compiler_options *options =
227 pscreen->get_compiler_options(pscreen,
228 PIPE_SHADER_IR_NIR,
229 PIPE_SHADER_VERTEX);
230
231 nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_VERTEX, options,
232 "linear_blit_vs");
233
234 const struct glsl_type *vec4 = glsl_vec4_type();
235 nir_variable *pos_in = nir_variable_create(b.shader, nir_var_shader_in,
236 vec4, "pos");
237
238 nir_variable *pos_out = nir_variable_create(b.shader, nir_var_shader_out,
239 vec4, "gl_Position");
240 pos_out->data.location = VARYING_SLOT_POS;
241
242 nir_store_var(&b, pos_out, nir_load_var(&b, pos_in), 0xf);
243
244 vc4->yuv_linear_blit_vs = pipe_shader_from_nir(pctx, b.shader);
245
246 return vc4->yuv_linear_blit_vs;
247 }
248
/**
 * Returns the (lazily built, context-cached) fragment shader for the YUV
 * linear-to-tiled blit, selected by the source's bytes per pixel.
 *
 * The shader reads the raster-order source plane through a UBO (cb1) and
 * writes one RGBA8888 output pixel, i.e. 4 source bytes per fragment.  The
 * offset math maps the destination fragment's coordinate back into the
 * linear source so that the rendered (tiled) destination ends up with the
 * proper tiling layout of the plane.
 *
 * @cpp 1 for 8-bit (Y) planes, 2 for 16-bit (interleaved CbCr) planes.
 */
static void *vc4_get_yuv_fs(struct pipe_context *pctx, int cpp)
{
        struct vc4_context *vc4 = vc4_context(pctx);
        struct pipe_screen *pscreen = pctx->screen;
        struct pipe_shader_state **cached_shader;
        const char *name;

        if (cpp == 1) {
                cached_shader = &vc4->yuv_linear_blit_fs_8bit;
                name = "linear_blit_8bit_fs";
        } else {
                cached_shader = &vc4->yuv_linear_blit_fs_16bit;
                name = "linear_blit_16bit_fs";
        }

        /* Build the shader only once per context. */
        if (*cached_shader)
                return *cached_shader;

        const struct nir_shader_compiler_options *options =
                pscreen->get_compiler_options(pscreen,
                                              PIPE_SHADER_IR_NIR,
                                              PIPE_SHADER_FRAGMENT);

        nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT,
                                                       options, "%s", name);

        const struct glsl_type *vec4 = glsl_vec4_type();
        const struct glsl_type *glsl_int = glsl_int_type();

        nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out,
                                                      vec4, "f_color");
        color_out->data.location = FRAG_RESULT_COLOR;

        /* Integer pixel coordinate of the fragment being rendered. */
        nir_variable *pos_in = nir_variable_create(b.shader, nir_var_shader_in,
                                                   vec4, "pos");
        pos_in->data.location = VARYING_SLOT_POS;
        nir_def *pos = nir_load_var(&b, pos_in);

        nir_def *one = nir_imm_int(&b, 1);
        nir_def *two = nir_imm_int(&b, 2);

        nir_def *x = nir_f2i32(&b, nir_channel(&b, pos, 0));
        nir_def *y = nir_f2i32(&b, nir_channel(&b, pos, 1));

        /* Source row stride in bytes, supplied through cb0 by
         * vc4_yuv_blit().
         */
        nir_variable *stride_in = nir_variable_create(b.shader, nir_var_uniform,
                                                      glsl_int, "stride");
        nir_def *stride = nir_load_var(&b, stride_in);

        nir_def *x_offset;
        nir_def *y_offset;
        if (cpp == 1) {
                /* 8-bit path: each 32bpp output pixel packs 4 source bytes.
                 * NOTE(review): the bit shuffles below interleave x/y to
                 * pick those bytes from two source rows, matching the utile
                 * layout of an 8bpp plane — confirm against vc4_tiling.
                 */
                nir_def *intra_utile_x_offset =
                        nir_ishl(&b, nir_iand(&b, x, one), two);
                nir_def *inter_utile_x_offset =
                        nir_ishl(&b, nir_iand(&b, x, nir_imm_int(&b, ~3)), one);

                x_offset = nir_iadd(&b,
                                    intra_utile_x_offset,
                                    inter_utile_x_offset);
                y_offset = nir_imul(&b,
                                    nir_iadd(&b,
                                             nir_ishl(&b, y, one),
                                             nir_ushr(&b, nir_iand(&b, x, two), one)),
                                    stride);
        } else {
                /* 16-bit path: 4 source bytes per output pixel come from a
                 * single row, so the mapping is a plain linear address.
                 */
                x_offset = nir_ishl(&b, x, two);
                y_offset = nir_imul(&b, y, stride);
        }

        /* Fetch 4 bytes of source data from cb1 (bound to the source BO). */
        nir_def *load =
                nir_load_ubo(&b, 1, 32, one, nir_iadd(&b, x_offset, y_offset),
                             .align_mul = 4,
                             .align_offset = 0,
                             .range_base = 0,
                             .range = ~0);

        nir_store_var(&b, color_out,
                      nir_unpack_unorm_4x8(&b, load),
                      0xf);

        *cached_shader = pipe_shader_from_nir(pctx, b.shader);

        return *cached_shader;
}
333
/**
 * Handles the blit that turns a raster-order YUV plane (R8 or R8G8) into
 * its T-tiled shadow resource, by rendering into the destination with a
 * custom fragment shader that reads the source through a constant buffer.
 *
 * On success (or SW fallback) the RGBA bits are cleared from info->mask;
 * if the blit isn't a YUV-plane case, the mask is left alone for the other
 * paths in vc4_blit().
 */
static void
vc4_yuv_blit(struct pipe_context *pctx, struct pipe_blit_info *info)
{
        struct vc4_context *vc4 = vc4_context(pctx);
        struct vc4_resource *src = vc4_resource(info->src.resource);
        struct vc4_resource *dst = vc4_resource(info->dst.resource);
        bool ok;

        if (!(info->mask & PIPE_MASK_RGBA))
                return;

        /* Only raster-order sources need this path. */
        if (src->tiled)
                return;

        /* R8 = Y plane, R8G8 = interleaved CbCr plane. */
        if (src->base.format != PIPE_FORMAT_R8_UNORM &&
            src->base.format != PIPE_FORMAT_R8G8_UNORM)
                return;

        /* YUV blits always turn raster-order to tiled */
        assert(dst->base.format == src->base.format);
        assert(dst->tiled);

        /* Always 1:1 and at the origin */
        assert(info->src.box.x == 0 && info->dst.box.x == 0);
        assert(info->src.box.y == 0 && info->dst.box.y == 0);
        assert(info->src.box.width == info->dst.box.width);
        assert(info->src.box.height == info->dst.box.height);

        /* The FS reads the source as 32-bit words from a UBO, so the slice
         * must be 4-byte aligned; otherwise fall back to a CPU copy.
         */
        if ((src->slices[info->src.level].offset & 3) ||
            (src->slices[info->src.level].stride & 3)) {
                perf_debug("YUV-blit src texture offset/stride misaligned: 0x%08x/%d\n",
                           src->slices[info->src.level].offset,
                           src->slices[info->src.level].stride);
                goto fallback;
        }

        vc4_blitter_save(vc4);

        /* Create a renderable surface mapping the T-tiled shadow buffer.
         */
        struct pipe_surface dst_tmpl;
        util_blitter_default_dst_texture(&dst_tmpl, info->dst.resource,
                                         info->dst.level, info->dst.box.z);
        dst_tmpl.format = PIPE_FORMAT_RGBA8888_UNORM;
        struct pipe_surface *dst_surf =
                pctx->create_surface(pctx, info->dst.resource, &dst_tmpl);
        if (!dst_surf) {
                fprintf(stderr, "Failed to create YUV dst surface\n");
                util_blitter_unset_running_flag(vc4->blitter);
                return;
        }
        /* Each RGBA8888 fragment covers 4 source bytes, so shrink the
         * render target accordingly (width halved per the 8-px alignment,
         * and height halved again for the 8-bit two-rows-per-fragment
         * layout).
         */
        dst_surf->width = align(dst_surf->width, 8) / 2;
        if (dst->cpp == 1)
                dst_surf->height /= 2;

        /* Set the constant buffer. */
        uint32_t stride = src->slices[info->src.level].stride;
        struct pipe_constant_buffer cb_uniforms = {
                .user_buffer = &stride,
                .buffer_size = sizeof(stride),
        };
        pctx->set_constant_buffer(pctx, PIPE_SHADER_FRAGMENT, 0, false, &cb_uniforms);
        /* cb1 maps the source BO so the FS can fetch the raster data. */
        struct pipe_constant_buffer cb_src = {
                .buffer = info->src.resource,
                .buffer_offset = src->slices[info->src.level].offset,
                .buffer_size = (src->bo->size -
                                src->slices[info->src.level].offset),
        };
        pctx->set_constant_buffer(pctx, PIPE_SHADER_FRAGMENT, 1, false, &cb_src);

        /* Unbind the textures, to make sure we don't try to recurse into the
         * shadow blit.
         */
        pctx->set_sampler_views(pctx, PIPE_SHADER_FRAGMENT, 0, 0, 0, false, NULL);
        pctx->bind_sampler_states(pctx, PIPE_SHADER_FRAGMENT, 0, 0, NULL);

        util_blitter_custom_shader(vc4->blitter, dst_surf,
                                   vc4_get_yuv_vs(pctx),
                                   vc4_get_yuv_fs(pctx, src->cpp));

        util_blitter_restore_textures(vc4->blitter);
        util_blitter_restore_constant_buffer_state(vc4->blitter);
        /* Restore cb1 (util_blitter doesn't handle this one). */
        struct pipe_constant_buffer cb_disabled = { 0 };
        pctx->set_constant_buffer(pctx, PIPE_SHADER_FRAGMENT, 1, false, &cb_disabled);

        pipe_surface_reference(&dst_surf, NULL);

        info->mask &= ~PIPE_MASK_RGBA;

        return;

fallback:
        /* Do an immediate SW fallback, since the render blit path
         * would just recurse.
         */
        ok = util_try_blit_via_copy_region(pctx, info, false);
        assert(ok); (void)ok;

        info->mask &= ~PIPE_MASK_RGBA;
}
435
436 static void
vc4_render_blit(struct pipe_context * ctx,struct pipe_blit_info * info)437 vc4_render_blit(struct pipe_context *ctx, struct pipe_blit_info *info)
438 {
439 struct vc4_context *vc4 = vc4_context(ctx);
440
441 if (!info->mask)
442 return;
443
444 if (!util_blitter_is_blit_supported(vc4->blitter, info)) {
445 fprintf(stderr, "blit unsupported %s -> %s\n",
446 util_format_short_name(info->src.resource->format),
447 util_format_short_name(info->dst.resource->format));
448 return;
449 }
450
451 /* Enable the scissor, so we get a minimal set of tiles rendered. */
452 if (!info->scissor_enable) {
453 info->scissor_enable = true;
454 info->scissor.minx = info->dst.box.x;
455 info->scissor.miny = info->dst.box.y;
456 info->scissor.maxx = info->dst.box.x + info->dst.box.width;
457 info->scissor.maxy = info->dst.box.y + info->dst.box.height;
458 }
459
460 vc4_blitter_save(vc4);
461 util_blitter_blit(vc4->blitter, info, NULL);
462
463 info->mask = 0;
464 }
465
466 /* Implement stencil and stencil/depth blit by reinterpreting stencil data as
467 * an RGBA8888 texture.
468 */
469 static void
vc4_stencil_blit(struct pipe_context * ctx,struct pipe_blit_info * info)470 vc4_stencil_blit(struct pipe_context *ctx, struct pipe_blit_info *info)
471 {
472 struct vc4_context *vc4 = vc4_context(ctx);
473 struct vc4_resource *src = vc4_resource(info->src.resource);
474 struct vc4_resource *dst = vc4_resource(info->dst.resource);
475 enum pipe_format src_format, dst_format;
476
477 if ((info->mask & PIPE_MASK_S) == 0)
478 return;
479
480 src_format = (info->mask & PIPE_MASK_ZS) ?
481 PIPE_FORMAT_RGBA8888_UINT :
482 PIPE_FORMAT_R8_UINT;
483
484 dst_format = (info->mask & PIPE_MASK_ZS) ?
485 PIPE_FORMAT_RGBA8888_UINT :
486 PIPE_FORMAT_R8_UINT;
487
488 /* Initialize the surface */
489 struct pipe_surface dst_tmpl = {
490 .u.tex = {
491 .level = info->dst.level,
492 .first_layer = info->dst.box.z,
493 .last_layer = info->dst.box.z,
494 },
495 .format = dst_format,
496 };
497 struct pipe_surface *dst_surf =
498 ctx->create_surface(ctx, &dst->base, &dst_tmpl);
499
500 /* Initialize the sampler view */
501 struct pipe_sampler_view src_tmpl = {
502 .target = (src->base.target == PIPE_TEXTURE_CUBE_ARRAY) ?
503 PIPE_TEXTURE_2D_ARRAY :
504 src->base.target,
505 .format = src_format,
506 .u.tex = {
507 .first_level = info->src.level,
508 .last_level = info->src.level,
509 .first_layer = 0,
510 .last_layer = (PIPE_TEXTURE_2D ?
511 u_minify(src->base.depth0,
512 info->src.level) - 1 :
513 src->base.array_size - 1),
514 },
515 .swizzle_r = PIPE_SWIZZLE_X,
516 .swizzle_g = PIPE_SWIZZLE_Y,
517 .swizzle_b = PIPE_SWIZZLE_Z,
518 .swizzle_a = PIPE_SWIZZLE_W,
519 };
520 struct pipe_sampler_view *src_view =
521 ctx->create_sampler_view(ctx, &src->base, &src_tmpl);
522
523 vc4_blitter_save(vc4);
524 util_blitter_blit_generic(vc4->blitter, dst_surf, &info->dst.box,
525 src_view, &info->src.box,
526 src->base.width0, src->base.height0,
527 (info->mask & PIPE_MASK_ZS) ?
528 PIPE_MASK_RGBA : PIPE_MASK_R,
529 PIPE_TEX_FILTER_NEAREST,
530 info->scissor_enable ? &info->scissor : NULL,
531 info->alpha_blend, false, 0, NULL);
532
533 pipe_surface_reference(&dst_surf, NULL);
534 pipe_sampler_view_reference(&src_view, NULL);
535
536 info->mask &= ~PIPE_MASK_ZS;
537 }
538
539 /* Optimal hardware path for blitting pixels.
540 * Scaling, format conversion, up- and downsampling (resolve) are allowed.
541 */
542 void
vc4_blit(struct pipe_context * pctx,const struct pipe_blit_info * blit_info)543 vc4_blit(struct pipe_context *pctx, const struct pipe_blit_info *blit_info)
544 {
545 struct pipe_blit_info info = *blit_info;
546
547 vc4_yuv_blit(pctx, &info);
548
549 vc4_tile_blit(pctx, &info);
550
551 if (info.mask &&
552 util_try_blit_via_copy_region(pctx, &info, false))
553 return;
554
555 vc4_stencil_blit(pctx, &info);
556
557 vc4_render_blit(pctx, &info);
558
559 if (info.mask)
560 fprintf(stderr, "Unsupported blit\n");
561 }
562