1 /*
2 * Copyright 2008 Ben Skeggs
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22
23 #include <stdint.h>
24
25 #include "compiler/nir/nir.h"
26 #include "compiler/nir/nir_builder.h"
27
28 #include "nir/pipe_nir.h"
29 #include "pipe/p_defines.h"
30
31 #include "util/u_inlines.h"
32 #include "util/u_pack_color.h"
33 #include "util/format/u_format.h"
34 #include "util/u_surface.h"
35 #include "util/u_thread.h"
36
37 #include "nv50_ir_driver.h"
38
39 #include "nvc0/nvc0_context.h"
40 #include "nvc0/nvc0_resource.h"
41
42 #include "nv50/g80_defs.xml.h"
43 #include "nv50/g80_texture.xml.h"
44
/* these are used in nv50_blit.h, so they have to be defined before that
 * header is included below.
 * NOTE(review): each mask presumably has one bit per 2D engine surface
 * format id -- confirm against the helpers in nv50_blit.h.
 */
#define NV50_ENG2D_SUPPORTED_FORMATS 0xff9ccfe1cce3ccc9ULL
#define NV50_ENG2D_NOCONVERT_FORMATS 0x009cc02000000000ULL
#define NV50_ENG2D_LUMINANCE_FORMATS 0x001cc02000000000ULL
#define NV50_ENG2D_INTENSITY_FORMATS 0x0080000000000000ULL
#define NV50_ENG2D_OPERATION_FORMATS 0x060001c000638000ULL

/* NOTE(review): NOUVEAU_DRIVER is presumably read by nv50_blit.h to select
 * the driver flavour; it must stay ahead of the include -- confirm.
 */
#define NOUVEAU_DRIVER 0xc0
#include "nv50/nv50_blit.h"
54
55 static inline uint8_t
nvc0_2d_format(enum pipe_format format,bool dst,bool dst_src_equal)56 nvc0_2d_format(enum pipe_format format, bool dst, bool dst_src_equal)
57 {
58 uint8_t id = nvc0_format_table[format].rt;
59
60 /* A8_UNORM is treated as I8_UNORM as far as the 2D engine is concerned. */
61 if (!dst && unlikely(format == PIPE_FORMAT_I8_UNORM) && !dst_src_equal)
62 return G80_SURFACE_FORMAT_A8_UNORM;
63
64 /* Hardware values for color formats range from 0xc0 to 0xff,
65 * but the 2D engine doesn't support all of them.
66 */
67 if (nv50_2d_format_supported(format))
68 return id;
69 assert(dst_src_equal);
70
71 switch (util_format_get_blocksize(format)) {
72 case 1:
73 return G80_SURFACE_FORMAT_R8_UNORM;
74 case 2:
75 return G80_SURFACE_FORMAT_RG8_UNORM;
76 case 4:
77 return G80_SURFACE_FORMAT_BGRA8_UNORM;
78 case 8:
79 return G80_SURFACE_FORMAT_RGBA16_UNORM;
80 case 16:
81 return G80_SURFACE_FORMAT_RGBA32_FLOAT;
82 default:
83 assert(0);
84 return 0;
85 }
86 }
87
88 static int
nvc0_2d_texture_set(struct nouveau_pushbuf * push,bool dst,struct nv50_miptree * mt,unsigned level,unsigned layer,enum pipe_format pformat,bool dst_src_pformat_equal)89 nvc0_2d_texture_set(struct nouveau_pushbuf *push, bool dst,
90 struct nv50_miptree *mt, unsigned level, unsigned layer,
91 enum pipe_format pformat, bool dst_src_pformat_equal)
92 {
93 struct nouveau_bo *bo = mt->base.bo;
94 uint32_t width, height, depth;
95 uint32_t format;
96 uint32_t mthd = dst ? NV50_2D_DST_FORMAT : NV50_2D_SRC_FORMAT;
97 uint32_t offset = mt->level[level].offset;
98
99 format = nvc0_2d_format(pformat, dst, dst_src_pformat_equal);
100 if (!format) {
101 NOUVEAU_ERR("invalid/unsupported surface format: %s\n",
102 util_format_name(pformat));
103 return 1;
104 }
105
106 width = u_minify(mt->base.base.width0, level) << mt->ms_x;
107 height = u_minify(mt->base.base.height0, level) << mt->ms_y;
108 depth = u_minify(mt->base.base.depth0, level);
109
110 /* layer has to be < depth, and depth > tile depth / 2 */
111
112 if (!mt->layout_3d) {
113 offset += mt->layer_stride * layer;
114 layer = 0;
115 depth = 1;
116 } else
117 if (!dst) {
118 offset += nvc0_mt_zslice_offset(mt, level, layer);
119 layer = 0;
120 }
121
122 if (!nouveau_bo_memtype(bo)) {
123 BEGIN_NVC0(push, SUBC_2D(mthd), 2);
124 PUSH_DATA (push, format);
125 PUSH_DATA (push, 1);
126 BEGIN_NVC0(push, SUBC_2D(mthd + 0x14), 5);
127 PUSH_DATA (push, mt->level[level].pitch);
128 PUSH_DATA (push, width);
129 PUSH_DATA (push, height);
130 PUSH_DATAh(push, bo->offset + offset);
131 PUSH_DATA (push, bo->offset + offset);
132 } else {
133 BEGIN_NVC0(push, SUBC_2D(mthd), 5);
134 PUSH_DATA (push, format);
135 PUSH_DATA (push, 0);
136 PUSH_DATA (push, mt->level[level].tile_mode);
137 PUSH_DATA (push, depth);
138 PUSH_DATA (push, layer);
139 BEGIN_NVC0(push, SUBC_2D(mthd + 0x18), 4);
140 PUSH_DATA (push, width);
141 PUSH_DATA (push, height);
142 PUSH_DATAh(push, bo->offset + offset);
143 PUSH_DATA (push, bo->offset + offset);
144 }
145
146 if (dst) {
147 IMMED_NVC0(push, SUBC_2D(NVC0_2D_SET_DST_COLOR_RENDER_TO_ZETA_SURFACE),
148 util_format_is_depth_or_stencil(pformat));
149 }
150
151 #if 0
152 if (dst) {
153 BEGIN_NVC0(push, SUBC_2D(NVC0_2D_CLIP_X), 4);
154 PUSH_DATA (push, 0);
155 PUSH_DATA (push, 0);
156 PUSH_DATA (push, width);
157 PUSH_DATA (push, height);
158 }
159 #endif
160 return 0;
161 }
162
163 static int
nvc0_2d_texture_do_copy(struct nouveau_pushbuf * push,struct nv50_miptree * dst,unsigned dst_level,unsigned dx,unsigned dy,unsigned dz,struct nv50_miptree * src,unsigned src_level,unsigned sx,unsigned sy,unsigned sz,unsigned w,unsigned h)164 nvc0_2d_texture_do_copy(struct nouveau_pushbuf *push,
165 struct nv50_miptree *dst, unsigned dst_level,
166 unsigned dx, unsigned dy, unsigned dz,
167 struct nv50_miptree *src, unsigned src_level,
168 unsigned sx, unsigned sy, unsigned sz,
169 unsigned w, unsigned h)
170 {
171 const enum pipe_format dfmt = dst->base.base.format;
172 const enum pipe_format sfmt = src->base.base.format;
173 int ret;
174 bool eqfmt = dfmt == sfmt;
175
176 if (!PUSH_SPACE(push, 2 * 16 + 32))
177 return PIPE_ERROR;
178
179 ret = nvc0_2d_texture_set(push, true, dst, dst_level, dz, dfmt, eqfmt);
180 if (ret)
181 return ret;
182
183 ret = nvc0_2d_texture_set(push, false, src, src_level, sz, sfmt, eqfmt);
184 if (ret)
185 return ret;
186
187 IMMED_NVC0(push, NVC0_2D(BLIT_CONTROL), 0x00);
188 BEGIN_NVC0(push, NVC0_2D(BLIT_DST_X), 4);
189 PUSH_DATA (push, dx << dst->ms_x);
190 PUSH_DATA (push, dy << dst->ms_y);
191 PUSH_DATA (push, w << dst->ms_x);
192 PUSH_DATA (push, h << dst->ms_y);
193 BEGIN_NVC0(push, NVC0_2D(BLIT_DU_DX_FRACT), 4);
194 PUSH_DATA (push, 0);
195 PUSH_DATA (push, 1);
196 PUSH_DATA (push, 0);
197 PUSH_DATA (push, 1);
198 BEGIN_NVC0(push, NVC0_2D(BLIT_SRC_X_FRACT), 4);
199 PUSH_DATA (push, 0);
200 PUSH_DATA (push, sx << src->ms_x);
201 PUSH_DATA (push, 0);
202 PUSH_DATA (push, sy << src->ms_y);
203
204 return 0;
205 }
206
207 static void
nvc0_resource_copy_region(struct pipe_context * pipe,struct pipe_resource * dst,unsigned dst_level,unsigned dstx,unsigned dsty,unsigned dstz,struct pipe_resource * src,unsigned src_level,const struct pipe_box * src_box)208 nvc0_resource_copy_region(struct pipe_context *pipe,
209 struct pipe_resource *dst, unsigned dst_level,
210 unsigned dstx, unsigned dsty, unsigned dstz,
211 struct pipe_resource *src, unsigned src_level,
212 const struct pipe_box *src_box)
213 {
214 struct nvc0_context *nvc0 = nvc0_context(pipe);
215 int ret;
216 bool m2mf;
217 unsigned dst_layer = dstz, src_layer = src_box->z;
218
219 if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) {
220 nouveau_copy_buffer(&nvc0->base,
221 nv04_resource(dst), dstx,
222 nv04_resource(src), src_box->x, src_box->width);
223 NOUVEAU_DRV_STAT(&nvc0->screen->base, buf_copy_bytes, src_box->width);
224 return;
225 }
226 NOUVEAU_DRV_STAT(&nvc0->screen->base, tex_copy_count, 1);
227
228 /* 0 and 1 are equal, only supporting 0/1, 2, 4 and 8 */
229 assert((src->nr_samples | 1) == (dst->nr_samples | 1));
230
231 m2mf = (src->format == dst->format) ||
232 (util_format_get_blocksizebits(src->format) ==
233 util_format_get_blocksizebits(dst->format));
234
235 nv04_resource(dst)->status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING;
236
237 if (m2mf) {
238 struct nv50_miptree *src_mt = nv50_miptree(src);
239 struct nv50_miptree *dst_mt = nv50_miptree(dst);
240 struct nv50_m2mf_rect drect, srect;
241 unsigned i;
242 unsigned nx = util_format_get_nblocksx(src->format, src_box->width)
243 << src_mt->ms_x;
244 unsigned ny = util_format_get_nblocksy(src->format, src_box->height)
245 << src_mt->ms_y;
246
247 nv50_m2mf_rect_setup(&drect, dst, dst_level, dstx, dsty, dstz);
248 nv50_m2mf_rect_setup(&srect, src, src_level,
249 src_box->x, src_box->y, src_box->z);
250
251 for (i = 0; i < src_box->depth; ++i) {
252 nvc0->m2mf_copy_rect(nvc0, &drect, &srect, nx, ny);
253
254 if (dst_mt->layout_3d)
255 drect.z++;
256 else
257 drect.base += dst_mt->layer_stride;
258
259 if (src_mt->layout_3d)
260 srect.z++;
261 else
262 srect.base += src_mt->layer_stride;
263 }
264 return;
265 }
266
267 assert(nv50_2d_dst_format_faithful(dst->format));
268 assert(nv50_2d_src_format_faithful(src->format));
269
270 BCTX_REFN(nvc0->bufctx, 2D, nv04_resource(src), RD);
271 BCTX_REFN(nvc0->bufctx, 2D, nv04_resource(dst), WR);
272 nouveau_pushbuf_bufctx(nvc0->base.pushbuf, nvc0->bufctx);
273 PUSH_VAL(nvc0->base.pushbuf);
274
275 for (; dst_layer < dstz + src_box->depth; ++dst_layer, ++src_layer) {
276 ret = nvc0_2d_texture_do_copy(nvc0->base.pushbuf,
277 nv50_miptree(dst), dst_level,
278 dstx, dsty, dst_layer,
279 nv50_miptree(src), src_level,
280 src_box->x, src_box->y, src_layer,
281 src_box->width, src_box->height);
282 if (ret)
283 break;
284 }
285 nouveau_bufctx_reset(nvc0->bufctx, 0);
286 }
287
288 static void
nvc0_clear_render_target(struct pipe_context * pipe,struct pipe_surface * dst,const union pipe_color_union * color,unsigned dstx,unsigned dsty,unsigned width,unsigned height,bool render_condition_enabled)289 nvc0_clear_render_target(struct pipe_context *pipe,
290 struct pipe_surface *dst,
291 const union pipe_color_union *color,
292 unsigned dstx, unsigned dsty,
293 unsigned width, unsigned height,
294 bool render_condition_enabled)
295 {
296 struct nvc0_context *nvc0 = nvc0_context(pipe);
297 struct nouveau_pushbuf *push = nvc0->base.pushbuf;
298 struct nv50_surface *sf = nv50_surface(dst);
299 struct nv04_resource *res = nv04_resource(sf->base.texture);
300 unsigned z;
301
302 assert(dst->texture->target != PIPE_BUFFER);
303
304 if (!PUSH_SPACE(push, 32 + sf->depth))
305 return;
306
307 PUSH_REF1 (push, res->bo, res->domain | NOUVEAU_BO_WR);
308
309 BEGIN_NVC0(push, NVC0_3D(CLEAR_COLOR(0)), 4);
310 PUSH_DATAf(push, color->f[0]);
311 PUSH_DATAf(push, color->f[1]);
312 PUSH_DATAf(push, color->f[2]);
313 PUSH_DATAf(push, color->f[3]);
314
315 BEGIN_NVC0(push, NVC0_3D(SCREEN_SCISSOR_HORIZ), 2);
316 PUSH_DATA (push, ( width << 16) | dstx);
317 PUSH_DATA (push, (height << 16) | dsty);
318
319 BEGIN_NVC0(push, NVC0_3D(RT_CONTROL), 1);
320 PUSH_DATA (push, 1);
321 BEGIN_NVC0(push, NVC0_3D(RT_ADDRESS_HIGH(0)), 9);
322 PUSH_DATAh(push, res->address + sf->offset);
323 PUSH_DATA (push, res->address + sf->offset);
324 if (likely(nouveau_bo_memtype(res->bo))) {
325 struct nv50_miptree *mt = nv50_miptree(dst->texture);
326
327 PUSH_DATA(push, sf->width);
328 PUSH_DATA(push, sf->height);
329 PUSH_DATA(push, nvc0_format_table[dst->format].rt);
330 PUSH_DATA(push, (mt->layout_3d << 16) |
331 mt->level[sf->base.u.tex.level].tile_mode);
332 PUSH_DATA(push, dst->u.tex.first_layer + sf->depth);
333 PUSH_DATA(push, mt->layer_stride >> 2);
334 PUSH_DATA(push, dst->u.tex.first_layer);
335 IMMED_NVC0(push, NVC0_3D(MULTISAMPLE_MODE), mt->ms_mode);
336 } else {
337 if (res->base.target == PIPE_BUFFER) {
338 PUSH_DATA(push, 262144);
339 PUSH_DATA(push, 1);
340 } else {
341 PUSH_DATA(push, nv50_miptree(&res->base)->level[0].pitch);
342 PUSH_DATA(push, sf->height);
343 }
344 PUSH_DATA(push, nvc0_format_table[sf->base.format].rt);
345 PUSH_DATA(push, 1 << 12);
346 PUSH_DATA(push, 1);
347 PUSH_DATA(push, 0);
348 PUSH_DATA(push, 0);
349
350 IMMED_NVC0(push, NVC0_3D(ZETA_ENABLE), 0);
351 IMMED_NVC0(push, NVC0_3D(MULTISAMPLE_MODE), 0);
352
353 /* tiled textures don't have to be fenced, they're not mapped directly */
354 nvc0_resource_fence(nvc0, res, NOUVEAU_BO_WR);
355 }
356
357 if (!render_condition_enabled)
358 IMMED_NVC0(push, NVC0_3D(COND_MODE), NVC0_3D_COND_MODE_ALWAYS);
359
360 BEGIN_NIC0(push, NVC0_3D(CLEAR_BUFFERS), sf->depth);
361 for (z = 0; z < sf->depth; ++z) {
362 PUSH_DATA (push, 0x3c |
363 (z << NVC0_3D_CLEAR_BUFFERS_LAYER__SHIFT));
364 }
365
366 if (!render_condition_enabled)
367 IMMED_NVC0(push, NVC0_3D(COND_MODE), nvc0->cond_condmode);
368
369 nvc0->dirty_3d |= NVC0_NEW_3D_FRAMEBUFFER;
370 }
371
372 static void
nvc0_clear_buffer_push_nvc0(struct pipe_context * pipe,struct pipe_resource * res,unsigned offset,unsigned size,const void * data,int data_size)373 nvc0_clear_buffer_push_nvc0(struct pipe_context *pipe,
374 struct pipe_resource *res,
375 unsigned offset, unsigned size,
376 const void *data, int data_size)
377 {
378 struct nvc0_context *nvc0 = nvc0_context(pipe);
379 struct nouveau_pushbuf *push = nvc0->base.pushbuf;
380 struct nv04_resource *buf = nv04_resource(res);
381 unsigned i;
382
383 nouveau_bufctx_refn(nvc0->bufctx, 0, buf->bo, buf->domain | NOUVEAU_BO_WR);
384 nouveau_pushbuf_bufctx(push, nvc0->bufctx);
385 PUSH_VAL(push);
386
387 unsigned count = (size + 3) / 4;
388 unsigned data_words = data_size / 4;
389
390 while (count) {
391 unsigned nr_data = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN) / data_words;
392 unsigned nr = nr_data * data_words;
393
394 if (!PUSH_SPACE(push, nr + 9))
395 break;
396
397 BEGIN_NVC0(push, NVC0_M2MF(OFFSET_OUT_HIGH), 2);
398 PUSH_DATAh(push, buf->address + offset);
399 PUSH_DATA (push, buf->address + offset);
400 BEGIN_NVC0(push, NVC0_M2MF(LINE_LENGTH_IN), 2);
401 PUSH_DATA (push, MIN2(size, nr * 4));
402 PUSH_DATA (push, 1);
403 BEGIN_NVC0(push, NVC0_M2MF(EXEC), 1);
404 PUSH_DATA (push, 0x100111);
405
406 /* must not be interrupted (trap on QUERY fence, 0x50 works however) */
407 BEGIN_NIC0(push, NVC0_M2MF(DATA), nr);
408 for (i = 0; i < nr_data; i++)
409 PUSH_DATAp(push, data, data_words);
410
411 count -= nr;
412 offset += nr * 4;
413 size -= nr * 4;
414 }
415
416 nvc0_resource_validate(nvc0, buf, NOUVEAU_BO_WR);
417
418 nouveau_bufctx_reset(nvc0->bufctx, 0);
419 }
420
421 static void
nvc0_clear_buffer_push_nve4(struct pipe_context * pipe,struct pipe_resource * res,unsigned offset,unsigned size,const void * data,int data_size)422 nvc0_clear_buffer_push_nve4(struct pipe_context *pipe,
423 struct pipe_resource *res,
424 unsigned offset, unsigned size,
425 const void *data, int data_size)
426 {
427 struct nvc0_context *nvc0 = nvc0_context(pipe);
428 struct nouveau_pushbuf *push = nvc0->base.pushbuf;
429 struct nv04_resource *buf = nv04_resource(res);
430 unsigned i;
431
432 nouveau_bufctx_refn(nvc0->bufctx, 0, buf->bo, buf->domain | NOUVEAU_BO_WR);
433 nouveau_pushbuf_bufctx(push, nvc0->bufctx);
434 PUSH_VAL(push);
435
436 unsigned count = (size + 3) / 4;
437 unsigned data_words = data_size / 4;
438
439 while (count) {
440 unsigned nr_data = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN) / data_words;
441 unsigned nr = nr_data * data_words;
442
443 if (!PUSH_SPACE(push, nr + 10))
444 break;
445
446 BEGIN_NVC0(push, NVE4_P2MF(UPLOAD_DST_ADDRESS_HIGH), 2);
447 PUSH_DATAh(push, buf->address + offset);
448 PUSH_DATA (push, buf->address + offset);
449 BEGIN_NVC0(push, NVE4_P2MF(UPLOAD_LINE_LENGTH_IN), 2);
450 PUSH_DATA (push, MIN2(size, nr * 4));
451 PUSH_DATA (push, 1);
452 /* must not be interrupted (trap on QUERY fence, 0x50 works however) */
453 BEGIN_1IC0(push, NVE4_P2MF(UPLOAD_EXEC), nr + 1);
454 PUSH_DATA (push, 0x1001);
455 for (i = 0; i < nr_data; i++)
456 PUSH_DATAp(push, data, data_words);
457
458 count -= nr;
459 offset += nr * 4;
460 size -= nr * 4;
461 }
462
463 nvc0_resource_validate(nvc0, buf, NOUVEAU_BO_WR);
464
465 nouveau_bufctx_reset(nvc0->bufctx, 0);
466 }
467
468 static void
nvc0_clear_buffer_push(struct pipe_context * pipe,struct pipe_resource * res,unsigned offset,unsigned size,const void * data,int data_size)469 nvc0_clear_buffer_push(struct pipe_context *pipe,
470 struct pipe_resource *res,
471 unsigned offset, unsigned size,
472 const void *data, int data_size)
473 {
474 struct nvc0_context *nvc0 = nvc0_context(pipe);
475 unsigned tmp;
476
477 if (data_size == 1) {
478 tmp = *(unsigned char *)data;
479 tmp = (tmp << 24) | (tmp << 16) | (tmp << 8) | tmp;
480 data = &tmp;
481 data_size = 4;
482 } else if (data_size == 2) {
483 tmp = *(unsigned short *)data;
484 tmp = (tmp << 16) | tmp;
485 data = &tmp;
486 data_size = 4;
487 }
488
489 if (nvc0->screen->base.class_3d < NVE4_3D_CLASS)
490 nvc0_clear_buffer_push_nvc0(pipe, res, offset, size, data, data_size);
491 else
492 nvc0_clear_buffer_push_nve4(pipe, res, offset, size, data, data_size);
493 }
494
495 static void
nvc0_clear_buffer(struct pipe_context * pipe,struct pipe_resource * res,unsigned offset,unsigned size,const void * data,int data_size)496 nvc0_clear_buffer(struct pipe_context *pipe,
497 struct pipe_resource *res,
498 unsigned offset, unsigned size,
499 const void *data, int data_size)
500 {
501 struct nvc0_context *nvc0 = nvc0_context(pipe);
502 struct nouveau_pushbuf *push = nvc0->base.pushbuf;
503 struct nv04_resource *buf = nv04_resource(res);
504 union pipe_color_union color;
505 enum pipe_format dst_fmt;
506 unsigned width, height, elements;
507
508 assert(res->target == PIPE_BUFFER);
509 assert(nouveau_bo_memtype(buf->bo) == 0);
510
511 switch (data_size) {
512 case 16:
513 dst_fmt = PIPE_FORMAT_R32G32B32A32_UINT;
514 memcpy(&color.ui, data, 16);
515 break;
516 case 12:
517 /* RGB32 is not a valid RT format. This will be handled by the pushbuf
518 * uploader.
519 */
520 dst_fmt = PIPE_FORMAT_NONE; /* Init dst_fmt to silence gcc warning */
521 break;
522 case 8:
523 dst_fmt = PIPE_FORMAT_R32G32_UINT;
524 memcpy(&color.ui, data, 8);
525 memset(&color.ui[2], 0, 8);
526 break;
527 case 4:
528 dst_fmt = PIPE_FORMAT_R32_UINT;
529 memcpy(&color.ui, data, 4);
530 memset(&color.ui[1], 0, 12);
531 break;
532 case 2:
533 dst_fmt = PIPE_FORMAT_R16_UINT;
534 color.ui[0] = util_cpu_to_le32(
535 util_le16_to_cpu(*(unsigned short *)data));
536 memset(&color.ui[1], 0, 12);
537 break;
538 case 1:
539 dst_fmt = PIPE_FORMAT_R8_UINT;
540 color.ui[0] = util_cpu_to_le32(*(unsigned char *)data);
541 memset(&color.ui[1], 0, 12);
542 break;
543 default:
544 assert(!"Unsupported element size");
545 return;
546 }
547
548 util_range_add(&buf->base, &buf->valid_buffer_range, offset, offset + size);
549
550 assert(size % data_size == 0);
551
552 if (data_size == 12) {
553 nvc0_clear_buffer_push(pipe, res, offset, size, data, data_size);
554 return;
555 }
556
557 if (offset & 0xff) {
558 unsigned fixup_size = MIN2(size, align(offset, 0x100) - offset);
559 assert(fixup_size % data_size == 0);
560 nvc0_clear_buffer_push(pipe, res, offset, fixup_size, data, data_size);
561 offset += fixup_size;
562 size -= fixup_size;
563 if (!size)
564 return;
565 }
566
567 elements = size / data_size;
568 height = (elements + 16383) / 16384;
569 width = elements / height;
570 if (height > 1)
571 width &= ~0xff;
572 assert(width > 0);
573
574 if (!PUSH_SPACE(push, 40))
575 return;
576
577 PUSH_REF1 (push, buf->bo, buf->domain | NOUVEAU_BO_WR);
578
579 BEGIN_NVC0(push, NVC0_3D(CLEAR_COLOR(0)), 4);
580 PUSH_DATA (push, color.ui[0]);
581 PUSH_DATA (push, color.ui[1]);
582 PUSH_DATA (push, color.ui[2]);
583 PUSH_DATA (push, color.ui[3]);
584 BEGIN_NVC0(push, NVC0_3D(SCREEN_SCISSOR_HORIZ), 2);
585 PUSH_DATA (push, width << 16);
586 PUSH_DATA (push, height << 16);
587
588 IMMED_NVC0(push, NVC0_3D(RT_CONTROL), 1);
589
590 BEGIN_NVC0(push, NVC0_3D(RT_ADDRESS_HIGH(0)), 9);
591 PUSH_DATAh(push, buf->address + offset);
592 PUSH_DATA (push, buf->address + offset);
593 PUSH_DATA (push, align(width * data_size, 0x100));
594 PUSH_DATA (push, height);
595 PUSH_DATA (push, nvc0_format_table[dst_fmt].rt);
596 PUSH_DATA (push, NVC0_3D_RT_TILE_MODE_LINEAR);
597 PUSH_DATA (push, 1);
598 PUSH_DATA (push, 0);
599 PUSH_DATA (push, 0);
600
601 IMMED_NVC0(push, NVC0_3D(ZETA_ENABLE), 0);
602 IMMED_NVC0(push, NVC0_3D(MULTISAMPLE_MODE), 0);
603
604 IMMED_NVC0(push, NVC0_3D(COND_MODE), NVC0_3D_COND_MODE_ALWAYS);
605
606 IMMED_NVC0(push, NVC0_3D(CLEAR_BUFFERS), 0x3c);
607
608 IMMED_NVC0(push, NVC0_3D(COND_MODE), nvc0->cond_condmode);
609
610 nvc0_resource_validate(nvc0, buf, NOUVEAU_BO_WR);
611
612 if (width * height != elements) {
613 offset += width * height * data_size;
614 width = elements - width * height;
615 nvc0_clear_buffer_push(pipe, res, offset, width * data_size,
616 data, data_size);
617 }
618
619 nvc0->dirty_3d |= NVC0_NEW_3D_FRAMEBUFFER;
620 }
621
622 static void
nvc0_clear_depth_stencil(struct pipe_context * pipe,struct pipe_surface * dst,unsigned clear_flags,double depth,unsigned stencil,unsigned dstx,unsigned dsty,unsigned width,unsigned height,bool render_condition_enabled)623 nvc0_clear_depth_stencil(struct pipe_context *pipe,
624 struct pipe_surface *dst,
625 unsigned clear_flags,
626 double depth,
627 unsigned stencil,
628 unsigned dstx, unsigned dsty,
629 unsigned width, unsigned height,
630 bool render_condition_enabled)
631 {
632 struct nvc0_context *nvc0 = nvc0_context(pipe);
633 struct nouveau_pushbuf *push = nvc0->base.pushbuf;
634 struct nv50_miptree *mt = nv50_miptree(dst->texture);
635 struct nv50_surface *sf = nv50_surface(dst);
636 uint32_t mode = 0;
637 int unk = mt->base.base.target == PIPE_TEXTURE_2D;
638 unsigned z;
639
640 assert(dst->texture->target != PIPE_BUFFER);
641
642 if (!PUSH_SPACE(push, 32 + sf->depth))
643 return;
644
645 PUSH_REF1 (push, mt->base.bo, mt->base.domain | NOUVEAU_BO_WR);
646
647 if (clear_flags & PIPE_CLEAR_DEPTH) {
648 BEGIN_NVC0(push, NVC0_3D(CLEAR_DEPTH), 1);
649 PUSH_DATAf(push, depth);
650 mode |= NVC0_3D_CLEAR_BUFFERS_Z;
651 }
652
653 if (clear_flags & PIPE_CLEAR_STENCIL) {
654 BEGIN_NVC0(push, NVC0_3D(CLEAR_STENCIL), 1);
655 PUSH_DATA (push, stencil & 0xff);
656 mode |= NVC0_3D_CLEAR_BUFFERS_S;
657 }
658
659 BEGIN_NVC0(push, NVC0_3D(SCREEN_SCISSOR_HORIZ), 2);
660 PUSH_DATA (push, ( width << 16) | dstx);
661 PUSH_DATA (push, (height << 16) | dsty);
662
663 BEGIN_NVC0(push, NVC0_3D(ZETA_ADDRESS_HIGH), 5);
664 PUSH_DATAh(push, mt->base.address + sf->offset);
665 PUSH_DATA (push, mt->base.address + sf->offset);
666 PUSH_DATA (push, nvc0_format_table[dst->format].rt);
667 PUSH_DATA (push, mt->level[sf->base.u.tex.level].tile_mode);
668 PUSH_DATA (push, mt->layer_stride >> 2);
669 BEGIN_NVC0(push, NVC0_3D(ZETA_ENABLE), 1);
670 PUSH_DATA (push, 1);
671 BEGIN_NVC0(push, NVC0_3D(ZETA_HORIZ), 3);
672 PUSH_DATA (push, sf->width);
673 PUSH_DATA (push, sf->height);
674 PUSH_DATA (push, (unk << 16) | (dst->u.tex.first_layer + sf->depth));
675 BEGIN_NVC0(push, NVC0_3D(ZETA_BASE_LAYER), 1);
676 PUSH_DATA (push, dst->u.tex.first_layer);
677 IMMED_NVC0(push, NVC0_3D(MULTISAMPLE_MODE), mt->ms_mode);
678
679 if (!render_condition_enabled)
680 IMMED_NVC0(push, NVC0_3D(COND_MODE), NVC0_3D_COND_MODE_ALWAYS);
681
682 BEGIN_NIC0(push, NVC0_3D(CLEAR_BUFFERS), sf->depth);
683 for (z = 0; z < sf->depth; ++z) {
684 PUSH_DATA (push, mode |
685 (z << NVC0_3D_CLEAR_BUFFERS_LAYER__SHIFT));
686 }
687
688 if (!render_condition_enabled)
689 IMMED_NVC0(push, NVC0_3D(COND_MODE), nvc0->cond_condmode);
690
691 nvc0->dirty_3d |= NVC0_NEW_3D_FRAMEBUFFER;
692 }
693
694 void
nvc0_clear(struct pipe_context * pipe,unsigned buffers,const struct pipe_scissor_state * scissor_state,const union pipe_color_union * color,double depth,unsigned stencil)695 nvc0_clear(struct pipe_context *pipe, unsigned buffers,
696 const struct pipe_scissor_state *scissor_state,
697 const union pipe_color_union *color,
698 double depth, unsigned stencil)
699 {
700 struct nvc0_context *nvc0 = nvc0_context(pipe);
701 struct nouveau_pushbuf *push = nvc0->base.pushbuf;
702 struct pipe_framebuffer_state *fb = &nvc0->framebuffer;
703 unsigned i, j, k;
704 uint32_t mode = 0;
705
706 simple_mtx_lock(&nvc0->screen->state_lock);
707
708 /* don't need NEW_BLEND, COLOR_MASK doesn't affect CLEAR_BUFFERS */
709 if (!nvc0_state_validate_3d(nvc0, NVC0_NEW_3D_FRAMEBUFFER))
710 goto out;
711
712 if (scissor_state) {
713 uint32_t minx = scissor_state->minx;
714 uint32_t maxx = MIN2(fb->width, scissor_state->maxx);
715 uint32_t miny = scissor_state->miny;
716 uint32_t maxy = MIN2(fb->height, scissor_state->maxy);
717 if (maxx <= minx || maxy <= miny)
718 goto out;
719
720 BEGIN_NVC0(push, NVC0_3D(SCREEN_SCISSOR_HORIZ), 2);
721 PUSH_DATA (push, minx | (maxx - minx) << 16);
722 PUSH_DATA (push, miny | (maxy - miny) << 16);
723 }
724
725 if (buffers & PIPE_CLEAR_COLOR && fb->nr_cbufs) {
726 BEGIN_NVC0(push, NVC0_3D(CLEAR_COLOR(0)), 4);
727 PUSH_DATAf(push, color->f[0]);
728 PUSH_DATAf(push, color->f[1]);
729 PUSH_DATAf(push, color->f[2]);
730 PUSH_DATAf(push, color->f[3]);
731 if (buffers & PIPE_CLEAR_COLOR0)
732 mode =
733 NVC0_3D_CLEAR_BUFFERS_R | NVC0_3D_CLEAR_BUFFERS_G |
734 NVC0_3D_CLEAR_BUFFERS_B | NVC0_3D_CLEAR_BUFFERS_A;
735 }
736
737 if (buffers & PIPE_CLEAR_DEPTH) {
738 BEGIN_NVC0(push, NVC0_3D(CLEAR_DEPTH), 1);
739 PUSH_DATA (push, fui(depth));
740 mode |= NVC0_3D_CLEAR_BUFFERS_Z;
741 }
742
743 if (buffers & PIPE_CLEAR_STENCIL) {
744 BEGIN_NVC0(push, NVC0_3D(CLEAR_STENCIL), 1);
745 PUSH_DATA (push, stencil & 0xff);
746 mode |= NVC0_3D_CLEAR_BUFFERS_S;
747 }
748
749 if (mode) {
750 int zs_layers = 0, color0_layers = 0;
751 if (fb->cbufs[0] && (mode & 0x3c))
752 color0_layers = fb->cbufs[0]->u.tex.last_layer -
753 fb->cbufs[0]->u.tex.first_layer + 1;
754 if (fb->zsbuf && (mode & ~0x3c))
755 zs_layers = fb->zsbuf->u.tex.last_layer -
756 fb->zsbuf->u.tex.first_layer + 1;
757
758 for (j = 0; j < MIN2(zs_layers, color0_layers); j++) {
759 BEGIN_NVC0(push, NVC0_3D(CLEAR_BUFFERS), 1);
760 PUSH_DATA(push, mode | (j << NVC0_3D_CLEAR_BUFFERS_LAYER__SHIFT));
761 }
762 for (k = j; k < zs_layers; k++) {
763 BEGIN_NVC0(push, NVC0_3D(CLEAR_BUFFERS), 1);
764 PUSH_DATA(push, (mode & ~0x3c) | (k << NVC0_3D_CLEAR_BUFFERS_LAYER__SHIFT));
765 }
766 for (k = j; k < color0_layers; k++) {
767 BEGIN_NVC0(push, NVC0_3D(CLEAR_BUFFERS), 1);
768 PUSH_DATA(push, (mode & 0x3c) | (k << NVC0_3D_CLEAR_BUFFERS_LAYER__SHIFT));
769 }
770 }
771
772 for (i = 1; i < fb->nr_cbufs; i++) {
773 struct pipe_surface *sf = fb->cbufs[i];
774 if (!sf || !(buffers & (PIPE_CLEAR_COLOR0 << i)))
775 continue;
776 for (j = 0; j <= sf->u.tex.last_layer - sf->u.tex.first_layer; j++) {
777 BEGIN_NVC0(push, NVC0_3D(CLEAR_BUFFERS), 1);
778 PUSH_DATA (push, (i << 6) | 0x3c |
779 (j << NVC0_3D_CLEAR_BUFFERS_LAYER__SHIFT));
780 }
781 }
782
783 /* restore screen scissor */
784 if (scissor_state) {
785 BEGIN_NVC0(push, NVC0_3D(SCREEN_SCISSOR_HORIZ), 2);
786 PUSH_DATA (push, fb->width << 16);
787 PUSH_DATA (push, fb->height << 16);
788 }
789
790 out:
791 PUSH_KICK(push);
792 simple_mtx_unlock(&nvc0->screen->state_lock);
793 }
794
795 static void
gm200_evaluate_depth_buffer(struct pipe_context * pipe)796 gm200_evaluate_depth_buffer(struct pipe_context *pipe)
797 {
798 struct nvc0_context *nvc0 = nvc0_context(pipe);
799 struct nouveau_pushbuf *push = nvc0->base.pushbuf;
800
801 simple_mtx_lock(&nvc0->screen->state_lock);
802 nvc0_state_validate_3d(nvc0, NVC0_NEW_3D_FRAMEBUFFER);
803 IMMED_NVC0(push, SUBC_3D(0x11fc), 1);
804 PUSH_KICK(push);
805 simple_mtx_unlock(&nvc0->screen->state_lock);
806 }
807
808
809 /* =============================== BLIT CODE ===================================
810 */
811
/* Blit shaders and samplers reached through screen->blitter. */
struct nvc0_blitter
{
   /* Fragment programs indexed by [texture type][blit mode]; an entry is
    * created by nvc0_blit_select_fp() the first time it is needed.
    */
   struct nvc0_program *fp[NV50_BLIT_MAX_TEXTURE_TYPES][NV50_BLIT_MODES];
   /* Vertex program; created by nvc0_blit_select_vp() on first use. */
   struct nvc0_program *vp;

   struct nv50_tsc_entry sampler[2]; /* nearest, bilinear */

   /* Held while a missing vp or fp entry is being created. */
   mtx_t mutex;

   /* NOTE(review): presumably the screen this blitter belongs to -- the
    * assignment is not visible here, confirm.
    */
   struct nvc0_screen *screen;
};
823
/* Per-context state used while performing a blit. */
struct nvc0_blitctx
{
   struct nvc0_context *nvc0; /* context the blit helpers operate on */
   struct nvc0_program *fp;   /* set by nvc0_blit_select_fp() */
   struct nvc0_program *vp;   /* set by nvc0_blit_select_vp() */
   uint8_t mode;              /* second index into nvc0_blitter::fp */
   uint16_t color_mask;
   uint8_t filter;
   uint8_t render_condition_enable;
   enum pipe_texture_target target;
   /* NOTE(review): presumably a snapshot of the context state that the blit
    * overrides, to be restored afterwards -- the save/restore code is not
    * visible here, confirm.
    */
   struct {
      struct pipe_framebuffer_state fb;
      struct nvc0_window_rect_stateobj window_rect;
      struct nvc0_rasterizer_stateobj *rast;
      struct nvc0_program *vp;
      struct nvc0_program *tcp;
      struct nvc0_program *tep;
      struct nvc0_program *gp;
      struct nvc0_program *fp;
      unsigned num_textures[5];
      unsigned num_samplers[5];
      struct pipe_sampler_view *texture[2];
      struct nv50_tsc_entry *sampler[2];
      unsigned min_samples;
      uint32_t dirty_3d;
   } saved;
   struct nvc0_rasterizer_stateobj rast;
};
852
853 static void *
nvc0_blitter_make_vp(struct pipe_context * pipe)854 nvc0_blitter_make_vp(struct pipe_context *pipe)
855 {
856 const nir_shader_compiler_options *options =
857 nv50_ir_nir_shader_compiler_options(nouveau_screen(pipe->screen)->device->chipset,
858 PIPE_SHADER_VERTEX);
859
860 struct nir_builder b =
861 nir_builder_init_simple_shader(MESA_SHADER_VERTEX, options,
862 "blitter_vp");
863
864 const struct glsl_type* float2 = glsl_vector_type(GLSL_TYPE_FLOAT, 2);
865 const struct glsl_type* float3 = glsl_vector_type(GLSL_TYPE_FLOAT, 3);
866
867 nir_variable *ipos =
868 nir_variable_create(b.shader, nir_var_shader_in, float2, "ipos");
869 ipos->data.location = VERT_ATTRIB_GENERIC0;
870 ipos->data.driver_location = 0;
871
872 nir_variable *opos =
873 nir_variable_create(b.shader, nir_var_shader_out, float2, "opos");
874 opos->data.location = VARYING_SLOT_POS;
875 opos->data.driver_location = 0;
876
877 nir_variable *itex =
878 nir_variable_create(b.shader, nir_var_shader_in, float3, "itex");
879 itex->data.location = VERT_ATTRIB_GENERIC1;
880 itex->data.driver_location = 1;
881
882 nir_variable *otex =
883 nir_variable_create(b.shader, nir_var_shader_out, float3, "otex");
884 otex->data.location = VARYING_SLOT_VAR0;
885 otex->data.driver_location = 1;
886
887 nir_copy_var(&b, opos, ipos);
888 nir_copy_var(&b, otex, itex);
889
890 NIR_PASS_V(b.shader, nir_lower_var_copies);
891
892 return pipe_shader_from_nir(pipe, b.shader);
893 }
894
895 static void
nvc0_blitter_make_sampler(struct nvc0_blitter * blit)896 nvc0_blitter_make_sampler(struct nvc0_blitter *blit)
897 {
898 /* clamp to edge, min/max lod = 0, nearest filtering */
899
900 blit->sampler[0].id = -1;
901
902 blit->sampler[0].tsc[0] = G80_TSC_0_SRGB_CONVERSION |
903 (G80_TSC_WRAP_CLAMP_TO_EDGE << G80_TSC_0_ADDRESS_U__SHIFT) |
904 (G80_TSC_WRAP_CLAMP_TO_EDGE << G80_TSC_0_ADDRESS_V__SHIFT) |
905 (G80_TSC_WRAP_CLAMP_TO_EDGE << G80_TSC_0_ADDRESS_P__SHIFT);
906 blit->sampler[0].tsc[1] =
907 G80_TSC_1_MAG_FILTER_NEAREST |
908 G80_TSC_1_MIN_FILTER_NEAREST |
909 G80_TSC_1_MIP_FILTER_NONE;
910
911 /* clamp to edge, min/max lod = 0, bilinear filtering */
912
913 blit->sampler[1].id = -1;
914
915 blit->sampler[1].tsc[0] = blit->sampler[0].tsc[0];
916 blit->sampler[1].tsc[1] =
917 G80_TSC_1_MAG_FILTER_LINEAR |
918 G80_TSC_1_MIN_FILTER_LINEAR |
919 G80_TSC_1_MIP_FILTER_NONE;
920 }
921
922 static void
nvc0_blit_select_vp(struct nvc0_blitctx * ctx)923 nvc0_blit_select_vp(struct nvc0_blitctx *ctx)
924 {
925 struct nvc0_blitter *blitter = ctx->nvc0->screen->blitter;
926
927 if (!blitter->vp) {
928 mtx_lock(&blitter->mutex);
929 if (!blitter->vp)
930 blitter->vp = nvc0_blitter_make_vp(&ctx->nvc0->base.pipe);
931 mtx_unlock(&blitter->mutex);
932 }
933 ctx->vp = blitter->vp;
934 }
935
936 static void
nvc0_blit_select_fp(struct nvc0_blitctx * ctx,const struct pipe_blit_info * info)937 nvc0_blit_select_fp(struct nvc0_blitctx *ctx, const struct pipe_blit_info *info)
938 {
939 struct nvc0_blitter *blitter = ctx->nvc0->screen->blitter;
940
941 const enum pipe_texture_target ptarg =
942 nv50_blit_reinterpret_pipe_texture_target(info->src.resource->target);
943
944 const unsigned targ = nv50_blit_texture_type(ptarg);
945 const unsigned mode = ctx->mode;
946
947 if (!blitter->fp[targ][mode]) {
948 mtx_lock(&blitter->mutex);
949 if (!blitter->fp[targ][mode])
950 blitter->fp[targ][mode] =
951 nv50_blitter_make_fp(&ctx->nvc0->base.pipe, mode, ptarg);
952 mtx_unlock(&blitter->mutex);
953 }
954 ctx->fp = blitter->fp[targ][mode];
955 }
956
957 static void
nvc0_blit_set_dst(struct nvc0_blitctx * ctx,struct pipe_resource * res,unsigned level,unsigned layer,enum pipe_format format)958 nvc0_blit_set_dst(struct nvc0_blitctx *ctx,
959 struct pipe_resource *res, unsigned level, unsigned layer,
960 enum pipe_format format)
961 {
962 struct nvc0_context *nvc0 = ctx->nvc0;
963 struct pipe_context *pipe = &nvc0->base.pipe;
964 struct pipe_surface templ;
965
966 if (util_format_is_depth_or_stencil(format))
967 templ.format = nv50_blit_zeta_to_colour_format(format);
968 else
969 templ.format = format;
970
971 templ.u.tex.level = level;
972 templ.u.tex.first_layer = templ.u.tex.last_layer = layer;
973
974 if (layer == -1) {
975 templ.u.tex.first_layer = 0;
976 templ.u.tex.last_layer =
977 (res->target == PIPE_TEXTURE_3D ? res->depth0 : res->array_size) - 1;
978 }
979
980 nvc0->framebuffer.cbufs[0] = nvc0_miptree_surface_new(pipe, res, &templ);
981 nvc0->framebuffer.nr_cbufs = 1;
982 nvc0->framebuffer.zsbuf = NULL;
983 nvc0->framebuffer.width = nvc0->framebuffer.cbufs[0]->width;
984 nvc0->framebuffer.height = nvc0->framebuffer.cbufs[0]->height;
985 }
986
987 static void
nvc0_blit_set_src(struct nvc0_blitctx * ctx,struct pipe_resource * res,unsigned level,unsigned layer,enum pipe_format format,const uint8_t filter)988 nvc0_blit_set_src(struct nvc0_blitctx *ctx,
989 struct pipe_resource *res, unsigned level, unsigned layer,
990 enum pipe_format format, const uint8_t filter)
991 {
992 struct nvc0_context *nvc0 = ctx->nvc0;
993 struct pipe_context *pipe = &nvc0->base.pipe;
994 struct pipe_sampler_view templ = {0};
995 uint32_t flags;
996 unsigned s;
997 enum pipe_texture_target target;
998
999 target = nv50_blit_reinterpret_pipe_texture_target(res->target);
1000
1001 templ.target = target;
1002 templ.format = format;
1003 templ.u.tex.first_layer = templ.u.tex.last_layer = layer;
1004 templ.u.tex.first_level = templ.u.tex.last_level = level;
1005 templ.swizzle_r = PIPE_SWIZZLE_X;
1006 templ.swizzle_g = PIPE_SWIZZLE_Y;
1007 templ.swizzle_b = PIPE_SWIZZLE_Z;
1008 templ.swizzle_a = PIPE_SWIZZLE_W;
1009
1010 if (layer == -1) {
1011 templ.u.tex.first_layer = 0;
1012 templ.u.tex.last_layer =
1013 (res->target == PIPE_TEXTURE_3D ? res->depth0 : res->array_size) - 1;
1014 }
1015
1016 flags = res->last_level ? 0 : NV50_TEXVIEW_SCALED_COORDS;
1017 flags |= NV50_TEXVIEW_ACCESS_RESOLVE;
1018 if (filter && res->nr_samples == 8)
1019 flags |= NV50_TEXVIEW_FILTER_MSAA8;
1020
1021 nvc0->textures[4][0] = nvc0_create_texture_view(
1022 pipe, res, &templ, flags);
1023 nvc0->textures[4][1] = NULL;
1024
1025 for (s = 0; s <= 3; ++s)
1026 nvc0->num_textures[s] = 0;
1027 nvc0->num_textures[4] = 1;
1028
1029 templ.format = nv50_zs_to_s_format(format);
1030 if (templ.format != format) {
1031 nvc0->textures[4][1] = nvc0_create_texture_view(
1032 pipe, res, &templ, flags);
1033 nvc0->num_textures[4] = 2;
1034 }
1035 }
1036
1037 static void
nvc0_blitctx_prepare_state(struct nvc0_blitctx * blit)1038 nvc0_blitctx_prepare_state(struct nvc0_blitctx *blit)
1039 {
1040 struct nouveau_pushbuf *push = blit->nvc0->base.pushbuf;
1041
1042 /* TODO: maybe make this a MACRO (if we need more logic) ? */
1043
1044 if (blit->nvc0->cond_query && !blit->render_condition_enable)
1045 IMMED_NVC0(push, NVC0_3D(COND_MODE), NVC0_3D_COND_MODE_ALWAYS);
1046
1047 /* blend state */
1048 BEGIN_NVC0(push, NVC0_3D(COLOR_MASK(0)), 1);
1049 PUSH_DATA (push, blit->color_mask);
1050 IMMED_NVC0(push, NVC0_3D(BLEND_ENABLE(0)), 0);
1051 IMMED_NVC0(push, NVC0_3D(LOGIC_OP_ENABLE), 0);
1052
1053 /* rasterizer state */
1054 IMMED_NVC0(push, NVC0_3D(FRAG_COLOR_CLAMP_EN), 0);
1055 IMMED_NVC0(push, NVC0_3D(MULTISAMPLE_ENABLE), 0);
1056 BEGIN_NVC0(push, NVC0_3D(MSAA_MASK(0)), 4);
1057 PUSH_DATA (push, 0xffff);
1058 PUSH_DATA (push, 0xffff);
1059 PUSH_DATA (push, 0xffff);
1060 PUSH_DATA (push, 0xffff);
1061 BEGIN_NVC0(push, NVC0_3D(MACRO_POLYGON_MODE_FRONT), 1);
1062 PUSH_DATA (push, NVC0_3D_MACRO_POLYGON_MODE_FRONT_FILL);
1063 BEGIN_NVC0(push, NVC0_3D(MACRO_POLYGON_MODE_BACK), 1);
1064 PUSH_DATA (push, NVC0_3D_MACRO_POLYGON_MODE_BACK_FILL);
1065 IMMED_NVC0(push, NVC0_3D(POLYGON_SMOOTH_ENABLE), 0);
1066 IMMED_NVC0(push, NVC0_3D(POLYGON_OFFSET_FILL_ENABLE), 0);
1067 IMMED_NVC0(push, NVC0_3D(POLYGON_STIPPLE_ENABLE), 0);
1068 IMMED_NVC0(push, NVC0_3D(CULL_FACE_ENABLE), 0);
1069
1070 /* zsa state */
1071 IMMED_NVC0(push, NVC0_3D(DEPTH_TEST_ENABLE), 0);
1072 IMMED_NVC0(push, NVC0_3D(DEPTH_BOUNDS_EN), 0);
1073 IMMED_NVC0(push, NVC0_3D(STENCIL_ENABLE), 0);
1074 IMMED_NVC0(push, NVC0_3D(ALPHA_TEST_ENABLE), 0);
1075
1076 /* disable transform feedback */
1077 IMMED_NVC0(push, NVC0_3D(TFB_ENABLE), 0);
1078 }
1079
1080 static void
nvc0_blitctx_pre_blit(struct nvc0_blitctx * ctx,const struct pipe_blit_info * info)1081 nvc0_blitctx_pre_blit(struct nvc0_blitctx *ctx,
1082 const struct pipe_blit_info *info)
1083 {
1084 struct nvc0_context *nvc0 = ctx->nvc0;
1085 struct nvc0_blitter *blitter = nvc0->screen->blitter;
1086 int s;
1087
1088 ctx->saved.fb.width = nvc0->framebuffer.width;
1089 ctx->saved.fb.height = nvc0->framebuffer.height;
1090 ctx->saved.fb.samples = nvc0->framebuffer.samples;
1091 ctx->saved.fb.layers = nvc0->framebuffer.layers;
1092 ctx->saved.fb.nr_cbufs = nvc0->framebuffer.nr_cbufs;
1093 ctx->saved.fb.cbufs[0] = nvc0->framebuffer.cbufs[0];
1094 ctx->saved.fb.zsbuf = nvc0->framebuffer.zsbuf;
1095
1096 ctx->saved.rast = nvc0->rast;
1097
1098 ctx->saved.vp = nvc0->vertprog;
1099 ctx->saved.tcp = nvc0->tctlprog;
1100 ctx->saved.tep = nvc0->tevlprog;
1101 ctx->saved.gp = nvc0->gmtyprog;
1102 ctx->saved.fp = nvc0->fragprog;
1103
1104 ctx->saved.min_samples = nvc0->min_samples;
1105 ctx->saved.window_rect = nvc0->window_rect;
1106
1107 nvc0->rast = &ctx->rast;
1108
1109 nvc0->vertprog = ctx->vp;
1110 nvc0->tctlprog = NULL;
1111 nvc0->tevlprog = NULL;
1112 nvc0->gmtyprog = NULL;
1113 nvc0->fragprog = ctx->fp;
1114
1115 nvc0->window_rect.rects =
1116 MIN2(info->num_window_rectangles, NVC0_MAX_WINDOW_RECTANGLES);
1117 nvc0->window_rect.inclusive = info->window_rectangle_include;
1118 if (nvc0->window_rect.rects)
1119 memcpy(nvc0->window_rect.rect, info->window_rectangles,
1120 sizeof(struct pipe_scissor_state) * nvc0->window_rect.rects);
1121
1122 for (s = 0; s <= 4; ++s) {
1123 ctx->saved.num_textures[s] = nvc0->num_textures[s];
1124 ctx->saved.num_samplers[s] = nvc0->num_samplers[s];
1125 nvc0->textures_dirty[s] = (1 << nvc0->num_textures[s]) - 1;
1126 nvc0->samplers_dirty[s] = (1 << nvc0->num_samplers[s]) - 1;
1127 }
1128 ctx->saved.texture[0] = nvc0->textures[4][0];
1129 ctx->saved.texture[1] = nvc0->textures[4][1];
1130 ctx->saved.sampler[0] = nvc0->samplers[4][0];
1131 ctx->saved.sampler[1] = nvc0->samplers[4][1];
1132
1133 nvc0->samplers[4][0] = &blitter->sampler[ctx->filter];
1134 nvc0->samplers[4][1] = &blitter->sampler[ctx->filter];
1135
1136 for (s = 0; s <= 3; ++s)
1137 nvc0->num_samplers[s] = 0;
1138 nvc0->num_samplers[4] = 2;
1139
1140 nvc0->min_samples = 1;
1141
1142 ctx->saved.dirty_3d = nvc0->dirty_3d;
1143
1144 nvc0->textures_dirty[4] |= 3;
1145 nvc0->samplers_dirty[4] |= 3;
1146
1147 nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_FB);
1148 nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_TEX(4, 0));
1149 nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_TEX(4, 1));
1150
1151 nvc0->dirty_3d = NVC0_NEW_3D_FRAMEBUFFER | NVC0_NEW_3D_MIN_SAMPLES |
1152 NVC0_NEW_3D_VERTPROG | NVC0_NEW_3D_FRAGPROG |
1153 NVC0_NEW_3D_TCTLPROG | NVC0_NEW_3D_TEVLPROG | NVC0_NEW_3D_GMTYPROG |
1154 NVC0_NEW_3D_TEXTURES | NVC0_NEW_3D_SAMPLERS | NVC0_NEW_3D_WINDOW_RECTS;
1155 }
1156
1157 static void
nvc0_blitctx_post_blit(struct nvc0_blitctx * blit)1158 nvc0_blitctx_post_blit(struct nvc0_blitctx *blit)
1159 {
1160 struct nvc0_context *nvc0 = blit->nvc0;
1161 int s;
1162
1163 pipe_surface_reference(&nvc0->framebuffer.cbufs[0], NULL);
1164
1165 nvc0->framebuffer.width = blit->saved.fb.width;
1166 nvc0->framebuffer.height = blit->saved.fb.height;
1167 nvc0->framebuffer.samples = blit->saved.fb.samples;
1168 nvc0->framebuffer.layers = blit->saved.fb.layers;
1169 nvc0->framebuffer.nr_cbufs = blit->saved.fb.nr_cbufs;
1170 nvc0->framebuffer.cbufs[0] = blit->saved.fb.cbufs[0];
1171 nvc0->framebuffer.zsbuf = blit->saved.fb.zsbuf;
1172
1173 nvc0->rast = blit->saved.rast;
1174
1175 nvc0->vertprog = blit->saved.vp;
1176 nvc0->tctlprog = blit->saved.tcp;
1177 nvc0->tevlprog = blit->saved.tep;
1178 nvc0->gmtyprog = blit->saved.gp;
1179 nvc0->fragprog = blit->saved.fp;
1180
1181 nvc0->min_samples = blit->saved.min_samples;
1182 nvc0->window_rect = blit->saved.window_rect;
1183
1184 pipe_sampler_view_reference(&nvc0->textures[4][0], NULL);
1185 pipe_sampler_view_reference(&nvc0->textures[4][1], NULL);
1186
1187 for (s = 0; s <= 4; ++s) {
1188 nvc0->num_textures[s] = blit->saved.num_textures[s];
1189 nvc0->num_samplers[s] = blit->saved.num_samplers[s];
1190 nvc0->textures_dirty[s] = (1 << nvc0->num_textures[s]) - 1;
1191 nvc0->samplers_dirty[s] = (1 << nvc0->num_samplers[s]) - 1;
1192 }
1193 nvc0->textures[4][0] = blit->saved.texture[0];
1194 nvc0->textures[4][1] = blit->saved.texture[1];
1195 nvc0->samplers[4][0] = blit->saved.sampler[0];
1196 nvc0->samplers[4][1] = blit->saved.sampler[1];
1197
1198 nvc0->textures_dirty[4] |= 3;
1199 nvc0->samplers_dirty[4] |= 3;
1200
1201 if (nvc0->cond_query && !blit->render_condition_enable)
1202 nvc0->base.pipe.render_condition(&nvc0->base.pipe, nvc0->cond_query,
1203 nvc0->cond_cond, nvc0->cond_mode);
1204
1205 nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_VTX_TMP);
1206 nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_TEXT);
1207 nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_FB);
1208 nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_TEX(4, 0));
1209 nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_TEX(4, 1));
1210 nouveau_scratch_done(&nvc0->base);
1211
1212 nvc0->dirty_3d = blit->saved.dirty_3d |
1213 (NVC0_NEW_3D_FRAMEBUFFER | NVC0_NEW_3D_SCISSOR | NVC0_NEW_3D_SAMPLE_MASK |
1214 NVC0_NEW_3D_RASTERIZER | NVC0_NEW_3D_ZSA | NVC0_NEW_3D_BLEND |
1215 NVC0_NEW_3D_VIEWPORT | NVC0_NEW_3D_WINDOW_RECTS |
1216 NVC0_NEW_3D_TEXTURES | NVC0_NEW_3D_SAMPLERS |
1217 NVC0_NEW_3D_VERTPROG | NVC0_NEW_3D_FRAGPROG |
1218 NVC0_NEW_3D_TCTLPROG | NVC0_NEW_3D_TEVLPROG | NVC0_NEW_3D_GMTYPROG |
1219 NVC0_NEW_3D_TFB_TARGETS | NVC0_NEW_3D_VERTEX | NVC0_NEW_3D_ARRAYS);
1220 nvc0->scissors_dirty |= 1;
1221 nvc0->viewports_dirty |= 1;
1222
1223 nvc0->base.pipe.set_min_samples(&nvc0->base.pipe, blit->saved.min_samples);
1224 }
1225
1226 static void
nvc0_blit_3d(struct nvc0_context * nvc0,const struct pipe_blit_info * info)1227 nvc0_blit_3d(struct nvc0_context *nvc0, const struct pipe_blit_info *info)
1228 {
1229 struct nvc0_screen *screen = nvc0->screen;
1230 struct nvc0_blitctx *blit = nvc0->blit;
1231 struct nouveau_pushbuf *push = nvc0->base.pushbuf;
1232 struct pipe_resource *src = info->src.resource;
1233 struct pipe_resource *dst = info->dst.resource;
1234 struct nouveau_bo *vtxbuf_bo;
1235 uint32_t stride, length, *vbuf;
1236 uint64_t vtxbuf;
1237 int32_t minx, maxx, miny, maxy;
1238 int32_t i, n;
1239 float x0, x1, y0, y1, z;
1240 float dz;
1241 float x_range, y_range;
1242
1243 blit->mode = nv50_blit_select_mode(info);
1244 blit->color_mask = nv50_blit_derive_color_mask(info);
1245 blit->filter = nv50_blit_get_filter(info);
1246 blit->render_condition_enable = info->render_condition_enable;
1247
1248 nvc0_blit_select_vp(blit);
1249 nvc0_blit_select_fp(blit, info);
1250 nvc0_blitctx_pre_blit(blit, info);
1251
1252 nvc0_blit_set_dst(blit, dst, info->dst.level, -1, info->dst.format);
1253 nvc0_blit_set_src(blit, src, info->src.level, -1, info->src.format,
1254 blit->filter);
1255
1256 nvc0_blitctx_prepare_state(blit);
1257
1258 nvc0_state_validate_3d(nvc0, ~0);
1259
1260 x_range = (float)info->src.box.width / (float)info->dst.box.width;
1261 y_range = (float)info->src.box.height / (float)info->dst.box.height;
1262
1263 x0 = (float)info->src.box.x - x_range * (float)info->dst.box.x;
1264 y0 = (float)info->src.box.y - y_range * (float)info->dst.box.y;
1265
1266 x1 = x0 + 32768.0f * x_range;
1267 y1 = y0 + 32768.0f * y_range;
1268
1269 x0 *= (float)(1 << nv50_miptree(src)->ms_x);
1270 x1 *= (float)(1 << nv50_miptree(src)->ms_x);
1271 y0 *= (float)(1 << nv50_miptree(src)->ms_y);
1272 y1 *= (float)(1 << nv50_miptree(src)->ms_y);
1273
1274 dz = (float)info->src.box.depth / (float)info->dst.box.depth;
1275 z = (float)info->src.box.z;
1276 if (nv50_miptree(src)->layout_3d)
1277 z += 0.5f * dz;
1278
1279 if (src->last_level > 0) {
1280 /* If there are mip maps, GPU always assumes normalized coordinates. */
1281 const unsigned l = info->src.level;
1282 const float fh = u_minify(src->width0 << nv50_miptree(src)->ms_x, l);
1283 const float fv = u_minify(src->height0 << nv50_miptree(src)->ms_y, l);
1284 x0 /= fh;
1285 x1 /= fh;
1286 y0 /= fv;
1287 y1 /= fv;
1288 if (nv50_miptree(src)->layout_3d) {
1289 z /= u_minify(src->depth0, l);
1290 dz /= u_minify(src->depth0, l);
1291 }
1292 }
1293
1294 bool serialize = false;
1295 struct nv50_miptree *mt = nv50_miptree(dst);
1296 if (screen->eng3d->oclass >= TU102_3D_CLASS) {
1297 IMMED_NVC0(push, SUBC_3D(TU102_3D_SET_COLOR_RENDER_TO_ZETA_SURFACE),
1298 util_format_is_depth_or_stencil(info->dst.format));
1299 } else {
1300 /* When flipping a surface from zeta <-> color "mode", we have to wait for
1301 * the GPU to flush its current draws.
1302 */
1303 serialize = util_format_is_depth_or_stencil(info->dst.format);
1304 if (serialize && mt->base.status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
1305 IMMED_NVC0(push, NVC0_3D(SERIALIZE), 0);
1306 }
1307 }
1308
1309 IMMED_NVC0(push, NVC0_3D(VIEWPORT_TRANSFORM_EN), 0);
1310 IMMED_NVC0(push, NVC0_3D(VIEW_VOLUME_CLIP_CTRL), 0x2 |
1311 NVC0_3D_VIEW_VOLUME_CLIP_CTRL_DEPTH_RANGE_0_1);
1312 BEGIN_NVC0(push, NVC0_3D(VIEWPORT_HORIZ(0)), 2);
1313 PUSH_DATA (push, nvc0->framebuffer.width << 16);
1314 PUSH_DATA (push, nvc0->framebuffer.height << 16);
1315
1316 /* Draw a large triangle in screen coordinates covering the whole
1317 * render target, with scissors defining the destination region.
1318 * The vertex is supplied with non-normalized texture coordinates
1319 * arranged in a way to yield the desired offset and scale.
1320 *
1321 * Note that while the source texture is presented to the sampler as
1322 * non-MSAA (even if it is), the destination texture is treated as MSAA for
1323 * rendering. This means that
1324 * - destination coordinates shouldn't be scaled
1325 * - without per-sample rendering, the target will be a solid-fill for all
1326 * of the samples
1327 *
1328 * The last point implies that this process is very bad for 1:1 blits, as
1329 * well as scaled blits between MSAA surfaces. This works fine for
1330 * upscaling and downscaling though. The 1:1 blits should ideally be
1331 * handled by the 2d engine, which can do it perfectly.
1332 */
1333
1334 minx = info->dst.box.x;
1335 maxx = info->dst.box.x + info->dst.box.width;
1336 miny = info->dst.box.y;
1337 maxy = info->dst.box.y + info->dst.box.height;
1338 if (info->scissor_enable) {
1339 minx = MAX2(minx, info->scissor.minx);
1340 maxx = MIN2(maxx, info->scissor.maxx);
1341 miny = MAX2(miny, info->scissor.miny);
1342 maxy = MIN2(maxy, info->scissor.maxy);
1343 }
1344 BEGIN_NVC0(push, NVC0_3D(SCISSOR_HORIZ(0)), 2);
1345 PUSH_DATA (push, (maxx << 16) | minx);
1346 PUSH_DATA (push, (maxy << 16) | miny);
1347
1348 stride = (3 + 2) * 4;
1349 length = stride * 3 * info->dst.box.depth;
1350
1351 vbuf = nouveau_scratch_get(&nvc0->base, length, &vtxbuf, &vtxbuf_bo);
1352 if (!vbuf) {
1353 assert(vbuf);
1354 return;
1355 }
1356
1357 BCTX_REFN_bo(nvc0->bufctx_3d, 3D_VTX_TMP,
1358 NOUVEAU_BO_GART | NOUVEAU_BO_RD, vtxbuf_bo);
1359 BCTX_REFN_bo(nvc0->bufctx_3d, 3D_TEXT,
1360 NV_VRAM_DOMAIN(&screen->base) | NOUVEAU_BO_RD, screen->text);
1361 PUSH_VAL(push);
1362
1363 BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(0)), 4);
1364 PUSH_DATA (push, NVC0_3D_VERTEX_ARRAY_FETCH_ENABLE | stride <<
1365 NVC0_3D_VERTEX_ARRAY_FETCH_STRIDE__SHIFT);
1366 PUSH_DATAh(push, vtxbuf);
1367 PUSH_DATA (push, vtxbuf);
1368 PUSH_DATA (push, 0);
1369 if (screen->eng3d->oclass < TU102_3D_CLASS)
1370 BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(0)), 2);
1371 else
1372 BEGIN_NVC0(push, SUBC_3D(TU102_3D_VERTEX_ARRAY_LIMIT_HIGH(0)), 2);
1373 PUSH_DATAh(push, vtxbuf + length - 1);
1374 PUSH_DATA (push, vtxbuf + length - 1);
1375
1376 n = MAX2(2, nvc0->state.num_vtxelts);
1377
1378 BEGIN_NVC0(push, NVC0_3D(VERTEX_ATTRIB_FORMAT(0)), n);
1379 PUSH_DATA (push, NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_FLOAT |
1380 NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_32_32 | 0x00 <<
1381 NVC0_3D_VERTEX_ATTRIB_FORMAT_OFFSET__SHIFT);
1382 PUSH_DATA (push, NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_FLOAT |
1383 NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_32_32_32 | 0x08 <<
1384 NVC0_3D_VERTEX_ATTRIB_FORMAT_OFFSET__SHIFT);
1385 for (i = 2; i < n; i++) {
1386 PUSH_DATA(push, NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_FLOAT |
1387 NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_32 |
1388 NVC0_3D_VERTEX_ATTRIB_FORMAT_CONST);
1389 }
1390 for (i = 1; i < n; ++i)
1391 IMMED_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(i)), 0);
1392 if (nvc0->state.instance_elts) {
1393 nvc0->state.instance_elts = 0;
1394 BEGIN_NVC0(push, NVC0_3D(MACRO_VERTEX_ARRAY_PER_INSTANCE), 2);
1395 PUSH_DATA (push, n);
1396 PUSH_DATA (push, 0);
1397 }
1398 nvc0->state.num_vtxelts = 2;
1399
1400 if (nvc0->state.prim_restart) {
1401 IMMED_NVC0(push, NVC0_3D(PRIM_RESTART_ENABLE), 0);
1402 nvc0->state.prim_restart = 0;
1403 }
1404
1405 if (nvc0->state.index_bias) {
1406 IMMED_NVC0(push, NVC0_3D(VB_ELEMENT_BASE), 0);
1407 IMMED_NVC0(push, NVC0_3D(VERTEX_ID_BASE), 0);
1408 nvc0->state.index_bias = 0;
1409 }
1410
1411 for (i = 0; i < info->dst.box.depth; ++i, z += dz) {
1412 if (info->dst.box.z + i) {
1413 BEGIN_NVC0(push, NVC0_3D(LAYER), 1);
1414 PUSH_DATA (push, info->dst.box.z + i);
1415 }
1416
1417 *(vbuf++) = fui(0.0f);
1418 *(vbuf++) = fui(0.0f);
1419 *(vbuf++) = fui(x0);
1420 *(vbuf++) = fui(y0);
1421 *(vbuf++) = fui(z);
1422
1423 *(vbuf++) = fui(32768.0f);
1424 *(vbuf++) = fui(0.0f);
1425 *(vbuf++) = fui(x1);
1426 *(vbuf++) = fui(y0);
1427 *(vbuf++) = fui(z);
1428
1429 *(vbuf++) = fui(0.0f);
1430 *(vbuf++) = fui(32768.0f);
1431 *(vbuf++) = fui(x0);
1432 *(vbuf++) = fui(y1);
1433 *(vbuf++) = fui(z);
1434
1435 IMMED_NVC0(push, NVC0_3D(VERTEX_BEGIN_GL),
1436 NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLES);
1437 BEGIN_NVC0(push, NVC0_3D(VERTEX_BUFFER_FIRST), 2);
1438 PUSH_DATA (push, i * 3);
1439 PUSH_DATA (push, 3);
1440 IMMED_NVC0(push, NVC0_3D(VERTEX_END_GL), 0);
1441 }
1442 if (info->dst.box.z + info->dst.box.depth - 1)
1443 IMMED_NVC0(push, NVC0_3D(LAYER), 0);
1444
1445 nvc0_blitctx_post_blit(blit);
1446
1447 /* restore viewport transform */
1448 IMMED_NVC0(push, NVC0_3D(VIEWPORT_TRANSFORM_EN), 1);
1449 if (screen->eng3d->oclass >= TU102_3D_CLASS)
1450 IMMED_NVC0(push, SUBC_3D(TU102_3D_SET_COLOR_RENDER_TO_ZETA_SURFACE), 0);
1451 else if (serialize)
1452 /* mark the surface as reading, which will force a serialize next time
1453 * it's used for writing.
1454 */
1455 mt->base.status |= NOUVEAU_BUFFER_STATUS_GPU_READING;
1456 }
1457
1458 static void
nvc0_blit_eng2d(struct nvc0_context * nvc0,const struct pipe_blit_info * info)1459 nvc0_blit_eng2d(struct nvc0_context *nvc0, const struct pipe_blit_info *info)
1460 {
1461 struct nouveau_pushbuf *push = nvc0->base.pushbuf;
1462 struct nv50_miptree *dst = nv50_miptree(info->dst.resource);
1463 struct nv50_miptree *src = nv50_miptree(info->src.resource);
1464 const int32_t srcx_adj = info->src.box.width < 0 ? -1 : 0;
1465 const int32_t srcy_adj = info->src.box.height < 0 ? -1 : 0;
1466 const int dz = info->dst.box.z;
1467 const int sz = info->src.box.z;
1468 uint32_t dstw, dsth;
1469 int32_t dstx, dsty;
1470 int64_t srcx, srcy;
1471 int64_t du_dx, dv_dy;
1472 int i;
1473 uint32_t mode;
1474 uint32_t mask = nv50_blit_eng2d_get_mask(info);
1475 bool b;
1476
1477 mode = nv50_blit_get_filter(info) ?
1478 NV50_2D_BLIT_CONTROL_FILTER_BILINEAR :
1479 NV50_2D_BLIT_CONTROL_FILTER_POINT_SAMPLE;
1480 mode |= (src->base.base.nr_samples > dst->base.base.nr_samples) ?
1481 NV50_2D_BLIT_CONTROL_ORIGIN_CORNER : NV50_2D_BLIT_CONTROL_ORIGIN_CENTER;
1482
1483 du_dx = ((int64_t)info->src.box.width << 32) / info->dst.box.width;
1484 dv_dy = ((int64_t)info->src.box.height << 32) / info->dst.box.height;
1485
1486 b = info->dst.format == info->src.format;
1487 nvc0_2d_texture_set(push, 1, dst, info->dst.level, dz, info->dst.format, b);
1488 nvc0_2d_texture_set(push, 0, src, info->src.level, sz, info->src.format, b);
1489
1490 if (info->scissor_enable) {
1491 BEGIN_NVC0(push, NVC0_2D(CLIP_X), 5);
1492 PUSH_DATA (push, info->scissor.minx << dst->ms_x);
1493 PUSH_DATA (push, info->scissor.miny << dst->ms_y);
1494 PUSH_DATA (push, (info->scissor.maxx - info->scissor.minx) << dst->ms_x);
1495 PUSH_DATA (push, (info->scissor.maxy - info->scissor.miny) << dst->ms_y);
1496 PUSH_DATA (push, 1); /* enable */
1497 }
1498
1499 if (nvc0->cond_query && info->render_condition_enable)
1500 IMMED_NVC0(push, NVC0_2D(COND_MODE), nvc0->cond_condmode);
1501
1502 if (mask != 0xffffffff) {
1503 IMMED_NVC0(push, NVC0_2D(ROP), 0xca); /* DPSDxax */
1504 IMMED_NVC0(push, NVC0_2D(PATTERN_COLOR_FORMAT),
1505 NV50_2D_PATTERN_COLOR_FORMAT_A8R8G8B8);
1506 BEGIN_NVC0(push, NVC0_2D(PATTERN_BITMAP_COLOR(0)), 4);
1507 PUSH_DATA (push, 0x00000000);
1508 PUSH_DATA (push, mask);
1509 PUSH_DATA (push, 0xffffffff);
1510 PUSH_DATA (push, 0xffffffff);
1511 IMMED_NVC0(push, NVC0_2D(OPERATION), NV50_2D_OPERATION_ROP);
1512 } else
1513 if (info->src.format != info->dst.format) {
1514 if (info->src.format == PIPE_FORMAT_R8_UNORM ||
1515 info->src.format == PIPE_FORMAT_R8_SNORM ||
1516 info->src.format == PIPE_FORMAT_R16_UNORM ||
1517 info->src.format == PIPE_FORMAT_R16_SNORM ||
1518 info->src.format == PIPE_FORMAT_R16_FLOAT ||
1519 info->src.format == PIPE_FORMAT_R32_FLOAT) {
1520 mask = 0xffff0000; /* also makes condition for OPERATION reset true */
1521 BEGIN_NVC0(push, NVC0_2D(BETA4), 2);
1522 PUSH_DATA (push, mask);
1523 PUSH_DATA (push, NV50_2D_OPERATION_SRCCOPY_PREMULT);
1524 } else
1525 if (info->src.format == PIPE_FORMAT_A8_UNORM) {
1526 mask = 0xff000000;
1527 BEGIN_NVC0(push, NVC0_2D(BETA4), 2);
1528 PUSH_DATA (push, mask);
1529 PUSH_DATA (push, NV50_2D_OPERATION_SRCCOPY_PREMULT);
1530 }
1531 }
1532
1533 if (src->ms_x > dst->ms_x || src->ms_y > dst->ms_y) {
1534 /* ms_x is always >= ms_y */
1535 du_dx <<= src->ms_x - dst->ms_x;
1536 dv_dy <<= src->ms_y - dst->ms_y;
1537 } else {
1538 du_dx >>= dst->ms_x - src->ms_x;
1539 dv_dy >>= dst->ms_y - src->ms_y;
1540 }
1541
1542 srcx = (int64_t)(info->src.box.x + srcx_adj) << (src->ms_x + 32);
1543 srcy = (int64_t)(info->src.box.y + srcy_adj) << (src->ms_y + 32);
1544
1545 if (src->base.base.nr_samples > dst->base.base.nr_samples) {
1546 /* center src coorinates for proper MS resolve filtering */
1547 srcx += (int64_t)1 << (src->ms_x + 31);
1548 srcy += (int64_t)1 << (src->ms_y + 31);
1549 }
1550
1551 dstx = info->dst.box.x << dst->ms_x;
1552 dsty = info->dst.box.y << dst->ms_y;
1553
1554 dstw = info->dst.box.width << dst->ms_x;
1555 dsth = info->dst.box.height << dst->ms_y;
1556
1557 if (dstx < 0) {
1558 dstw += dstx;
1559 srcx -= du_dx * dstx;
1560 dstx = 0;
1561 }
1562 if (dsty < 0) {
1563 dsth += dsty;
1564 srcy -= dv_dy * dsty;
1565 dsty = 0;
1566 }
1567
1568 IMMED_NVC0(push, NVC0_2D(BLIT_CONTROL), mode);
1569 BEGIN_NVC0(push, NVC0_2D(BLIT_DST_X), 4);
1570 PUSH_DATA (push, dstx);
1571 PUSH_DATA (push, dsty);
1572 PUSH_DATA (push, dstw);
1573 PUSH_DATA (push, dsth);
1574 BEGIN_NVC0(push, NVC0_2D(BLIT_DU_DX_FRACT), 4);
1575 PUSH_DATA (push, du_dx);
1576 PUSH_DATA (push, du_dx >> 32);
1577 PUSH_DATA (push, dv_dy);
1578 PUSH_DATA (push, dv_dy >> 32);
1579
1580 BCTX_REFN(nvc0->bufctx, 2D, &dst->base, WR);
1581 BCTX_REFN(nvc0->bufctx, 2D, &src->base, RD);
1582 nouveau_pushbuf_bufctx(nvc0->base.pushbuf, nvc0->bufctx);
1583 if (PUSH_VAL(nvc0->base.pushbuf))
1584 return;
1585
1586 for (i = 0; i < info->dst.box.depth; ++i) {
1587 if (i > 0) {
1588 /* no scaling in z-direction possible for eng2d blits */
1589 if (dst->layout_3d) {
1590 BEGIN_NVC0(push, NVC0_2D(DST_LAYER), 1);
1591 PUSH_DATA (push, info->dst.box.z + i);
1592 } else {
1593 const unsigned z = info->dst.box.z + i;
1594 const uint64_t address = dst->base.address +
1595 dst->level[info->dst.level].offset +
1596 z * dst->layer_stride;
1597 BEGIN_NVC0(push, NVC0_2D(DST_ADDRESS_HIGH), 2);
1598 PUSH_DATAh(push, address);
1599 PUSH_DATA (push, address);
1600 }
1601 if (src->layout_3d) {
1602 /* not possible because of depth tiling */
1603 assert(0);
1604 } else {
1605 const unsigned z = info->src.box.z + i;
1606 const uint64_t address = src->base.address +
1607 src->level[info->src.level].offset +
1608 z * src->layer_stride;
1609 BEGIN_NVC0(push, NVC0_2D(SRC_ADDRESS_HIGH), 2);
1610 PUSH_DATAh(push, address);
1611 PUSH_DATA (push, address);
1612 }
1613 BEGIN_NVC0(push, NVC0_2D(BLIT_SRC_Y_INT), 1); /* trigger */
1614 PUSH_DATA (push, srcy >> 32);
1615 } else {
1616 BEGIN_NVC0(push, NVC0_2D(BLIT_SRC_X_FRACT), 4);
1617 PUSH_DATA (push, srcx);
1618 PUSH_DATA (push, srcx >> 32);
1619 PUSH_DATA (push, srcy);
1620 PUSH_DATA (push, srcy >> 32);
1621 }
1622 }
1623 nvc0_resource_validate(nvc0, &dst->base, NOUVEAU_BO_WR);
1624 nvc0_resource_validate(nvc0, &src->base, NOUVEAU_BO_RD);
1625
1626 nouveau_bufctx_reset(nvc0->bufctx, NVC0_BIND_2D);
1627
1628 if (info->scissor_enable)
1629 IMMED_NVC0(push, NVC0_2D(CLIP_ENABLE), 0);
1630 if (mask != 0xffffffff)
1631 IMMED_NVC0(push, NVC0_2D(OPERATION), NV50_2D_OPERATION_SRCCOPY);
1632 if (nvc0->cond_query && info->render_condition_enable)
1633 IMMED_NVC0(push, NVC0_2D(COND_MODE), NV50_2D_COND_MODE_ALWAYS);
1634 }
1635
1636 static void
nvc0_blit(struct pipe_context * pipe,const struct pipe_blit_info * info)1637 nvc0_blit(struct pipe_context *pipe, const struct pipe_blit_info *info)
1638 {
1639 struct nvc0_context *nvc0 = nvc0_context(pipe);
1640 struct nouveau_pushbuf *push = nvc0->base.pushbuf;
1641 bool eng3d = false;
1642
1643 if (info->src.box.width == 0 || info->src.box.height == 0 ||
1644 info->dst.box.width == 0 || info->dst.box.height == 0) {
1645 util_debug_message(&nvc0->base.debug, ERROR,
1646 "Blit with zero-size src or dst box");
1647 return;
1648 }
1649
1650 if (util_format_is_depth_or_stencil(info->dst.resource->format)) {
1651 if (!(info->mask & PIPE_MASK_ZS))
1652 return;
1653 if (info->dst.resource->format == PIPE_FORMAT_Z32_FLOAT ||
1654 info->dst.resource->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)
1655 eng3d = true;
1656 if (info->filter != PIPE_TEX_FILTER_NEAREST)
1657 eng3d = true;
1658 } else {
1659 if (!(info->mask & PIPE_MASK_RGBA))
1660 return;
1661 if (info->mask != PIPE_MASK_RGBA)
1662 eng3d = true;
1663 }
1664
1665 if (nv50_miptree(info->src.resource)->layout_3d) {
1666 eng3d = true;
1667 } else
1668 if (info->src.box.depth != info->dst.box.depth) {
1669 eng3d = true;
1670 debug_printf("blit: cannot filter array or cube textures in z direction");
1671 }
1672
1673 if (!eng3d && info->dst.format != info->src.format) {
1674 if (!nv50_2d_dst_format_faithful(info->dst.format)) {
1675 eng3d = true;
1676 } else
1677 if (!nv50_2d_src_format_faithful(info->src.format)) {
1678 if (!util_format_is_luminance(info->src.format)) {
1679 if (!nv50_2d_dst_format_ops_supported(info->dst.format))
1680 eng3d = true;
1681 else
1682 if (util_format_is_intensity(info->src.format))
1683 eng3d = info->src.format != PIPE_FORMAT_I8_UNORM;
1684 else
1685 if (util_format_is_alpha(info->src.format))
1686 eng3d = info->src.format != PIPE_FORMAT_A8_UNORM;
1687 else
1688 if (util_format_is_srgb(info->dst.format) &&
1689 util_format_get_nr_components(info->src.format) == 1)
1690 eng3d = true;
1691 else
1692 eng3d = !nv50_2d_format_supported(info->src.format);
1693 }
1694 } else
1695 if (util_format_is_luminance_alpha(info->src.format))
1696 eng3d = true;
1697 }
1698
1699 if (info->src.resource->nr_samples == 8 &&
1700 info->dst.resource->nr_samples <= 1)
1701 eng3d = true;
1702 #if 0
1703 /* FIXME: can't make this work with eng2d anymore, at least not on nv50 */
1704 if (info->src.resource->nr_samples > 1 ||
1705 info->dst.resource->nr_samples > 1)
1706 eng3d = true;
1707 #endif
1708 /* FIXME: find correct src coordinates adjustments */
1709 if ((info->src.box.width != info->dst.box.width &&
1710 info->src.box.width != -info->dst.box.width) ||
1711 (info->src.box.height != info->dst.box.height &&
1712 info->src.box.height != -info->dst.box.height))
1713 eng3d = true;
1714
1715 if (info->num_window_rectangles > 0 || info->window_rectangle_include)
1716 eng3d = true;
1717
1718 simple_mtx_lock(&nvc0->screen->state_lock);
1719 if (nvc0->screen->num_occlusion_queries_active)
1720 IMMED_NVC0(push, NVC0_3D(SAMPLECNT_ENABLE), 0);
1721
1722 if (!eng3d)
1723 nvc0_blit_eng2d(nvc0, info);
1724 else
1725 nvc0_blit_3d(nvc0, info);
1726
1727 if (nvc0->screen->num_occlusion_queries_active)
1728 IMMED_NVC0(push, NVC0_3D(SAMPLECNT_ENABLE), 1);
1729 PUSH_KICK(push);
1730 simple_mtx_unlock(&nvc0->screen->state_lock);
1731
1732 NOUVEAU_DRV_STAT(&nvc0->screen->base, tex_blit_count, 1);
1733 }
1734
/*
 * pipe_context::flush_resource hook.  This driver does nothing here;
 * both arguments are ignored.
 */
static void
nvc0_flush_resource(struct pipe_context *ctx,
                    struct pipe_resource *resource)
{
   (void)ctx;
   (void)resource;
}
1740
1741 bool
nvc0_blitter_create(struct nvc0_screen * screen)1742 nvc0_blitter_create(struct nvc0_screen *screen)
1743 {
1744 screen->blitter = CALLOC_STRUCT(nvc0_blitter);
1745 if (!screen->blitter) {
1746 NOUVEAU_ERR("failed to allocate blitter struct\n");
1747 return false;
1748 }
1749 screen->blitter->screen = screen;
1750
1751 (void) mtx_init(&screen->blitter->mutex, mtx_plain);
1752
1753 nvc0_blitter_make_sampler(screen->blitter);
1754
1755 return true;
1756 }
1757
1758 void
nvc0_blitter_destroy(struct nvc0_screen * screen)1759 nvc0_blitter_destroy(struct nvc0_screen *screen)
1760 {
1761 struct nvc0_blitter *blitter = screen->blitter;
1762 unsigned i, m;
1763
1764 for (i = 0; i < NV50_BLIT_MAX_TEXTURE_TYPES; ++i) {
1765 for (m = 0; m < NV50_BLIT_MODES; ++m) {
1766 struct nvc0_program *prog = blitter->fp[i][m];
1767 if (prog) {
1768 nvc0_program_destroy(NULL, prog);
1769 ralloc_free((void *)prog->nir);
1770 FREE(prog);
1771 }
1772 }
1773 }
1774 if (blitter->vp) {
1775 struct nvc0_program *prog = blitter->vp;
1776 nvc0_program_destroy(NULL, prog);
1777 ralloc_free((void *)prog->nir);
1778 FREE(prog);
1779 }
1780
1781 mtx_destroy(&blitter->mutex);
1782 FREE(blitter);
1783 }
1784
1785 bool
nvc0_blitctx_create(struct nvc0_context * nvc0)1786 nvc0_blitctx_create(struct nvc0_context *nvc0)
1787 {
1788 nvc0->blit = CALLOC_STRUCT(nvc0_blitctx);
1789 if (!nvc0->blit) {
1790 NOUVEAU_ERR("failed to allocate blit context\n");
1791 return false;
1792 }
1793
1794 nvc0->blit->nvc0 = nvc0;
1795
1796 nvc0->blit->rast.pipe.half_pixel_center = 1;
1797
1798 return true;
1799 }
1800
1801 void
nvc0_blitctx_destroy(struct nvc0_context * nvc0)1802 nvc0_blitctx_destroy(struct nvc0_context *nvc0)
1803 {
1804 FREE(nvc0->blit);
1805 }
1806
1807 void
nvc0_init_surface_functions(struct nvc0_context * nvc0)1808 nvc0_init_surface_functions(struct nvc0_context *nvc0)
1809 {
1810 struct pipe_context *pipe = &nvc0->base.pipe;
1811
1812 pipe->resource_copy_region = nvc0_resource_copy_region;
1813 pipe->blit = nvc0_blit;
1814 pipe->flush_resource = nvc0_flush_resource;
1815 pipe->clear_render_target = nvc0_clear_render_target;
1816 pipe->clear_depth_stencil = nvc0_clear_depth_stencil;
1817 pipe->clear_texture = u_default_clear_texture;
1818 pipe->clear_buffer = nvc0_clear_buffer;
1819 if (nvc0->screen->base.class_3d >= GM200_3D_CLASS)
1820 pipe->evaluate_depth_buffer = gm200_evaluate_depth_buffer;
1821 }
1822