1 /*
2 * Copyright © 2022 Raspberry Pi Ltd
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "v3d_context.h"
25 #include "broadcom/common/v3d_tfu.h"
26
27 bool
v3dX(tfu)28 v3dX(tfu)(struct pipe_context *pctx,
29 struct pipe_resource *pdst,
30 struct pipe_resource *psrc,
31 unsigned int src_level,
32 unsigned int base_level,
33 unsigned int last_level,
34 unsigned int src_layer,
35 unsigned int dst_layer,
36 bool for_mipmap)
37 {
38 struct v3d_context *v3d = v3d_context(pctx);
39 struct v3d_screen *screen = v3d->screen;
40 struct v3d_resource *src = v3d_resource(psrc);
41 struct v3d_resource *dst = v3d_resource(pdst);
42 struct v3d_resource_slice *src_base_slice = &src->slices[src_level];
43 struct v3d_resource_slice *dst_base_slice = &dst->slices[base_level];
44 int msaa_scale = pdst->nr_samples > 1 ? 2 : 1;
45 int width = u_minify(pdst->width0, base_level) * msaa_scale;
46 int height = u_minify(pdst->height0, base_level) * msaa_scale;
47 enum pipe_format pformat;
48
49 if (psrc->format != pdst->format)
50 return false;
51 if (psrc->nr_samples != pdst->nr_samples)
52 return false;
53
54 if (pdst->target != PIPE_TEXTURE_2D || psrc->target != PIPE_TEXTURE_2D)
55 return false;
56
57 /* Can't write to raster. */
58 if (dst_base_slice->tiling == V3D_TILING_RASTER)
59 return false;
60
61 /* When using TFU for blit, we are doing exact copies (both input and
62 * output format must be the same, no scaling, etc), so there is no
63 * pixel format conversions. Thus we can rewrite the format to use one
64 * that is TFU compatible based on its texel size.
65 */
66 if (for_mipmap) {
67 pformat = pdst->format;
68 } else {
69 switch (dst->cpp) {
70 case 16: pformat = PIPE_FORMAT_R32G32B32A32_FLOAT; break;
71 case 8: pformat = PIPE_FORMAT_R16G16B16A16_FLOAT; break;
72 case 4: pformat = PIPE_FORMAT_R32_FLOAT; break;
73 case 2: pformat = PIPE_FORMAT_R16_FLOAT; break;
74 case 1: pformat = PIPE_FORMAT_R8_UNORM; break;
75 default: unreachable("unsupported format bit-size"); break;
76 };
77 }
78
79 uint32_t tex_format = v3d_get_tex_format(&screen->devinfo, pformat);
80
81 if (!v3dX(tfu_supports_tex_format)(tex_format, for_mipmap)) {
82 assert(for_mipmap);
83 return false;
84 }
85
86 v3d_flush_jobs_writing_resource(v3d, psrc, V3D_FLUSH_DEFAULT, false);
87 v3d_flush_jobs_reading_resource(v3d, pdst, V3D_FLUSH_DEFAULT, false);
88
89 struct drm_v3d_submit_tfu tfu = {
90 .ios = (height << 16) | width,
91 .bo_handles = {
92 dst->bo->handle,
93 src != dst ? src->bo->handle : 0
94 },
95 .in_sync = v3d->out_sync,
96 .out_sync = v3d->out_sync,
97 };
98 uint32_t src_offset = (src->bo->offset +
99 v3d_layer_offset(psrc, src_level, src_layer));
100 tfu.iia |= src_offset;
101
102 uint32_t dst_offset = (dst->bo->offset +
103 v3d_layer_offset(pdst, base_level, dst_layer));
104 tfu.ioa |= dst_offset;
105
106 switch (src_base_slice->tiling) {
107 case V3D_TILING_UIF_NO_XOR:
108 case V3D_TILING_UIF_XOR:
109 tfu.iis |= (src_base_slice->padded_height /
110 (2 * v3d_utile_height(src->cpp)));
111 break;
112 case V3D_TILING_RASTER:
113 tfu.iis |= src_base_slice->stride / src->cpp;
114 break;
115 case V3D_TILING_LINEARTILE:
116 case V3D_TILING_UBLINEAR_1_COLUMN:
117 case V3D_TILING_UBLINEAR_2_COLUMN:
118 break;
119 }
120
121 #if V3D_VERSION == 42
122 if (src_base_slice->tiling == V3D_TILING_RASTER) {
123 tfu.icfg |= (V3D33_TFU_ICFG_FORMAT_RASTER <<
124 V3D33_TFU_ICFG_FORMAT_SHIFT);
125 } else {
126 tfu.icfg |= ((V3D33_TFU_ICFG_FORMAT_LINEARTILE +
127 (src_base_slice->tiling - V3D_TILING_LINEARTILE)) <<
128 V3D33_TFU_ICFG_FORMAT_SHIFT);
129 }
130 tfu.icfg |= tex_format << V3D33_TFU_ICFG_TTYPE_SHIFT;
131
132 if (last_level != base_level)
133 tfu.ioa |= V3D33_TFU_IOA_DIMTW;
134
135 tfu.ioa |= ((V3D33_TFU_IOA_FORMAT_LINEARTILE +
136 (dst_base_slice->tiling - V3D_TILING_LINEARTILE)) <<
137 V3D33_TFU_IOA_FORMAT_SHIFT);
138
139 tfu.icfg |= (last_level - base_level) << V3D33_TFU_ICFG_NUMMM_SHIFT;
140
141 /* If we're writing level 0 (!IOA_DIMTW), then we need to supply the
142 * OPAD field for the destination (how many extra UIF blocks beyond
143 * those necessary to cover the height). When filling mipmaps, the
144 * miplevel 1+ tiling state is inferred.
145 */
146 if (dst_base_slice->tiling == V3D_TILING_UIF_NO_XOR ||
147 dst_base_slice->tiling == V3D_TILING_UIF_XOR) {
148 int uif_block_h = 2 * v3d_utile_height(dst->cpp);
149 int implicit_padded_height = align(height, uif_block_h);
150
151 tfu.icfg |= (((dst_base_slice->padded_height -
152 implicit_padded_height) / uif_block_h) <<
153 V3D33_TFU_ICFG_OPAD_SHIFT);
154 }
155 #endif /* V3D_VERSION == 42 */
156
157 #if V3D_VERSION >= 71
158 if (src_base_slice->tiling == V3D_TILING_RASTER) {
159 tfu.icfg = V3D71_TFU_ICFG_FORMAT_RASTER << V3D71_TFU_ICFG_IFORMAT_SHIFT;
160 } else {
161 tfu.icfg = (V3D71_TFU_ICFG_FORMAT_LINEARTILE +
162 (src_base_slice->tiling - V3D_TILING_LINEARTILE)) <<
163 V3D71_TFU_ICFG_IFORMAT_SHIFT;
164 }
165 tfu.icfg |= tex_format << V3D71_TFU_ICFG_OTYPE_SHIFT;
166
167 if (last_level != base_level)
168 tfu.v71.ioc |= V3D71_TFU_IOC_DIMTW;
169
170 tfu.v71.ioc |= ((V3D71_TFU_IOC_FORMAT_LINEARTILE +
171 (dst_base_slice->tiling - V3D_TILING_LINEARTILE)) <<
172 V3D71_TFU_IOC_FORMAT_SHIFT);
173
174 switch (dst_base_slice->tiling) {
175 case V3D_TILING_UIF_NO_XOR:
176 case V3D_TILING_UIF_XOR:
177 tfu.v71.ioc |=
178 (dst_base_slice->padded_height / (2 * v3d_utile_height(dst->cpp))) <<
179 V3D71_TFU_IOC_STRIDE_SHIFT;
180 break;
181 case V3D_TILING_RASTER:
182 tfu.v71.ioc |= (dst_base_slice->padded_height / dst->cpp) <<
183 V3D71_TFU_IOC_STRIDE_SHIFT;
184 break;
185 default:
186 break;
187 }
188
189 tfu.v71.ioc |= (last_level - base_level) << V3D71_TFU_IOC_NUMMM_SHIFT;
190 #endif /* V3D_VERSION >= 71*/
191
192 int ret = v3d_ioctl(screen->fd, DRM_IOCTL_V3D_SUBMIT_TFU, &tfu);
193 if (ret != 0) {
194 fprintf(stderr, "Failed to submit TFU job: %d\n", ret);
195 return false;
196 }
197
198 dst->writes++;
199
200 return true;
201 }
202
203