1 /*
2 * Copyright © 2022 Imagination Technologies Ltd.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a copy
5 * of this software and associated documentation files (the "Software"), to deal
6 * in the Software without restriction, including without limitation the rights
7 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 * copies of the Software, and to permit persons to whom the Software is
9 * furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24 /* This file is based on rgxdefs.h and should only contain function-like macros
25 * and inline functions. Any object-like macros should instead appear in
26 * rogue_hw_defs.h.
27 */
28
29 #ifndef ROGUE_HW_UTILS_H
30 #define ROGUE_HW_UTILS_H
31
32 #include <stdint.h>
33
34 #include "pvr_types.h"
35
36 #define __pvr_address_type pvr_dev_addr_t
37 #define __pvr_get_address(pvr_dev_addr) (pvr_dev_addr).addr
38 /* clang-format off */
39 #define __pvr_make_address(addr_u64) PVR_DEV_ADDR(addr_u64)
40 /* clang-format on */
41
42 #include "csbgen/rogue_cdm.h"
43 #include "csbgen/rogue_lls.h"
44
45 #undef __pvr_make_address
46 #undef __pvr_get_address
47 #undef __pvr_address_type
48
49 #include "rogue_hw_defs.h"
50 #include "pvr_device_info.h"
51 #include "util/compiler.h"
52 #include "util/macros.h"
53
54 static inline void
rogue_get_isp_samples_per_tile_xy(const struct pvr_device_info * dev_info,uint32_t samples,uint32_t * const x_out,uint32_t * const y_out)55 rogue_get_isp_samples_per_tile_xy(const struct pvr_device_info *dev_info,
56 uint32_t samples,
57 uint32_t *const x_out,
58 uint32_t *const y_out)
59 {
60 const uint32_t tile_size_x =
61 PVR_GET_FEATURE_VALUE(dev_info, tile_size_x, 0U);
62 const uint32_t tile_size_y =
63 PVR_GET_FEATURE_VALUE(dev_info, tile_size_y, 0U);
64 const uint32_t samples_per_pixel =
65 PVR_GET_FEATURE_VALUE(dev_info, isp_samples_per_pixel, 0U);
66
67 #if !defined(NDEBUG)
68 switch (samples_per_pixel) {
69 case 1:
70 case 2:
71 case 4:
72 break;
73 default:
74 assert(!"Unsupported ISP samples per pixel");
75 }
76 #endif
77
78 *x_out = tile_size_x;
79 *y_out = tile_size_y;
80
81 switch (samples) {
82 case 1:
83 break;
84 case 2:
85 if (samples_per_pixel == 2 || samples_per_pixel == 4)
86 *y_out *= 2;
87
88 break;
89 case 4:
90 if (samples_per_pixel == 2 || samples_per_pixel == 4)
91 *x_out *= 2;
92
93 if (samples_per_pixel == 2)
94 *y_out *= 2;
95
96 break;
97 case 8:
98 *y_out *= 2;
99 break;
100 default:
101 assert(!"Unsupported number of samples");
102 }
103 }
104
/* Maps a sample count to the x and y scale factors written to x_scale_out and
 * y_scale_out:
 *
 *    samples | x scale | y scale
 *    --------+---------+--------
 *          1 |       1 |       1
 *          2 |       1 |       2
 *          4 |       2 |       2
 *          8 |       2 |       4
 *
 * Any other sample count reaches unreachable().
 *
 * Declared inline like every other helper in this header, so translation
 * units that include the header do not each carry a non-inline static
 * definition.
 */
static inline void
rogue_get_isp_scale_xy_from_samples(const uint32_t samples,
                                    uint32_t *const x_scale_out,
                                    uint32_t *const y_scale_out)
{
   switch (samples) {
   case 1:
      *x_scale_out = 1;
      *y_scale_out = 1;
      break;
   case 2:
      *x_scale_out = 1;
      *y_scale_out = 2;
      break;
   case 4:
      *x_scale_out = 2;
      *y_scale_out = 2;
      break;
   case 8:
      *x_scale_out = 2;
      *y_scale_out = 4;
      break;
   default:
      unreachable("Unsupported number of samples");
   }
}
130
131 static inline void
rogue_get_isp_num_tiles_xy(const struct pvr_device_info * dev_info,uint32_t samples,uint32_t width,uint32_t height,uint32_t * const x_out,uint32_t * const y_out)132 rogue_get_isp_num_tiles_xy(const struct pvr_device_info *dev_info,
133 uint32_t samples,
134 uint32_t width,
135 uint32_t height,
136 uint32_t *const x_out,
137 uint32_t *const y_out)
138 {
139 uint32_t tile_samples_x;
140 uint32_t tile_samples_y;
141 uint32_t scale_x;
142 uint32_t scale_y;
143
144 rogue_get_isp_samples_per_tile_xy(dev_info,
145 samples,
146 &tile_samples_x,
147 &tile_samples_y);
148
149 rogue_get_isp_scale_xy_from_samples(samples, &scale_x, &scale_y);
150
151 *x_out = DIV_ROUND_UP(width * scale_x, tile_samples_x);
152 *y_out = DIV_ROUND_UP(height * scale_y, tile_samples_y);
153
154 if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format)) {
155 assert(PVR_GET_FEATURE_VALUE(dev_info,
156 simple_parameter_format_version,
157 0U) == 2U);
158 /* Align to a 2x2 tile block. */
159 *x_out = ALIGN_POT(*x_out, 2);
160 *y_out = ALIGN_POT(*y_out, 2);
161 }
162 }
163
164 static inline void
rogue_get_zls_tile_size_xy(const struct pvr_device_info * dev_info,uint32_t * const x_out,uint32_t * const y_out)165 rogue_get_zls_tile_size_xy(const struct pvr_device_info *dev_info,
166 uint32_t *const x_out,
167 uint32_t *const y_out)
168 {
169 uint32_t version = 0;
170 bool has_version;
171
172 has_version =
173 !PVR_FEATURE_VALUE(dev_info, simple_parameter_format_version, &version);
174
175 *x_out = PVR_GET_FEATURE_VALUE(dev_info, tile_size_x, 0U);
176 *y_out = PVR_GET_FEATURE_VALUE(dev_info, tile_size_y, 0U);
177
178 if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format) &&
179 has_version && version == 2) {
180 *x_out *= 2;
181 *y_out *= 2;
182 }
183 }
184
185 static inline uint32_t
rogue_get_max_output_regs_per_pixel(const struct pvr_device_info * dev_info)186 rogue_get_max_output_regs_per_pixel(const struct pvr_device_info *dev_info)
187 {
188 if (PVR_HAS_FEATURE(dev_info, eight_output_registers))
189 return 8U;
190
191 return 4U;
192 }
193
194 static inline void
rogue_get_num_macrotiles_xy(const struct pvr_device_info * dev_info,uint32_t * const x_out,uint32_t * const y_out)195 rogue_get_num_macrotiles_xy(const struct pvr_device_info *dev_info,
196 uint32_t *const x_out,
197 uint32_t *const y_out)
198 {
199 uint32_t version;
200
201 if (PVR_FEATURE_VALUE(dev_info, simple_parameter_format_version, &version))
202 version = 0;
203
204 if (!PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format) ||
205 version == 2) {
206 *x_out = 4;
207 *y_out = 4;
208 } else {
209 *x_out = 1;
210 *y_out = 1;
211 }
212 }
213
214 static inline uint32_t
rogue_get_macrotile_array_size(const struct pvr_device_info * dev_info)215 rogue_get_macrotile_array_size(const struct pvr_device_info *dev_info)
216 {
217 uint32_t num_macrotiles_x;
218 uint32_t num_macrotiles_y;
219
220 if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format))
221 return 0;
222
223 rogue_get_num_macrotiles_xy(dev_info, &num_macrotiles_x, &num_macrotiles_y);
224
225 return num_macrotiles_x * num_macrotiles_y * 8U;
226 }
227
228 /* Region header size in bytes. */
229 static inline uint32_t
rogue_get_region_header_size(const struct pvr_device_info * dev_info)230 rogue_get_region_header_size(const struct pvr_device_info *dev_info)
231 {
232 uint32_t version;
233
234 if (PVR_FEATURE_VALUE(dev_info, simple_parameter_format_version, &version))
235 version = 0;
236
237 if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format) &&
238 version == 2) {
239 return 6;
240 }
241
242 return 5;
243 }
244
245 static inline uint32_t
rogue_get_render_size_max(const struct pvr_device_info * dev_info)246 rogue_get_render_size_max(const struct pvr_device_info *dev_info)
247 {
248 if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format))
249 if (!PVR_HAS_FEATURE(dev_info, screen_size8K))
250 return 4096U;
251
252 return 8192U;
253 }
254
255 #define rogue_get_render_size_max_x(dev_info) \
256 rogue_get_render_size_max(dev_info)
257
258 #define rogue_get_render_size_max_y(dev_info) \
259 rogue_get_render_size_max(dev_info)
260
261 static inline uint32_t
rogue_get_slc_cache_line_size(const struct pvr_device_info * dev_info)262 rogue_get_slc_cache_line_size(const struct pvr_device_info *dev_info)
263 {
264 return PVR_GET_FEATURE_VALUE(dev_info, slc_cache_line_size_bits, 8U) / 8U;
265 }
266
pvr_get_max_user_vertex_output_components(const struct pvr_device_info * dev_info)267 static inline uint32_t pvr_get_max_user_vertex_output_components(
268 const struct pvr_device_info *dev_info)
269 {
270 const uint32_t uvs_pba_entries =
271 PVR_GET_FEATURE_VALUE(dev_info, uvs_pba_entries, 0U);
272 const uint32_t uvs_banks = PVR_GET_FEATURE_VALUE(dev_info, uvs_banks, 0U);
273
274 if (uvs_banks <= 8U && uvs_pba_entries == 160U)
275 return 64U;
276
277 return 128U;
278 }
279
280 static inline uint32_t
rogue_max_compute_shared_registers(const struct pvr_device_info * dev_info)281 rogue_max_compute_shared_registers(const struct pvr_device_info *dev_info)
282 {
283 if (PVR_HAS_FEATURE(dev_info, compute))
284 return 1024U;
285
286 return 0U;
287 }
288
289 static inline uint32_t
rogue_get_max_num_cores(const struct pvr_device_info * dev_info)290 rogue_get_max_num_cores(const struct pvr_device_info *dev_info)
291 {
292 if (PVR_HAS_FEATURE(dev_info, gpu_multicore_support) &&
293 PVR_HAS_FEATURE(dev_info, xpu_max_slaves)) {
294 return PVR_GET_FEATURE_VALUE(dev_info, xpu_max_slaves, 0U) + 1U;
295 }
296
297 return 1U;
298 }
299
300 static inline uint32_t
rogue_get_cdm_context_resume_buffer_size(const struct pvr_device_info * dev_info)301 rogue_get_cdm_context_resume_buffer_size(const struct pvr_device_info *dev_info)
302 {
303 if (PVR_HAS_FEATURE(dev_info, gpu_multicore_support)) {
304 const uint32_t max_num_cores = rogue_get_max_num_cores(dev_info);
305 const uint32_t cache_line_size = rogue_get_slc_cache_line_size(dev_info);
306 const uint32_t cdm_context_resume_buffer_stride =
307 ALIGN_POT(ROGUE_LLS_CDM_CONTEXT_RESUME_BUFFER_SIZE, cache_line_size);
308
309 return cdm_context_resume_buffer_stride * max_num_cores;
310 }
311
312 return ROGUE_LLS_CDM_CONTEXT_RESUME_BUFFER_SIZE;
313 }
314
rogue_get_cdm_context_resume_buffer_alignment(const struct pvr_device_info * dev_info)315 static inline uint32_t rogue_get_cdm_context_resume_buffer_alignment(
316 const struct pvr_device_info *dev_info)
317 {
318 if (PVR_HAS_FEATURE(dev_info, gpu_multicore_support))
319 return rogue_get_slc_cache_line_size(dev_info);
320
321 return ROGUE_LLS_CDM_CONTEXT_RESUME_BUFFER_ALIGNMENT;
322 }
323
324 static inline uint32_t
rogue_get_compute_max_work_group_size(const struct pvr_device_info * dev_info)325 rogue_get_compute_max_work_group_size(const struct pvr_device_info *dev_info)
326 {
327 /* The number of tasks which can be executed per USC - Limited to 16U by the
328 * CDM.
329 */
330 const uint32_t max_tasks_per_usc = 16U;
331
332 if (!PVR_HAS_ERN(dev_info, 35421)) {
333 /* Barriers on work-groups > 32 instances aren't supported. */
334 return ROGUE_MAX_INSTANCES_PER_TASK;
335 }
336
337 return ROGUE_MAX_INSTANCES_PER_TASK * max_tasks_per_usc;
338 }
339
/* Don't use this directly. Use the x and y define macros.
 *
 * Returns (rogue_get_render_size_max() * 3 / 2) - 1.
 */
static inline uint32_t
__rogue_get_param_vf_max(const struct pvr_device_info *dev_info)
{
   const uint32_t render_size_max = rogue_get_render_size_max(dev_info);

   return (render_size_max * 3 / 2) - 1;
}

/* Per-axis accessors; both axes share the same value. */
#define rogue_get_param_vf_max_x(dev_info) __rogue_get_param_vf_max(dev_info)
#define rogue_get_param_vf_max_y(dev_info) __rogue_get_param_vf_max(dev_info)
349
350 #endif /* ROGUE_HW_UTILS_H */
351