xref: /aosp_15_r20/external/mesa3d/src/imagination/include/hwdef/rogue_hw_utils.h (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2022 Imagination Technologies Ltd.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a copy
5  * of this software and associated documentation files (the "Software"), to deal
6  * in the Software without restriction, including without limitation the rights
7  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8  * copies of the Software, and to permit persons to whom the Software is
9  * furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  */
23 
24 /* This file is based on rgxdefs.h and should only contain function-like macros
25  * and inline functions. Any object-like macros should instead appear in
26  * rogue_hw_defs.h.
27  */
28 
29 #ifndef ROGUE_HW_UTILS_H
30 #define ROGUE_HW_UTILS_H
31 
32 #include <stdint.h>
33 
34 #include "pvr_types.h"
35 
36 #define __pvr_address_type pvr_dev_addr_t
37 #define __pvr_get_address(pvr_dev_addr) (pvr_dev_addr).addr
38 /* clang-format off */
39 #define __pvr_make_address(addr_u64) PVR_DEV_ADDR(addr_u64)
40 /* clang-format on */
41 
42 #include "csbgen/rogue_cdm.h"
43 #include "csbgen/rogue_lls.h"
44 
45 #undef __pvr_make_address
46 #undef __pvr_get_address
47 #undef __pvr_address_type
48 
49 #include "rogue_hw_defs.h"
50 #include "pvr_device_info.h"
51 #include "util/compiler.h"
52 #include "util/macros.h"
53 
54 static inline void
rogue_get_isp_samples_per_tile_xy(const struct pvr_device_info * dev_info,uint32_t samples,uint32_t * const x_out,uint32_t * const y_out)55 rogue_get_isp_samples_per_tile_xy(const struct pvr_device_info *dev_info,
56                                   uint32_t samples,
57                                   uint32_t *const x_out,
58                                   uint32_t *const y_out)
59 {
60    const uint32_t tile_size_x =
61       PVR_GET_FEATURE_VALUE(dev_info, tile_size_x, 0U);
62    const uint32_t tile_size_y =
63       PVR_GET_FEATURE_VALUE(dev_info, tile_size_y, 0U);
64    const uint32_t samples_per_pixel =
65       PVR_GET_FEATURE_VALUE(dev_info, isp_samples_per_pixel, 0U);
66 
67 #if !defined(NDEBUG)
68    switch (samples_per_pixel) {
69    case 1:
70    case 2:
71    case 4:
72       break;
73    default:
74       assert(!"Unsupported ISP samples per pixel");
75    }
76 #endif
77 
78    *x_out = tile_size_x;
79    *y_out = tile_size_y;
80 
81    switch (samples) {
82    case 1:
83       break;
84    case 2:
85       if (samples_per_pixel == 2 || samples_per_pixel == 4)
86          *y_out *= 2;
87 
88       break;
89    case 4:
90       if (samples_per_pixel == 2 || samples_per_pixel == 4)
91          *x_out *= 2;
92 
93       if (samples_per_pixel == 2)
94          *y_out *= 2;
95 
96       break;
97    case 8:
98       *y_out *= 2;
99       break;
100    default:
101       assert(!"Unsupported number of samples");
102    }
103 }
104 
/* Maps a sample count to the per-axis scale factors applied to a render's
 * dimensions:
 *
 *    samples | x scale | y scale
 *    --------+---------+--------
 *       1    |    1    |    1
 *       2    |    1    |    2
 *       4    |    2    |    2
 *       8    |    2    |    4
 *
 * Any other sample count is treated as unreachable.
 *
 * Declared `static inline` like the rest of the helpers in this header so
 * that translation units which include the header without calling this
 * function do not get an unused-function warning.
 *
 * samples:     sample count (1, 2, 4 or 8).
 * x_scale_out: receives the scale factor in x.
 * y_scale_out: receives the scale factor in y.
 */
static inline void
rogue_get_isp_scale_xy_from_samples(const uint32_t samples,
                                    uint32_t *const x_scale_out,
                                    uint32_t *const y_scale_out)
{
   switch (samples) {
   case 1:
      *x_scale_out = 1;
      *y_scale_out = 1;
      break;
   case 2:
      *x_scale_out = 1;
      *y_scale_out = 2;
      break;
   case 4:
      *x_scale_out = 2;
      *y_scale_out = 2;
      break;
   case 8:
      *x_scale_out = 2;
      *y_scale_out = 4;
      break;
   default:
      unreachable("Unsupported number of samples");
   }
}
130 
131 static inline void
rogue_get_isp_num_tiles_xy(const struct pvr_device_info * dev_info,uint32_t samples,uint32_t width,uint32_t height,uint32_t * const x_out,uint32_t * const y_out)132 rogue_get_isp_num_tiles_xy(const struct pvr_device_info *dev_info,
133                            uint32_t samples,
134                            uint32_t width,
135                            uint32_t height,
136                            uint32_t *const x_out,
137                            uint32_t *const y_out)
138 {
139    uint32_t tile_samples_x;
140    uint32_t tile_samples_y;
141    uint32_t scale_x;
142    uint32_t scale_y;
143 
144    rogue_get_isp_samples_per_tile_xy(dev_info,
145                                      samples,
146                                      &tile_samples_x,
147                                      &tile_samples_y);
148 
149    rogue_get_isp_scale_xy_from_samples(samples, &scale_x, &scale_y);
150 
151    *x_out = DIV_ROUND_UP(width * scale_x, tile_samples_x);
152    *y_out = DIV_ROUND_UP(height * scale_y, tile_samples_y);
153 
154    if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format)) {
155       assert(PVR_GET_FEATURE_VALUE(dev_info,
156                                    simple_parameter_format_version,
157                                    0U) == 2U);
158       /* Align to a 2x2 tile block. */
159       *x_out = ALIGN_POT(*x_out, 2);
160       *y_out = ALIGN_POT(*y_out, 2);
161    }
162 }
163 
164 static inline void
rogue_get_zls_tile_size_xy(const struct pvr_device_info * dev_info,uint32_t * const x_out,uint32_t * const y_out)165 rogue_get_zls_tile_size_xy(const struct pvr_device_info *dev_info,
166                            uint32_t *const x_out,
167                            uint32_t *const y_out)
168 {
169    uint32_t version = 0;
170    bool has_version;
171 
172    has_version =
173       !PVR_FEATURE_VALUE(dev_info, simple_parameter_format_version, &version);
174 
175    *x_out = PVR_GET_FEATURE_VALUE(dev_info, tile_size_x, 0U);
176    *y_out = PVR_GET_FEATURE_VALUE(dev_info, tile_size_y, 0U);
177 
178    if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format) &&
179        has_version && version == 2) {
180       *x_out *= 2;
181       *y_out *= 2;
182    }
183 }
184 
185 static inline uint32_t
rogue_get_max_output_regs_per_pixel(const struct pvr_device_info * dev_info)186 rogue_get_max_output_regs_per_pixel(const struct pvr_device_info *dev_info)
187 {
188    if (PVR_HAS_FEATURE(dev_info, eight_output_registers))
189       return 8U;
190 
191    return 4U;
192 }
193 
194 static inline void
rogue_get_num_macrotiles_xy(const struct pvr_device_info * dev_info,uint32_t * const x_out,uint32_t * const y_out)195 rogue_get_num_macrotiles_xy(const struct pvr_device_info *dev_info,
196                             uint32_t *const x_out,
197                             uint32_t *const y_out)
198 {
199    uint32_t version;
200 
201    if (PVR_FEATURE_VALUE(dev_info, simple_parameter_format_version, &version))
202       version = 0;
203 
204    if (!PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format) ||
205        version == 2) {
206       *x_out = 4;
207       *y_out = 4;
208    } else {
209       *x_out = 1;
210       *y_out = 1;
211    }
212 }
213 
214 static inline uint32_t
rogue_get_macrotile_array_size(const struct pvr_device_info * dev_info)215 rogue_get_macrotile_array_size(const struct pvr_device_info *dev_info)
216 {
217    uint32_t num_macrotiles_x;
218    uint32_t num_macrotiles_y;
219 
220    if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format))
221       return 0;
222 
223    rogue_get_num_macrotiles_xy(dev_info, &num_macrotiles_x, &num_macrotiles_y);
224 
225    return num_macrotiles_x * num_macrotiles_y * 8U;
226 }
227 
228 /* Region header size in bytes. */
229 static inline uint32_t
rogue_get_region_header_size(const struct pvr_device_info * dev_info)230 rogue_get_region_header_size(const struct pvr_device_info *dev_info)
231 {
232    uint32_t version;
233 
234    if (PVR_FEATURE_VALUE(dev_info, simple_parameter_format_version, &version))
235       version = 0;
236 
237    if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format) &&
238        version == 2) {
239       return 6;
240    }
241 
242    return 5;
243 }
244 
245 static inline uint32_t
rogue_get_render_size_max(const struct pvr_device_info * dev_info)246 rogue_get_render_size_max(const struct pvr_device_info *dev_info)
247 {
248    if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format))
249       if (!PVR_HAS_FEATURE(dev_info, screen_size8K))
250          return 4096U;
251 
252    return 8192U;
253 }
254 
/* Per-axis accessors for the maximum render size. Both expand to the same
 * rogue_get_render_size_max() call, so x and y share one limit.
 */
#define rogue_get_render_size_max_x(dev_info) \
   rogue_get_render_size_max(dev_info)

#define rogue_get_render_size_max_y(dev_info) \
   rogue_get_render_size_max(dev_info)
260 
261 static inline uint32_t
rogue_get_slc_cache_line_size(const struct pvr_device_info * dev_info)262 rogue_get_slc_cache_line_size(const struct pvr_device_info *dev_info)
263 {
264    return PVR_GET_FEATURE_VALUE(dev_info, slc_cache_line_size_bits, 8U) / 8U;
265 }
266 
pvr_get_max_user_vertex_output_components(const struct pvr_device_info * dev_info)267 static inline uint32_t pvr_get_max_user_vertex_output_components(
268    const struct pvr_device_info *dev_info)
269 {
270    const uint32_t uvs_pba_entries =
271       PVR_GET_FEATURE_VALUE(dev_info, uvs_pba_entries, 0U);
272    const uint32_t uvs_banks = PVR_GET_FEATURE_VALUE(dev_info, uvs_banks, 0U);
273 
274    if (uvs_banks <= 8U && uvs_pba_entries == 160U)
275       return 64U;
276 
277    return 128U;
278 }
279 
280 static inline uint32_t
rogue_max_compute_shared_registers(const struct pvr_device_info * dev_info)281 rogue_max_compute_shared_registers(const struct pvr_device_info *dev_info)
282 {
283    if (PVR_HAS_FEATURE(dev_info, compute))
284       return 1024U;
285 
286    return 0U;
287 }
288 
289 static inline uint32_t
rogue_get_max_num_cores(const struct pvr_device_info * dev_info)290 rogue_get_max_num_cores(const struct pvr_device_info *dev_info)
291 {
292    if (PVR_HAS_FEATURE(dev_info, gpu_multicore_support) &&
293        PVR_HAS_FEATURE(dev_info, xpu_max_slaves)) {
294       return PVR_GET_FEATURE_VALUE(dev_info, xpu_max_slaves, 0U) + 1U;
295    }
296 
297    return 1U;
298 }
299 
300 static inline uint32_t
rogue_get_cdm_context_resume_buffer_size(const struct pvr_device_info * dev_info)301 rogue_get_cdm_context_resume_buffer_size(const struct pvr_device_info *dev_info)
302 {
303    if (PVR_HAS_FEATURE(dev_info, gpu_multicore_support)) {
304       const uint32_t max_num_cores = rogue_get_max_num_cores(dev_info);
305       const uint32_t cache_line_size = rogue_get_slc_cache_line_size(dev_info);
306       const uint32_t cdm_context_resume_buffer_stride =
307          ALIGN_POT(ROGUE_LLS_CDM_CONTEXT_RESUME_BUFFER_SIZE, cache_line_size);
308 
309       return cdm_context_resume_buffer_stride * max_num_cores;
310    }
311 
312    return ROGUE_LLS_CDM_CONTEXT_RESUME_BUFFER_SIZE;
313 }
314 
rogue_get_cdm_context_resume_buffer_alignment(const struct pvr_device_info * dev_info)315 static inline uint32_t rogue_get_cdm_context_resume_buffer_alignment(
316    const struct pvr_device_info *dev_info)
317 {
318    if (PVR_HAS_FEATURE(dev_info, gpu_multicore_support))
319       return rogue_get_slc_cache_line_size(dev_info);
320 
321    return ROGUE_LLS_CDM_CONTEXT_RESUME_BUFFER_ALIGNMENT;
322 }
323 
324 static inline uint32_t
rogue_get_compute_max_work_group_size(const struct pvr_device_info * dev_info)325 rogue_get_compute_max_work_group_size(const struct pvr_device_info *dev_info)
326 {
327    /* The number of tasks which can be executed per USC - Limited to 16U by the
328     * CDM.
329     */
330    const uint32_t max_tasks_per_usc = 16U;
331 
332    if (!PVR_HAS_ERN(dev_info, 35421)) {
333       /* Barriers on work-groups > 32 instances aren't supported. */
334       return ROGUE_MAX_INSTANCES_PER_TASK;
335    }
336 
337    return ROGUE_MAX_INSTANCES_PER_TASK * max_tasks_per_usc;
338 }
339 
/* Don't use this directly. Use the x and y define macros.
 *
 * Returns one and a half times the maximum render size, minus one.
 */
static inline uint32_t
__rogue_get_param_vf_max(const struct pvr_device_info *dev_info)
{
   const uint32_t render_size_max = rogue_get_render_size_max(dev_info);

   return (render_size_max * 3 / 2) - 1;
}
346 
/* Per-axis accessors for the VF maximum; both expand to the same
 * __rogue_get_param_vf_max() call.
 */
#define rogue_get_param_vf_max_x(dev_info) __rogue_get_param_vf_max(dev_info)
#define rogue_get_param_vf_max_y(dev_info) __rogue_get_param_vf_max(dev_info)
349 
350 #endif /* ROGUE_HW_UTILS_H */
351