xref: /aosp_15_r20/external/mesa3d/src/amd/common/ac_binary.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * SPDX-License-Identifier: MIT
5  */
6 
7 #include "ac_binary.h"
8 
9 #include "ac_gpu_info.h"
10 #include "util/u_math.h"
11 #include "util/u_memory.h"
12 
13 #include <sid.h>
14 #include <stdio.h>
15 
16 #define SPILLED_SGPRS 0x4
17 #define SPILLED_VGPRS 0x8
18 
19 /* Parse configuration data in .AMDGPU.config section format. */
ac_parse_shader_binary_config(const char * data,size_t nbytes,unsigned wave_size,const struct radeon_info * info,struct ac_shader_config * conf)20 void ac_parse_shader_binary_config(const char *data, size_t nbytes, unsigned wave_size,
21                                    const struct radeon_info *info, struct ac_shader_config *conf)
22 {
23    for (size_t i = 0; i < nbytes; i += 8) {
24       unsigned reg = util_le32_to_cpu(*(uint32_t *)(data + i));
25       unsigned value = util_le32_to_cpu(*(uint32_t *)(data + i + 4));
26       switch (reg) {
27       case R_00B028_SPI_SHADER_PGM_RSRC1_PS:
28       case R_00B128_SPI_SHADER_PGM_RSRC1_VS:
29       case R_00B228_SPI_SHADER_PGM_RSRC1_GS:
30       case R_00B848_COMPUTE_PGM_RSRC1:
31       case R_00B428_SPI_SHADER_PGM_RSRC1_HS:
32          if (wave_size == 32 || info->wave64_vgpr_alloc_granularity == 8)
33             conf->num_vgprs = MAX2(conf->num_vgprs, (G_00B028_VGPRS(value) + 1) * 8);
34          else
35             conf->num_vgprs = MAX2(conf->num_vgprs, (G_00B028_VGPRS(value) + 1) * 4);
36 
37          conf->num_sgprs = MAX2(conf->num_sgprs, (G_00B028_SGPRS(value) + 1) * 8);
38          /* TODO: LLVM doesn't set FLOAT_MODE for non-compute shaders */
39          conf->float_mode = G_00B028_FLOAT_MODE(value);
40          conf->rsrc1 = value;
41          break;
42       case R_00B02C_SPI_SHADER_PGM_RSRC2_PS:
43          conf->lds_size = MAX2(conf->lds_size, G_00B02C_EXTRA_LDS_SIZE(value));
44          /* TODO: LLVM doesn't set SHARED_VGPR_CNT for all shader types */
45          conf->num_shared_vgprs = G_00B02C_SHARED_VGPR_CNT(value);
46          conf->rsrc2 = value;
47          break;
48       case R_00B12C_SPI_SHADER_PGM_RSRC2_VS:
49          conf->num_shared_vgprs = G_00B12C_SHARED_VGPR_CNT(value);
50          conf->rsrc2 = value;
51          break;
52       case R_00B22C_SPI_SHADER_PGM_RSRC2_GS:
53          conf->num_shared_vgprs = G_00B22C_SHARED_VGPR_CNT(value);
54          conf->rsrc2 = value;
55          break;
56       case R_00B42C_SPI_SHADER_PGM_RSRC2_HS:
57          conf->num_shared_vgprs = G_00B42C_SHARED_VGPR_CNT(value);
58          conf->rsrc2 = value;
59          break;
60       case R_00B84C_COMPUTE_PGM_RSRC2:
61          conf->lds_size = MAX2(conf->lds_size, G_00B84C_LDS_SIZE(value));
62          conf->rsrc2 = value;
63          break;
64       case R_00B8A0_COMPUTE_PGM_RSRC3:
65          conf->num_shared_vgprs = G_00B8A0_SHARED_VGPR_CNT(value);
66          conf->rsrc3 = value;
67          break;
68       case R_02865C_SPI_PS_INPUT_ENA:
69       case R_0286CC_SPI_PS_INPUT_ENA:
70          conf->spi_ps_input_ena = value;
71          break;
72       case R_028660_SPI_PS_INPUT_ADDR:
73       case R_0286D0_SPI_PS_INPUT_ADDR:
74          conf->spi_ps_input_addr = value;
75          break;
76       case R_0286E8_SPI_TMPRING_SIZE:
77       case R_00B860_COMPUTE_TMPRING_SIZE:
78          if (info->gfx_level >= GFX11)
79             conf->scratch_bytes_per_wave = G_00B860_WAVESIZE(value) * 256;
80          else
81             conf->scratch_bytes_per_wave = G_00B860_WAVESIZE(value) * 1024;
82          break;
83       case SPILLED_SGPRS:
84          conf->spilled_sgprs = value;
85          break;
86       case SPILLED_VGPRS:
87          conf->spilled_vgprs = value;
88          break;
89       default: {
90          static bool printed;
91 
92          if (!printed) {
93             fprintf(stderr,
94                     "Warning: LLVM emitted unknown "
95                     "config register: 0x%x\n",
96                     reg);
97             printed = true;
98          }
99       } break;
100       }
101    }
102 
103    if (!conf->spi_ps_input_addr)
104       conf->spi_ps_input_addr = conf->spi_ps_input_ena;
105 
106    /* Enable 64-bit and 16-bit denormals, because there is no performance
107     * cost.
108     *
109     * Don't enable denormals for 32-bit floats, because:
110     * - denormals disable output modifiers
111     * - denormals break v_mad_f32
112     * - GFX6 & GFX7 would be very slow
113     */
114    conf->float_mode &= ~V_00B028_FP_32_DENORMS;
115    conf->float_mode |= V_00B028_FP_16_64_DENORMS;
116 }
117 
ac_align_shader_binary_for_prefetch(const struct radeon_info * info,unsigned size)118 unsigned ac_align_shader_binary_for_prefetch(const struct radeon_info *info, unsigned size)
119 {
120    /* The SQ fetches up to N cache lines of 16 dwords
121     * ahead of the PC, configurable by SH_MEM_CONFIG and
122     * S_INST_PREFETCH. This can cause two issues:
123     *
124     * (1) Crossing a page boundary to an unmapped page. The logic
125     *     does not distinguish between a required fetch and a "mere"
126     *     prefetch and will fault.
127     *
128     * (2) Prefetching instructions that will be changed for a
129     *     different shader.
130     *
131     * (2) is not currently an issue because we flush the I$ at IB
132     * boundaries, but (1) needs to be addressed. Due to buffer
133     * suballocation, we just play it safe.
134     */
135    unsigned prefetch_distance = 0;
136 
137    if (!info->has_graphics && info->family >= CHIP_MI200)
138       prefetch_distance = 16;
139    else if (info->gfx_level >= GFX10)
140       prefetch_distance = 3;
141 
142    if (prefetch_distance) {
143       if (info->gfx_level >= GFX11)
144          size = align(size + prefetch_distance * 64, 128);
145       else
146          size = align(size + prefetch_distance * 64, 64);
147    }
148 
149    return size;
150 }
151