1 /*
2 * Copyright 2014 Advanced Micro Devices, Inc.
3 *
4 * SPDX-License-Identifier: MIT
5 */
6
7 #include "ac_binary.h"
8
9 #include "ac_gpu_info.h"
10 #include "util/u_math.h"
11 #include "util/u_memory.h"
12
13 #include <sid.h>
14 #include <stdio.h>
15
16 #define SPILLED_SGPRS 0x4
17 #define SPILLED_VGPRS 0x8
18
19 /* Parse configuration data in .AMDGPU.config section format. */
ac_parse_shader_binary_config(const char * data,size_t nbytes,unsigned wave_size,const struct radeon_info * info,struct ac_shader_config * conf)20 void ac_parse_shader_binary_config(const char *data, size_t nbytes, unsigned wave_size,
21 const struct radeon_info *info, struct ac_shader_config *conf)
22 {
23 for (size_t i = 0; i < nbytes; i += 8) {
24 unsigned reg = util_le32_to_cpu(*(uint32_t *)(data + i));
25 unsigned value = util_le32_to_cpu(*(uint32_t *)(data + i + 4));
26 switch (reg) {
27 case R_00B028_SPI_SHADER_PGM_RSRC1_PS:
28 case R_00B128_SPI_SHADER_PGM_RSRC1_VS:
29 case R_00B228_SPI_SHADER_PGM_RSRC1_GS:
30 case R_00B848_COMPUTE_PGM_RSRC1:
31 case R_00B428_SPI_SHADER_PGM_RSRC1_HS:
32 if (wave_size == 32 || info->wave64_vgpr_alloc_granularity == 8)
33 conf->num_vgprs = MAX2(conf->num_vgprs, (G_00B028_VGPRS(value) + 1) * 8);
34 else
35 conf->num_vgprs = MAX2(conf->num_vgprs, (G_00B028_VGPRS(value) + 1) * 4);
36
37 conf->num_sgprs = MAX2(conf->num_sgprs, (G_00B028_SGPRS(value) + 1) * 8);
38 /* TODO: LLVM doesn't set FLOAT_MODE for non-compute shaders */
39 conf->float_mode = G_00B028_FLOAT_MODE(value);
40 conf->rsrc1 = value;
41 break;
42 case R_00B02C_SPI_SHADER_PGM_RSRC2_PS:
43 conf->lds_size = MAX2(conf->lds_size, G_00B02C_EXTRA_LDS_SIZE(value));
44 /* TODO: LLVM doesn't set SHARED_VGPR_CNT for all shader types */
45 conf->num_shared_vgprs = G_00B02C_SHARED_VGPR_CNT(value);
46 conf->rsrc2 = value;
47 break;
48 case R_00B12C_SPI_SHADER_PGM_RSRC2_VS:
49 conf->num_shared_vgprs = G_00B12C_SHARED_VGPR_CNT(value);
50 conf->rsrc2 = value;
51 break;
52 case R_00B22C_SPI_SHADER_PGM_RSRC2_GS:
53 conf->num_shared_vgprs = G_00B22C_SHARED_VGPR_CNT(value);
54 conf->rsrc2 = value;
55 break;
56 case R_00B42C_SPI_SHADER_PGM_RSRC2_HS:
57 conf->num_shared_vgprs = G_00B42C_SHARED_VGPR_CNT(value);
58 conf->rsrc2 = value;
59 break;
60 case R_00B84C_COMPUTE_PGM_RSRC2:
61 conf->lds_size = MAX2(conf->lds_size, G_00B84C_LDS_SIZE(value));
62 conf->rsrc2 = value;
63 break;
64 case R_00B8A0_COMPUTE_PGM_RSRC3:
65 conf->num_shared_vgprs = G_00B8A0_SHARED_VGPR_CNT(value);
66 conf->rsrc3 = value;
67 break;
68 case R_02865C_SPI_PS_INPUT_ENA:
69 case R_0286CC_SPI_PS_INPUT_ENA:
70 conf->spi_ps_input_ena = value;
71 break;
72 case R_028660_SPI_PS_INPUT_ADDR:
73 case R_0286D0_SPI_PS_INPUT_ADDR:
74 conf->spi_ps_input_addr = value;
75 break;
76 case R_0286E8_SPI_TMPRING_SIZE:
77 case R_00B860_COMPUTE_TMPRING_SIZE:
78 if (info->gfx_level >= GFX11)
79 conf->scratch_bytes_per_wave = G_00B860_WAVESIZE(value) * 256;
80 else
81 conf->scratch_bytes_per_wave = G_00B860_WAVESIZE(value) * 1024;
82 break;
83 case SPILLED_SGPRS:
84 conf->spilled_sgprs = value;
85 break;
86 case SPILLED_VGPRS:
87 conf->spilled_vgprs = value;
88 break;
89 default: {
90 static bool printed;
91
92 if (!printed) {
93 fprintf(stderr,
94 "Warning: LLVM emitted unknown "
95 "config register: 0x%x\n",
96 reg);
97 printed = true;
98 }
99 } break;
100 }
101 }
102
103 if (!conf->spi_ps_input_addr)
104 conf->spi_ps_input_addr = conf->spi_ps_input_ena;
105
106 /* Enable 64-bit and 16-bit denormals, because there is no performance
107 * cost.
108 *
109 * Don't enable denormals for 32-bit floats, because:
110 * - denormals disable output modifiers
111 * - denormals break v_mad_f32
112 * - GFX6 & GFX7 would be very slow
113 */
114 conf->float_mode &= ~V_00B028_FP_32_DENORMS;
115 conf->float_mode |= V_00B028_FP_16_64_DENORMS;
116 }
117
ac_align_shader_binary_for_prefetch(const struct radeon_info * info,unsigned size)118 unsigned ac_align_shader_binary_for_prefetch(const struct radeon_info *info, unsigned size)
119 {
120 /* The SQ fetches up to N cache lines of 16 dwords
121 * ahead of the PC, configurable by SH_MEM_CONFIG and
122 * S_INST_PREFETCH. This can cause two issues:
123 *
124 * (1) Crossing a page boundary to an unmapped page. The logic
125 * does not distinguish between a required fetch and a "mere"
126 * prefetch and will fault.
127 *
128 * (2) Prefetching instructions that will be changed for a
129 * different shader.
130 *
131 * (2) is not currently an issue because we flush the I$ at IB
132 * boundaries, but (1) needs to be addressed. Due to buffer
133 * suballocation, we just play it safe.
134 */
135 unsigned prefetch_distance = 0;
136
137 if (!info->has_graphics && info->family >= CHIP_MI200)
138 prefetch_distance = 16;
139 else if (info->gfx_level >= GFX10)
140 prefetch_distance = 3;
141
142 if (prefetch_distance) {
143 if (info->gfx_level >= GFX11)
144 size = align(size + prefetch_distance * 64, 128);
145 else
146 size = align(size + prefetch_distance * 64, 64);
147 }
148
149 return size;
150 }
151