xref: /aosp_15_r20/external/mesa3d/src/gallium/drivers/r600/cayman_msaa.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  * Authors: Marek Olšák <[email protected]>
4  * SPDX-License-Identifier: MIT
5  */
6 
7 #include "r600_cs.h"
8 #include "evergreend.h"
9 
10 /* 2xMSAA
11  * There are two locations (4, 4), (-4, -4). */
12 const uint32_t eg_sample_locs_2x[4] = {
13 	FILL_SREG(4, 4, -4, -4, 4, 4, -4, -4),
14 	FILL_SREG(4, 4, -4, -4, 4, 4, -4, -4),
15 	FILL_SREG(4, 4, -4, -4, 4, 4, -4, -4),
16 	FILL_SREG(4, 4, -4, -4, 4, 4, -4, -4),
17 };
18 const unsigned eg_max_dist_2x = 4;
19 /* 4xMSAA
20  * There are 4 locations: (-2, -6), (6, -2), (-6, 2), (2, 6). */
21 const uint32_t eg_sample_locs_4x[4] = {
22 	FILL_SREG(-2, -6, 6, -2, -6, 2, 2, 6),
23 	FILL_SREG(-2, -6, 6, -2, -6, 2, 2, 6),
24 	FILL_SREG(-2, -6, 6, -2, -6, 2, 2, 6),
25 	FILL_SREG(-2, -6, 6, -2, -6, 2, 2, 6),
26 };
27 const unsigned eg_max_dist_4x = 6;
28 
29 /* Cayman 8xMSAA */
30 static const uint32_t cm_sample_locs_8x[] = {
31 	FILL_SREG( 1, -3, -1,  3, 5,  1, -3, -5),
32 	FILL_SREG( 1, -3, -1,  3, 5,  1, -3, -5),
33 	FILL_SREG( 1, -3, -1,  3, 5,  1, -3, -5),
34 	FILL_SREG( 1, -3, -1,  3, 5,  1, -3, -5),
35 	FILL_SREG(-5,  5, -7, -1, 3,  7,  7, -7),
36 	FILL_SREG(-5,  5, -7, -1, 3,  7,  7, -7),
37 	FILL_SREG(-5,  5, -7, -1, 3,  7,  7, -7),
38 	FILL_SREG(-5,  5, -7, -1, 3,  7,  7, -7),
39 };
40 static const unsigned cm_max_dist_8x = 8;
41 /* Cayman 16xMSAA */
42 static const uint32_t cm_sample_locs_16x[] = {
43 	FILL_SREG( 1,  1, -1, -3, -3,  2,  4, -1),
44 	FILL_SREG( 1,  1, -1, -3, -3,  2,  4, -1),
45 	FILL_SREG( 1,  1, -1, -3, -3,  2,  4, -1),
46 	FILL_SREG( 1,  1, -1, -3, -3,  2,  4, -1),
47 	FILL_SREG(-5, -2,  2,  5,  5,  3,  3, -5),
48 	FILL_SREG(-5, -2,  2,  5,  5,  3,  3, -5),
49 	FILL_SREG(-5, -2,  2,  5,  5,  3,  3, -5),
50 	FILL_SREG(-5, -2,  2,  5,  5,  3,  3, -5),
51 	FILL_SREG(-2,  6,  0, -7, -4, -6, -6,  4),
52 	FILL_SREG(-2,  6,  0, -7, -4, -6, -6,  4),
53 	FILL_SREG(-2,  6,  0, -7, -4, -6, -6,  4),
54 	FILL_SREG(-2,  6,  0, -7, -4, -6, -6,  4),
55 	FILL_SREG(-8,  0,  7, -4,  6,  7, -7, -8),
56 	FILL_SREG(-8,  0,  7, -4,  6,  7, -7, -8),
57 	FILL_SREG(-8,  0,  7, -4,  6,  7, -7, -8),
58 	FILL_SREG(-8,  0,  7, -4,  6,  7, -7, -8),
59 };
60 static const unsigned cm_max_dist_16x = 8;
61 
cayman_get_sample_position(struct pipe_context * ctx,unsigned sample_count,unsigned sample_index,float * out_value)62 void cayman_get_sample_position(struct pipe_context *ctx, unsigned sample_count,
63 				unsigned sample_index, float *out_value)
64 {
65 	int offset, index;
66 	struct {
67 		int idx:4;
68 	} val;
69 	switch (sample_count) {
70 	case 1:
71 	default:
72 		out_value[0] = out_value[1] = 0.5;
73 		break;
74 	case 2:
75 		offset = 4 * (sample_index * 2);
76 		val.idx = (eg_sample_locs_2x[0] >> offset) & 0xf;
77 		out_value[0] = (float)(val.idx + 8) / 16.0f;
78 		val.idx = (eg_sample_locs_2x[0] >> (offset + 4)) & 0xf;
79 		out_value[1] = (float)(val.idx + 8) / 16.0f;
80 		break;
81 	case 4:
82 		offset = 4 * (sample_index * 2);
83 		val.idx = (eg_sample_locs_4x[0] >> offset) & 0xf;
84 		out_value[0] = (float)(val.idx + 8) / 16.0f;
85 		val.idx = (eg_sample_locs_4x[0] >> (offset + 4)) & 0xf;
86 		out_value[1] = (float)(val.idx + 8) / 16.0f;
87 		break;
88 	case 8:
89 		offset = 4 * (sample_index % 4 * 2);
90 		index = (sample_index / 4) * 4;
91 		val.idx = (cm_sample_locs_8x[index] >> offset) & 0xf;
92 		out_value[0] = (float)(val.idx + 8) / 16.0f;
93 		val.idx = (cm_sample_locs_8x[index] >> (offset + 4)) & 0xf;
94 		out_value[1] = (float)(val.idx + 8) / 16.0f;
95 		break;
96 	case 16:
97 		offset = 4 * (sample_index % 4 * 2);
98 		index = (sample_index / 4) * 4;
99 		val.idx = (cm_sample_locs_16x[index] >> offset) & 0xf;
100 		out_value[0] = (float)(val.idx + 8) / 16.0f;
101 		val.idx = (cm_sample_locs_16x[index] >> (offset + 4)) & 0xf;
102 		out_value[1] = (float)(val.idx + 8) / 16.0f;
103 		break;
104 	}
105 }
106 
cayman_init_msaa(struct pipe_context * ctx)107 void cayman_init_msaa(struct pipe_context *ctx)
108 {
109 	struct r600_common_context *rctx = (struct r600_common_context*)ctx;
110 	int i;
111 
112 	cayman_get_sample_position(ctx, 1, 0, rctx->sample_locations_1x[0]);
113 
114 	for (i = 0; i < 2; i++)
115 		cayman_get_sample_position(ctx, 2, i, rctx->sample_locations_2x[i]);
116 	for (i = 0; i < 4; i++)
117 		cayman_get_sample_position(ctx, 4, i, rctx->sample_locations_4x[i]);
118 	for (i = 0; i < 8; i++)
119 		cayman_get_sample_position(ctx, 8, i, rctx->sample_locations_8x[i]);
120 	for (i = 0; i < 16; i++)
121 		cayman_get_sample_position(ctx, 16, i, rctx->sample_locations_16x[i]);
122 }
123 
cayman_emit_msaa_sample_locs(struct radeon_cmdbuf * cs,int nr_samples)124 static void cayman_emit_msaa_sample_locs(struct radeon_cmdbuf *cs, int nr_samples)
125 {
126 	switch (nr_samples) {
127 	default:
128 	case 1:
129 		radeon_set_context_reg(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, 0);
130 		radeon_set_context_reg(cs, CM_R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, 0);
131 		radeon_set_context_reg(cs, CM_R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, 0);
132 		radeon_set_context_reg(cs, CM_R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, 0);
133 		break;
134 	case 2:
135 		radeon_set_context_reg(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, eg_sample_locs_2x[0]);
136 		radeon_set_context_reg(cs, CM_R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, eg_sample_locs_2x[1]);
137 		radeon_set_context_reg(cs, CM_R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, eg_sample_locs_2x[2]);
138 		radeon_set_context_reg(cs, CM_R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, eg_sample_locs_2x[3]);
139 		break;
140 	case 4:
141 		radeon_set_context_reg(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, eg_sample_locs_4x[0]);
142 		radeon_set_context_reg(cs, CM_R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, eg_sample_locs_4x[1]);
143 		radeon_set_context_reg(cs, CM_R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, eg_sample_locs_4x[2]);
144 		radeon_set_context_reg(cs, CM_R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, eg_sample_locs_4x[3]);
145 		break;
146 	case 8:
147 		radeon_set_context_reg_seq(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, 14);
148 		radeon_emit(cs, cm_sample_locs_8x[0]);
149 		radeon_emit(cs, cm_sample_locs_8x[4]);
150 		radeon_emit(cs, 0);
151 		radeon_emit(cs, 0);
152 		radeon_emit(cs, cm_sample_locs_8x[1]);
153 		radeon_emit(cs, cm_sample_locs_8x[5]);
154 		radeon_emit(cs, 0);
155 		radeon_emit(cs, 0);
156 		radeon_emit(cs, cm_sample_locs_8x[2]);
157 		radeon_emit(cs, cm_sample_locs_8x[6]);
158 		radeon_emit(cs, 0);
159 		radeon_emit(cs, 0);
160 		radeon_emit(cs, cm_sample_locs_8x[3]);
161 		radeon_emit(cs, cm_sample_locs_8x[7]);
162 		break;
163 	case 16:
164 		radeon_set_context_reg_seq(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, 16);
165 		radeon_emit(cs, cm_sample_locs_16x[0]);
166 		radeon_emit(cs, cm_sample_locs_16x[4]);
167 		radeon_emit(cs, cm_sample_locs_16x[8]);
168 		radeon_emit(cs, cm_sample_locs_16x[12]);
169 		radeon_emit(cs, cm_sample_locs_16x[1]);
170 		radeon_emit(cs, cm_sample_locs_16x[5]);
171 		radeon_emit(cs, cm_sample_locs_16x[9]);
172 		radeon_emit(cs, cm_sample_locs_16x[13]);
173 		radeon_emit(cs, cm_sample_locs_16x[2]);
174 		radeon_emit(cs, cm_sample_locs_16x[6]);
175 		radeon_emit(cs, cm_sample_locs_16x[10]);
176 		radeon_emit(cs, cm_sample_locs_16x[14]);
177 		radeon_emit(cs, cm_sample_locs_16x[3]);
178 		radeon_emit(cs, cm_sample_locs_16x[7]);
179 		radeon_emit(cs, cm_sample_locs_16x[11]);
180 		radeon_emit(cs, cm_sample_locs_16x[15]);
181 		break;
182 	}
183 }
184 
cayman_emit_msaa_state(struct radeon_cmdbuf * cs,int nr_samples,int ps_iter_samples,int overrast_samples)185 void cayman_emit_msaa_state(struct radeon_cmdbuf *cs, int nr_samples,
186 			    int ps_iter_samples, int overrast_samples)
187 {
188 	int setup_samples = nr_samples > 1 ? nr_samples :
189 			    overrast_samples > 1 ? overrast_samples : 0;
190 	/* Required by OpenGL line rasterization.
191 	 *
192 	 * TODO: We should also enable perpendicular endcaps for AA lines,
193 	 *       but that requires implementing line stippling in the pixel
194 	 *       shader. SC can only do line stippling with axis-aligned
195 	 *       endcaps.
196 	 */
197 	unsigned sc_line_cntl = S_028BDC_DX10_DIAMOND_TEST_ENA(1);
198 	unsigned sc_mode_cntl_1 =
199 		EG_S_028A4C_FORCE_EOV_CNTDWN_ENABLE(1) |
200 		EG_S_028A4C_FORCE_EOV_REZ_ENABLE(1);
201 
202 	if (nr_samples > 1) {
203 		cayman_emit_msaa_sample_locs(cs, nr_samples);
204 	}
205 
206 	if (setup_samples > 1) {
207 		/* indexed by log2(nr_samples) */
208 		const unsigned max_dist[] = {
209 			0,
210 			eg_max_dist_2x,
211 			eg_max_dist_4x,
212 			cm_max_dist_8x,
213 			cm_max_dist_16x
214 		};
215 		unsigned log_samples = util_logbase2(setup_samples);
216 		unsigned log_ps_iter_samples =
217 			util_logbase2(util_next_power_of_two(ps_iter_samples));
218 
219 		radeon_set_context_reg_seq(cs, CM_R_028BDC_PA_SC_LINE_CNTL, 2);
220 		radeon_emit(cs, sc_line_cntl |
221 			    S_028BDC_EXPAND_LINE_WIDTH(1)); /* CM_R_028BDC_PA_SC_LINE_CNTL */
222 		radeon_emit(cs, S_028BE0_MSAA_NUM_SAMPLES(log_samples) |
223 			    S_028BE0_MAX_SAMPLE_DIST(max_dist[log_samples]) |
224 			    S_028BE0_MSAA_EXPOSED_SAMPLES(log_samples)); /* CM_R_028BE0_PA_SC_AA_CONFIG */
225 
226 		if (nr_samples > 1) {
227 			radeon_set_context_reg(cs, CM_R_028804_DB_EQAA,
228 					       S_028804_MAX_ANCHOR_SAMPLES(log_samples) |
229 					       S_028804_PS_ITER_SAMPLES(log_ps_iter_samples) |
230 					       S_028804_MASK_EXPORT_NUM_SAMPLES(log_samples) |
231 					       S_028804_ALPHA_TO_MASK_NUM_SAMPLES(log_samples) |
232 					       S_028804_HIGH_QUALITY_INTERSECTIONS(1) |
233 					       S_028804_STATIC_ANCHOR_ASSOCIATIONS(1));
234 			radeon_set_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1,
235 					       EG_S_028A4C_PS_ITER_SAMPLE(ps_iter_samples > 1) |
236 					       sc_mode_cntl_1);
237 		} else if (overrast_samples > 1) {
238 			radeon_set_context_reg(cs, CM_R_028804_DB_EQAA,
239 					       S_028804_HIGH_QUALITY_INTERSECTIONS(1) |
240 					       S_028804_STATIC_ANCHOR_ASSOCIATIONS(1) |
241 					       S_028804_OVERRASTERIZATION_AMOUNT(log_samples));
242 			radeon_set_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1,
243 					       sc_mode_cntl_1);
244 		}
245 	} else {
246 		radeon_set_context_reg_seq(cs, CM_R_028BDC_PA_SC_LINE_CNTL, 2);
247 		radeon_emit(cs, sc_line_cntl); /* CM_R_028BDC_PA_SC_LINE_CNTL */
248 		radeon_emit(cs, 0); /* CM_R_028BE0_PA_SC_AA_CONFIG */
249 
250 		radeon_set_context_reg(cs, CM_R_028804_DB_EQAA,
251 				       S_028804_HIGH_QUALITY_INTERSECTIONS(1) |
252 				       S_028804_STATIC_ANCHOR_ASSOCIATIONS(1));
253 		radeon_set_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1,
254 				       sc_mode_cntl_1);
255 	}
256 }
257