xref: /aosp_15_r20/external/mesa3d/src/amd/common/ac_nir_meta_ps_resolve.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright 2024 Advanced Micro Devices, Inc.
3  *
4  * SPDX-License-Identifier: MIT
5  */
6 
7 #include "ac_nir_meta.h"
8 #include "ac_nir_helpers.h"
9 #include "nir_builder.h"
10 #include "compiler/aco_interface.h"
11 
12 static nir_def *
build_tex_load_ms(nir_builder * b,unsigned num_components,unsigned bit_size,nir_deref_instr * tex_deref,nir_def * coord,nir_def * sample_index)13 build_tex_load_ms(nir_builder *b, unsigned num_components, unsigned bit_size,
14                   nir_deref_instr *tex_deref, nir_def *coord, nir_def *sample_index)
15 {
16    nir_tex_src srcs[] = {
17       nir_tex_src_for_ssa(nir_tex_src_coord, coord),
18       nir_tex_src_for_ssa(nir_tex_src_ms_index, sample_index),
19    };
20    nir_def *result = nir_build_tex_deref_instr(b, nir_texop_txf_ms, tex_deref, tex_deref,
21                                                ARRAY_SIZE(srcs), srcs);
22 
23    nir_tex_instr *tex = nir_instr_as_tex(result->parent_instr);
24 
25    assert(bit_size == 32 || bit_size == 16);
26    if (bit_size == 16) {
27       tex->dest_type = nir_type_float16;
28       tex->def.bit_size = 16;
29    }
30    return nir_trim_vector(b, result, num_components);
31 }
32 
33 nir_shader *
ac_create_resolve_ps(const struct ac_ps_resolve_options * options,const union ac_ps_resolve_key * key)34 ac_create_resolve_ps(const struct ac_ps_resolve_options *options,
35                      const union ac_ps_resolve_key *key)
36 {
37    if (options->print_key) {
38       fprintf(stderr, "Internal shader: resolve_ps\n");
39       fprintf(stderr, "   key.use_aco = %u\n", key->use_aco);
40       fprintf(stderr, "   key.src_is_array = %u\n", key->src_is_array);
41       fprintf(stderr, "   key.log_samples = %u\n", key->log_samples);
42       fprintf(stderr, "   key.last_src_channel = %u\n", key->last_src_channel);
43       fprintf(stderr, "   key.x_clamp_to_edge = %u\n", key->x_clamp_to_edge);
44       fprintf(stderr, "   key.y_clamp_to_edge = %u\n", key->y_clamp_to_edge);
45       fprintf(stderr, "   key.d16 = %u\n", key->d16);
46       fprintf(stderr, "   key.a16 = %u\n", key->a16);
47       fprintf(stderr, "\n");
48    }
49 
50    nir_builder b =
51       nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, options->nir_options, "ac_resolve_ps");
52    b.shader->info.use_aco_amd = options->use_aco ||
53                                 (key->use_aco && aco_is_gpu_supported(options->info));
54    BITSET_SET(b.shader->info.textures_used, 1);
55 
56    const struct glsl_type *sampler_type =
57       glsl_sampler_type(GLSL_SAMPLER_DIM_MS, /*shadow*/ false, /*is_array*/ key->src_is_array,
58                         GLSL_TYPE_FLOAT);
59    nir_variable *sampler = nir_variable_create(b.shader, nir_var_uniform, sampler_type, "samp0");
60    sampler->data.binding = 0;
61 
62    nir_deref_instr *deref = nir_build_deref_var(&b, sampler);
63    nir_def *zero = nir_imm_int(&b, 0);
64    nir_def *baryc = nir_load_barycentric_pixel(&b, 32, .interp_mode = INTERP_MODE_SMOOTH);
65    nir_def *coord = nir_load_interpolated_input(&b, 2 + key->src_is_array, 32, baryc, zero,
66                                                 .dest_type = nir_type_float32,
67                                                 .io_semantics = (nir_io_semantics){
68                                                                 .location = VARYING_SLOT_VAR0,
69                                                                 .num_slots = 1});
70 
71    /* Nearest filtering floors and then converts to integer, and then
72     * applies clamp to edge as clamp(coord, 0, dim - 1).
73     */
74    coord = nir_vector_insert_imm(&b, coord, nir_ffloor(&b, nir_channel(&b, coord, 0)), 0);
75    coord = nir_vector_insert_imm(&b, coord, nir_ffloor(&b, nir_channel(&b, coord, 1)), 1);
76    coord = nir_f2iN(&b, coord, key->a16 ? 16 : 32);
77 
78    /* Clamp to edge only for X and Y because Z can't be out of bounds. */
79    nir_def *resinfo = NULL;
80    for (unsigned chan = 0; chan < 2; chan++) {
81       if (chan ? key->y_clamp_to_edge : key->x_clamp_to_edge) {
82          if (!resinfo) {
83             resinfo = nir_build_tex_deref_instr(&b, nir_texop_txs, deref, deref, 0, NULL);
84 
85             if (key->a16) {
86                resinfo = nir_umin_imm(&b, resinfo, INT16_MAX);
87                resinfo = nir_i2i16(&b, resinfo);
88             }
89          }
90 
91          nir_def *tmp = nir_channel(&b, coord, chan);
92          tmp = nir_imax_imm(&b, tmp, 0);
93          tmp = nir_imin(&b, tmp, nir_iadd_imm(&b, nir_channel(&b, resinfo, chan), -1));
94          coord = nir_vector_insert_imm(&b, coord, tmp, chan);
95       }
96    }
97 
98    /* Use samples_identical if it's supported. */
99    bool uses_samples_identical = options->info->gfx_level < GFX11 && !options->no_fmask;
100    nir_def *sample0 = NULL;
101    nir_if *if_identical = NULL;
102 
103    assert(key->last_src_channel <= key->last_dst_channel);
104 
105    if (uses_samples_identical) {
106       nir_tex_src iden_srcs[] = {
107          nir_tex_src_for_ssa(nir_tex_src_coord, coord),
108       };
109       nir_def *samples_identical =
110          nir_build_tex_deref_instr(&b, nir_texop_samples_identical, deref, deref,
111                                    ARRAY_SIZE(iden_srcs), iden_srcs);
112 
113       /* If all samples are identical, load only sample 0. */
114       if_identical = nir_push_if(&b, samples_identical);
115       {
116          sample0 = build_tex_load_ms(&b, key->last_src_channel + 1, key->d16 ? 16 : 32,
117                                      deref, coord, nir_imm_intN_t(&b, 0, coord->bit_size));
118       }
119       nir_push_else(&b, if_identical);
120    }
121 
122    /* Insert the sample index into the coordinates. */
123    unsigned num_src_coords = 2 + key->src_is_array + 1;
124    unsigned num_samples = 1 << key->log_samples;
125    nir_def *coord_src[16] = {0};
126 
127    for (unsigned i = 0; i < num_samples; i++) {
128       coord_src[i] = nir_pad_vector(&b, coord, num_src_coords);
129       coord_src[i] = nir_vector_insert_imm(&b, coord_src[i],
130                                            nir_imm_intN_t(&b, i, coord->bit_size),
131                                            num_src_coords - 1);
132    }
133 
134    /* We need this because LLVM interleaves coordinate computations with image loads, which breaks
135     * VMEM clauses.
136     */
137    ac_optimization_barrier_vgpr_array(options->info, &b, coord_src, num_samples, num_src_coords);
138 
139    nir_def *samples[16] = {0};
140    for (unsigned i = 0; i < num_samples; i++) {
141       samples[i] = build_tex_load_ms(&b, key->last_src_channel + 1, key->d16 ? 16 : 32,
142                                      deref, nir_trim_vector(&b, coord_src[i], num_src_coords - 1),
143                                      nir_channel(&b, coord_src[i], num_src_coords - 1));
144    }
145    nir_def *result = ac_average_samples(&b, samples, num_samples);
146 
147    if (uses_samples_identical) {
148       nir_pop_if(&b, if_identical);
149       result = nir_if_phi(&b, sample0, result);
150    }
151 
152    result = nir_pad_vector(&b, result, key->last_dst_channel + 1);
153    for (unsigned i = key->last_src_channel + 1; i <= key->last_dst_channel; i++) {
154       result = nir_vector_insert_imm(&b, result,
155                                      nir_imm_floatN_t(&b, i == 3 ? 1 : 0, result->bit_size), i);
156    }
157 
158    nir_store_output(&b, result, zero,
159                     .write_mask = BITFIELD_MASK(key->last_dst_channel + 1),
160                     .src_type = key->d16 ? nir_type_float16 : nir_type_float32,
161                     .io_semantics = (nir_io_semantics){
162                                     .location = FRAG_RESULT_DATA0,
163                                     .num_slots = 1});
164 
165    return b.shader;
166 }
167