xref: /aosp_15_r20/external/mesa3d/src/amd/common/ac_nir_lower_global_access.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2021 Valve Corporation
3  *
4  * SPDX-License-Identifier: MIT
5  */
6 
7 #include "ac_nir.h"
8 #include "nir.h"
9 #include "nir_builder.h"
10 
11 static bool
is_u2u64(nir_scalar scalar)12 is_u2u64(nir_scalar scalar)
13 {
14    if (nir_scalar_is_alu(scalar) && nir_scalar_alu_op(scalar) == nir_op_u2u64)
15       return true;
16 
17    if (nir_scalar_is_alu(scalar) && nir_scalar_alu_op(scalar) == nir_op_pack_64_2x32_split) {
18       nir_scalar src1 = nir_scalar_chase_alu_src(scalar, 1);
19       return nir_scalar_is_const(src1) && nir_scalar_as_uint(src1) == 0;
20    }
21 
22    return false;
23 }
24 
25 static nir_def *
try_extract_additions(nir_builder * b,nir_scalar scalar,uint64_t * out_const,nir_def ** out_offset)26 try_extract_additions(nir_builder *b, nir_scalar scalar, uint64_t *out_const,
27                       nir_def **out_offset)
28 {
29    if (!nir_scalar_is_alu(scalar) || nir_scalar_alu_op(scalar) != nir_op_iadd)
30       return NULL;
31 
32    nir_alu_instr *alu = nir_instr_as_alu(scalar.def->parent_instr);
33    nir_scalar src0 = nir_scalar_chase_alu_src(scalar, 0);
34    nir_scalar src1 = nir_scalar_chase_alu_src(scalar, 1);
35 
36    for (unsigned i = 0; i < 2; ++i) {
37       nir_scalar src = i ? src1 : src0;
38       if (nir_scalar_is_const(src)) {
39          *out_const += nir_scalar_as_uint(src);
40       } else if (is_u2u64(src)) {
41          nir_scalar offset_scalar = nir_scalar_chase_alu_src(src, 0);
42          if (offset_scalar.def->bit_size != 32)
43             continue;
44 
45          nir_def *offset = nir_channel(b, offset_scalar.def, offset_scalar.comp);
46          if (*out_offset)
47             *out_offset = nir_iadd(b, *out_offset, offset);
48          else
49             *out_offset = offset;
50       } else {
51          continue;
52       }
53 
54       nir_def *replace_src =
55          try_extract_additions(b, i == 1 ? src0 : src1, out_const, out_offset);
56       return replace_src ? replace_src : nir_ssa_for_alu_src(b, alu, 1 - i);
57    }
58 
59    nir_def *replace_src0 = try_extract_additions(b, src0, out_const, out_offset);
60    nir_def *replace_src1 = try_extract_additions(b, src1, out_const, out_offset);
61    if (!replace_src0 && !replace_src1)
62       return NULL;
63 
64    replace_src0 = replace_src0 ? replace_src0 : nir_channel(b, src0.def, src0.comp);
65    replace_src1 = replace_src1 ? replace_src1 : nir_channel(b, src1.def, src1.comp);
66    return nir_iadd(b, replace_src0, replace_src1);
67 }
68 
69 static bool
process_instr(nir_builder * b,nir_intrinsic_instr * intrin,void * _)70 process_instr(nir_builder *b, nir_intrinsic_instr *intrin, void *_)
71 {
72    nir_intrinsic_op op;
73    switch (intrin->intrinsic) {
74    case nir_intrinsic_load_global:
75    case nir_intrinsic_load_global_constant:
76       op = nir_intrinsic_load_global_amd;
77       break;
78    case nir_intrinsic_global_atomic:
79       op = nir_intrinsic_global_atomic_amd;
80       break;
81     case nir_intrinsic_global_atomic_swap:
82       op = nir_intrinsic_global_atomic_swap_amd;
83       break;
84    case nir_intrinsic_store_global:
85       op = nir_intrinsic_store_global_amd;
86       break;
87    default:
88       return false;
89    }
90    unsigned addr_src_idx = op == nir_intrinsic_store_global_amd ? 1 : 0;
91 
92    nir_src *addr_src = &intrin->src[addr_src_idx];
93 
94    uint64_t off_const = 0;
95    nir_def *offset = NULL;
96    nir_scalar src = {addr_src->ssa, 0};
97    b->cursor = nir_after_instr(addr_src->ssa->parent_instr);
98    nir_def *addr = try_extract_additions(b, src, &off_const, &offset);
99    addr = addr ? addr : addr_src->ssa;
100 
101    b->cursor = nir_before_instr(&intrin->instr);
102 
103    if (off_const > UINT32_MAX) {
104       addr = nir_iadd_imm(b, addr, off_const);
105       off_const = 0;
106    }
107 
108    nir_intrinsic_instr *new_intrin = nir_intrinsic_instr_create(b->shader, op);
109 
110    new_intrin->num_components = intrin->num_components;
111 
112    if (op != nir_intrinsic_store_global_amd)
113       nir_def_init(&new_intrin->instr, &new_intrin->def,
114                    intrin->def.num_components, intrin->def.bit_size);
115 
116    unsigned num_src = nir_intrinsic_infos[intrin->intrinsic].num_srcs;
117    for (unsigned i = 0; i < num_src; i++)
118       new_intrin->src[i] = nir_src_for_ssa(intrin->src[i].ssa);
119    new_intrin->src[num_src] = nir_src_for_ssa(offset ? offset : nir_imm_zero(b, 1, 32));
120    new_intrin->src[addr_src_idx] = nir_src_for_ssa(addr);
121 
122    if (nir_intrinsic_has_access(intrin))
123       nir_intrinsic_set_access(new_intrin, nir_intrinsic_access(intrin));
124    if (nir_intrinsic_has_align_mul(intrin))
125       nir_intrinsic_set_align_mul(new_intrin, nir_intrinsic_align_mul(intrin));
126    if (nir_intrinsic_has_align_offset(intrin))
127       nir_intrinsic_set_align_offset(new_intrin, nir_intrinsic_align_offset(intrin));
128    if (nir_intrinsic_has_write_mask(intrin))
129       nir_intrinsic_set_write_mask(new_intrin, nir_intrinsic_write_mask(intrin));
130    if (nir_intrinsic_has_atomic_op(intrin))
131       nir_intrinsic_set_atomic_op(new_intrin, nir_intrinsic_atomic_op(intrin));
132    nir_intrinsic_set_base(new_intrin, off_const);
133 
134    nir_builder_instr_insert(b, &new_intrin->instr);
135    if (op != nir_intrinsic_store_global_amd)
136       nir_def_rewrite_uses(&intrin->def, &new_intrin->def);
137    nir_instr_remove(&intrin->instr);
138 
139    return true;
140 }
141 
142 bool
ac_nir_lower_global_access(nir_shader * shader)143 ac_nir_lower_global_access(nir_shader *shader)
144 {
145    return nir_shader_intrinsics_pass(shader, process_instr,
146                                        nir_metadata_control_flow, NULL);
147 }
148