xref: /aosp_15_r20/external/mesa3d/src/compiler/nir/nir_lower_image_atomics_to_global.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright 2023 Valve Corporation
3  * SPDX-License-Identifier: MIT
4  */
5 
6 #include "util/format/u_format.h"
7 #include "nir_builder.h"
8 
9 /*
10  * If shader images are uncompressed, dedicated image atomics are unnecessary.
11  * Instead, there may be a "load texel address" instruction that does all the
12  * addressing math, and then regular global atomics may be used with the
13  * calculated address. This pass lowers image atomics to image_texel_address +
14  * global atomics.
15  */
16 
17 static bool
lower(nir_builder * b,nir_intrinsic_instr * intr,UNUSED void * _)18 lower(nir_builder *b, nir_intrinsic_instr *intr, UNUSED void *_)
19 {
20    nir_intrinsic_op address_op;
21    bool swap;
22 
23 #define CASE(storage)                                                  \
24    case nir_intrinsic_##storage##_atomic:                              \
25    case nir_intrinsic_##storage##_atomic_swap:                         \
26       address_op = nir_intrinsic_##storage##_texel_address;            \
27       swap = intr->intrinsic == nir_intrinsic_##storage##_atomic_swap; \
28       break;
29 
30    switch (intr->intrinsic) {
31       CASE(image)
32       CASE(bindless_image)
33       CASE(image_deref)
34    default:
35       return false;
36    }
37 #undef CASE
38 
39    b->cursor = nir_before_instr(&intr->instr);
40    nir_atomic_op atomic_op = nir_intrinsic_atomic_op(intr);
41    enum pipe_format format = nir_intrinsic_format(intr);
42    unsigned bit_size = intr->def.bit_size;
43 
44    /* Even for "formatless" access, we know the size of the texel accessed,
45     * since it's the size of the atomic. We can use that to synthesize a
46     * compatible format, which is good enough for texel address computations.
47     */
48    if (format == PIPE_FORMAT_NONE) {
49       nir_alu_type type_ = nir_atomic_op_type(atomic_op);
50       enum util_format_type format_type;
51       if (type_ == nir_type_float)
52          format_type = UTIL_FORMAT_TYPE_FLOAT;
53       else if (type_ == nir_type_int)
54          format_type = UTIL_FORMAT_TYPE_SIGNED;
55       else
56          format_type = UTIL_FORMAT_TYPE_UNSIGNED;
57 
58       format = util_format_get_array(format_type, bit_size, 1, false,
59                                      type_ != nir_type_float);
60    }
61 
62    /* Get the relevant texel address */
63    nir_def *address = nir_image_texel_address(
64       b, 64, intr->src[0].ssa, intr->src[1].ssa, intr->src[2].ssa,
65       .image_dim = nir_intrinsic_image_dim(intr),
66       .image_array = nir_intrinsic_image_array(intr),
67       .format = format,
68       .access = nir_intrinsic_access(intr));
69 
70    nir_instr *address_instr = address->parent_instr;
71    nir_intrinsic_instr *address_intr = nir_instr_as_intrinsic(address_instr);
72 
73    address_intr->intrinsic = address_op;
74    if (address_op == nir_intrinsic_image_texel_address) {
75       nir_intrinsic_set_range_base(address_intr,
76                                    nir_intrinsic_range_base(intr));
77    }
78 
79    /* Build the global atomic */
80    nir_def *global;
81    if (swap) {
82       global = nir_global_atomic_swap(b, bit_size, address, intr->src[3].ssa,
83                                       intr->src[4].ssa, .atomic_op = atomic_op);
84    } else {
85       global = nir_global_atomic(b, bit_size, address, intr->src[3].ssa,
86                                  .atomic_op = atomic_op);
87    }
88 
89    /* Replace the image atomic with the global atomic. Remove the image
90     * explicitly because it has side effects so is not DCE'd.
91     */
92    nir_def_rewrite_uses(&intr->def, global);
93    nir_instr_remove(&intr->instr);
94    return true;
95 }
96 
97 bool
nir_lower_image_atomics_to_global(nir_shader * shader)98 nir_lower_image_atomics_to_global(nir_shader *shader)
99 {
100    return nir_shader_intrinsics_pass(shader, lower,
101                                      nir_metadata_control_flow,
102                                      NULL);
103 }
104