xref: /aosp_15_r20/external/mesa3d/src/broadcom/compiler/v3d_tex.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1*61046927SAndroid Build Coastguard Worker /*
2*61046927SAndroid Build Coastguard Worker  * Copyright © 2016-2018 Broadcom
3*61046927SAndroid Build Coastguard Worker  *
4*61046927SAndroid Build Coastguard Worker  * Permission is hereby granted, free of charge, to any person obtaining a
5*61046927SAndroid Build Coastguard Worker  * copy of this software and associated documentation files (the "Software"),
6*61046927SAndroid Build Coastguard Worker  * to deal in the Software without restriction, including without limitation
7*61046927SAndroid Build Coastguard Worker  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8*61046927SAndroid Build Coastguard Worker  * and/or sell copies of the Software, and to permit persons to whom the
9*61046927SAndroid Build Coastguard Worker  * Software is furnished to do so, subject to the following conditions:
10*61046927SAndroid Build Coastguard Worker  *
11*61046927SAndroid Build Coastguard Worker  * The above copyright notice and this permission notice (including the next
12*61046927SAndroid Build Coastguard Worker  * paragraph) shall be included in all copies or substantial portions of the
13*61046927SAndroid Build Coastguard Worker  * Software.
14*61046927SAndroid Build Coastguard Worker  *
15*61046927SAndroid Build Coastguard Worker  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16*61046927SAndroid Build Coastguard Worker  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17*61046927SAndroid Build Coastguard Worker  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18*61046927SAndroid Build Coastguard Worker  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19*61046927SAndroid Build Coastguard Worker  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20*61046927SAndroid Build Coastguard Worker  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21*61046927SAndroid Build Coastguard Worker  * IN THE SOFTWARE.
22*61046927SAndroid Build Coastguard Worker  */
23*61046927SAndroid Build Coastguard Worker 
24*61046927SAndroid Build Coastguard Worker #include "v3d_compiler.h"
25*61046927SAndroid Build Coastguard Worker 
26*61046927SAndroid Build Coastguard Worker /* We don't do any address packing. */
27*61046927SAndroid Build Coastguard Worker #define __gen_user_data void
28*61046927SAndroid Build Coastguard Worker #define __gen_address_type uint32_t
29*61046927SAndroid Build Coastguard Worker #define __gen_address_offset(reloc) (*reloc)
30*61046927SAndroid Build Coastguard Worker #define __gen_emit_reloc(cl, reloc)
31*61046927SAndroid Build Coastguard Worker #include "cle/v3d_packet_v42_pack.h"
32*61046927SAndroid Build Coastguard Worker 
33*61046927SAndroid Build Coastguard Worker static inline struct qinst *
vir_TMU_WRITE(struct v3d_compile * c,enum v3d_qpu_waddr waddr,struct qreg val)34*61046927SAndroid Build Coastguard Worker vir_TMU_WRITE(struct v3d_compile *c, enum v3d_qpu_waddr waddr, struct qreg val)
35*61046927SAndroid Build Coastguard Worker {
36*61046927SAndroid Build Coastguard Worker         /* XXX perf: We should figure out how to merge ALU operations
37*61046927SAndroid Build Coastguard Worker          * producing the val with this MOV, when possible.
38*61046927SAndroid Build Coastguard Worker          */
39*61046927SAndroid Build Coastguard Worker         return vir_MOV_dest(c, vir_reg(QFILE_MAGIC, waddr), val);
40*61046927SAndroid Build Coastguard Worker }
41*61046927SAndroid Build Coastguard Worker 
42*61046927SAndroid Build Coastguard Worker static inline struct qinst *
vir_TMU_WRITE_or_count(struct v3d_compile * c,enum v3d_qpu_waddr waddr,struct qreg val,uint32_t * tmu_writes)43*61046927SAndroid Build Coastguard Worker vir_TMU_WRITE_or_count(struct v3d_compile *c,
44*61046927SAndroid Build Coastguard Worker                        enum v3d_qpu_waddr waddr,
45*61046927SAndroid Build Coastguard Worker                        struct qreg val,
46*61046927SAndroid Build Coastguard Worker                        uint32_t *tmu_writes)
47*61046927SAndroid Build Coastguard Worker {
48*61046927SAndroid Build Coastguard Worker         if (tmu_writes) {
49*61046927SAndroid Build Coastguard Worker                 (*tmu_writes)++;
50*61046927SAndroid Build Coastguard Worker                 return NULL;
51*61046927SAndroid Build Coastguard Worker         } else {
52*61046927SAndroid Build Coastguard Worker                 return vir_TMU_WRITE(c, waddr, val);
53*61046927SAndroid Build Coastguard Worker         }
54*61046927SAndroid Build Coastguard Worker }
55*61046927SAndroid Build Coastguard Worker 
56*61046927SAndroid Build Coastguard Worker static void
vir_WRTMUC(struct v3d_compile * c,enum quniform_contents contents,uint32_t data)57*61046927SAndroid Build Coastguard Worker vir_WRTMUC(struct v3d_compile *c, enum quniform_contents contents, uint32_t data)
58*61046927SAndroid Build Coastguard Worker {
59*61046927SAndroid Build Coastguard Worker         struct qinst *inst = vir_NOP(c);
60*61046927SAndroid Build Coastguard Worker         inst->qpu.sig.wrtmuc = true;
61*61046927SAndroid Build Coastguard Worker         inst->uniform = vir_get_uniform_index(c, contents, data);
62*61046927SAndroid Build Coastguard Worker }
63*61046927SAndroid Build Coastguard Worker 
64*61046927SAndroid Build Coastguard Worker static const struct V3D42_TMU_CONFIG_PARAMETER_1 p1_unpacked_default = {
65*61046927SAndroid Build Coastguard Worker         .per_pixel_mask_enable = true,
66*61046927SAndroid Build Coastguard Worker };
67*61046927SAndroid Build Coastguard Worker 
68*61046927SAndroid Build Coastguard Worker static const struct V3D42_TMU_CONFIG_PARAMETER_2 p2_unpacked_default = {
69*61046927SAndroid Build Coastguard Worker         .op = V3D_TMU_OP_REGULAR,
70*61046927SAndroid Build Coastguard Worker };
71*61046927SAndroid Build Coastguard Worker 
72*61046927SAndroid Build Coastguard Worker /**
73*61046927SAndroid Build Coastguard Worker  * If 'tmu_writes' is not NULL, then it just counts required register writes,
74*61046927SAndroid Build Coastguard Worker  * otherwise, it emits the actual register writes.
75*61046927SAndroid Build Coastguard Worker  *
76*61046927SAndroid Build Coastguard Worker  * It is important to notice that emitting register writes for the current
77*61046927SAndroid Build Coastguard Worker  * TMU operation may trigger a TMU flush, since it is possible that any
78*61046927SAndroid Build Coastguard Worker  * of the inputs required for the register writes is the result of a pending
79*61046927SAndroid Build Coastguard Worker  * TMU operation. If that happens we need to make sure that it doesn't happen
80*61046927SAndroid Build Coastguard Worker  * in the middle of the TMU register writes for the current TMU operation,
81*61046927SAndroid Build Coastguard Worker  * which is why we always call ntq_get_src() even if we are only interested in
82*61046927SAndroid Build Coastguard Worker  * register write counts.
83*61046927SAndroid Build Coastguard Worker  */
84*61046927SAndroid Build Coastguard Worker static void
handle_tex_src(struct v3d_compile * c,nir_tex_instr * instr,unsigned src_idx,unsigned non_array_components,struct V3D42_TMU_CONFIG_PARAMETER_2 * p2_unpacked,struct qreg * s_out,unsigned * tmu_writes)85*61046927SAndroid Build Coastguard Worker handle_tex_src(struct v3d_compile *c,
86*61046927SAndroid Build Coastguard Worker                nir_tex_instr *instr,
87*61046927SAndroid Build Coastguard Worker                unsigned src_idx,
88*61046927SAndroid Build Coastguard Worker                unsigned non_array_components,
89*61046927SAndroid Build Coastguard Worker                struct V3D42_TMU_CONFIG_PARAMETER_2 *p2_unpacked,
90*61046927SAndroid Build Coastguard Worker                struct qreg *s_out,
91*61046927SAndroid Build Coastguard Worker                unsigned *tmu_writes)
92*61046927SAndroid Build Coastguard Worker {
93*61046927SAndroid Build Coastguard Worker         /* Either we are calling this just to count required TMU writes, or we
94*61046927SAndroid Build Coastguard Worker          * are calling this to emit the actual TMU writes.
95*61046927SAndroid Build Coastguard Worker          */
96*61046927SAndroid Build Coastguard Worker         assert(tmu_writes || (s_out && p2_unpacked));
97*61046927SAndroid Build Coastguard Worker 
98*61046927SAndroid Build Coastguard Worker         struct qreg s;
99*61046927SAndroid Build Coastguard Worker         switch (instr->src[src_idx].src_type) {
100*61046927SAndroid Build Coastguard Worker         case nir_tex_src_coord:
101*61046927SAndroid Build Coastguard Worker                 /* S triggers the lookup, so save it for the end. */
102*61046927SAndroid Build Coastguard Worker                 s = ntq_get_src(c, instr->src[src_idx].src, 0);
103*61046927SAndroid Build Coastguard Worker                 if (tmu_writes)
104*61046927SAndroid Build Coastguard Worker                         (*tmu_writes)++;
105*61046927SAndroid Build Coastguard Worker                 else
106*61046927SAndroid Build Coastguard Worker                         *s_out = s;
107*61046927SAndroid Build Coastguard Worker 
108*61046927SAndroid Build Coastguard Worker                 if (non_array_components > 1) {
109*61046927SAndroid Build Coastguard Worker                         struct qreg src =
110*61046927SAndroid Build Coastguard Worker                                 ntq_get_src(c, instr->src[src_idx].src, 1);
111*61046927SAndroid Build Coastguard Worker                         vir_TMU_WRITE_or_count(c, V3D_QPU_WADDR_TMUT, src,
112*61046927SAndroid Build Coastguard Worker                                                 tmu_writes);
113*61046927SAndroid Build Coastguard Worker                 }
114*61046927SAndroid Build Coastguard Worker 
115*61046927SAndroid Build Coastguard Worker                 if (non_array_components > 2) {
116*61046927SAndroid Build Coastguard Worker                         struct qreg src =
117*61046927SAndroid Build Coastguard Worker                                 ntq_get_src(c, instr->src[src_idx].src, 2);
118*61046927SAndroid Build Coastguard Worker                         vir_TMU_WRITE_or_count(c, V3D_QPU_WADDR_TMUR, src,
119*61046927SAndroid Build Coastguard Worker                                                tmu_writes);
120*61046927SAndroid Build Coastguard Worker                 }
121*61046927SAndroid Build Coastguard Worker 
122*61046927SAndroid Build Coastguard Worker                 if (instr->is_array) {
123*61046927SAndroid Build Coastguard Worker                         struct qreg src =
124*61046927SAndroid Build Coastguard Worker                                 ntq_get_src(c, instr->src[src_idx].src,
125*61046927SAndroid Build Coastguard Worker                                             instr->coord_components - 1);
126*61046927SAndroid Build Coastguard Worker                         vir_TMU_WRITE_or_count(c, V3D_QPU_WADDR_TMUI, src,
127*61046927SAndroid Build Coastguard Worker                                                tmu_writes);
128*61046927SAndroid Build Coastguard Worker                 }
129*61046927SAndroid Build Coastguard Worker                 break;
130*61046927SAndroid Build Coastguard Worker 
131*61046927SAndroid Build Coastguard Worker         case nir_tex_src_bias: {
132*61046927SAndroid Build Coastguard Worker                 struct qreg src = ntq_get_src(c, instr->src[src_idx].src, 0);
133*61046927SAndroid Build Coastguard Worker                 vir_TMU_WRITE_or_count(c, V3D_QPU_WADDR_TMUB, src, tmu_writes);
134*61046927SAndroid Build Coastguard Worker                 break;
135*61046927SAndroid Build Coastguard Worker         }
136*61046927SAndroid Build Coastguard Worker 
137*61046927SAndroid Build Coastguard Worker         case nir_tex_src_lod: {
138*61046927SAndroid Build Coastguard Worker                 struct qreg src = ntq_get_src(c, instr->src[src_idx].src, 0);
139*61046927SAndroid Build Coastguard Worker                 vir_TMU_WRITE_or_count(c, V3D_QPU_WADDR_TMUB, src, tmu_writes);
140*61046927SAndroid Build Coastguard Worker                 if (!tmu_writes) {
141*61046927SAndroid Build Coastguard Worker                         /* With texel fetch automatic LOD is already disabled,
142*61046927SAndroid Build Coastguard Worker                          * and disable_autolod must not be enabled. For
143*61046927SAndroid Build Coastguard Worker                          * non-cubes we can use the register TMUSLOD, that
144*61046927SAndroid Build Coastguard Worker                          * implicitly sets disable_autolod.
145*61046927SAndroid Build Coastguard Worker                          */
146*61046927SAndroid Build Coastguard Worker                         assert(p2_unpacked);
147*61046927SAndroid Build Coastguard Worker                         if (instr->op != nir_texop_txf &&
148*61046927SAndroid Build Coastguard Worker                             instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
149*61046927SAndroid Build Coastguard Worker                                     p2_unpacked->disable_autolod = true;
150*61046927SAndroid Build Coastguard Worker                         }
151*61046927SAndroid Build Coastguard Worker                }
152*61046927SAndroid Build Coastguard Worker                break;
153*61046927SAndroid Build Coastguard Worker         }
154*61046927SAndroid Build Coastguard Worker 
155*61046927SAndroid Build Coastguard Worker         case nir_tex_src_comparator: {
156*61046927SAndroid Build Coastguard Worker                 struct qreg src = ntq_get_src(c, instr->src[src_idx].src, 0);
157*61046927SAndroid Build Coastguard Worker                 vir_TMU_WRITE_or_count(c, V3D_QPU_WADDR_TMUDREF, src, tmu_writes);
158*61046927SAndroid Build Coastguard Worker                 break;
159*61046927SAndroid Build Coastguard Worker         }
160*61046927SAndroid Build Coastguard Worker 
161*61046927SAndroid Build Coastguard Worker         case nir_tex_src_offset: {
162*61046927SAndroid Build Coastguard Worker                 bool is_const_offset = nir_src_is_const(instr->src[src_idx].src);
163*61046927SAndroid Build Coastguard Worker                 if (is_const_offset) {
164*61046927SAndroid Build Coastguard Worker                         if (!tmu_writes) {
165*61046927SAndroid Build Coastguard Worker                                 p2_unpacked->offset_s =
166*61046927SAndroid Build Coastguard Worker                                         nir_src_comp_as_int(instr->src[src_idx].src, 0);
167*61046927SAndroid Build Coastguard Worker                                 if (non_array_components >= 2)
168*61046927SAndroid Build Coastguard Worker                                         p2_unpacked->offset_t =
169*61046927SAndroid Build Coastguard Worker                                                 nir_src_comp_as_int(instr->src[src_idx].src, 1);
170*61046927SAndroid Build Coastguard Worker                                 if (non_array_components >= 3)
171*61046927SAndroid Build Coastguard Worker                                         p2_unpacked->offset_r =
172*61046927SAndroid Build Coastguard Worker                                                 nir_src_comp_as_int(instr->src[src_idx].src, 2);
173*61046927SAndroid Build Coastguard Worker                         }
174*61046927SAndroid Build Coastguard Worker                 } else {
175*61046927SAndroid Build Coastguard Worker                         struct qreg src_0 =
176*61046927SAndroid Build Coastguard Worker                                 ntq_get_src(c, instr->src[src_idx].src, 0);
177*61046927SAndroid Build Coastguard Worker                         struct qreg src_1 =
178*61046927SAndroid Build Coastguard Worker                                 ntq_get_src(c, instr->src[src_idx].src, 1);
179*61046927SAndroid Build Coastguard Worker                         if (!tmu_writes) {
180*61046927SAndroid Build Coastguard Worker                                 struct qreg mask = vir_uniform_ui(c, 0xf);
181*61046927SAndroid Build Coastguard Worker                                 struct qreg x, y, offset;
182*61046927SAndroid Build Coastguard Worker 
183*61046927SAndroid Build Coastguard Worker                                 x = vir_AND(c, src_0, mask);
184*61046927SAndroid Build Coastguard Worker                                 y = vir_AND(c, src_1, mask);
185*61046927SAndroid Build Coastguard Worker                                 offset = vir_OR(c, x,
186*61046927SAndroid Build Coastguard Worker                                                 vir_SHL(c, y, vir_uniform_ui(c, 4)));
187*61046927SAndroid Build Coastguard Worker 
188*61046927SAndroid Build Coastguard Worker                                 vir_TMU_WRITE(c, V3D_QPU_WADDR_TMUOFF, offset);
189*61046927SAndroid Build Coastguard Worker                         } else {
190*61046927SAndroid Build Coastguard Worker                                 (*tmu_writes)++;
191*61046927SAndroid Build Coastguard Worker                         }
192*61046927SAndroid Build Coastguard Worker                 }
193*61046927SAndroid Build Coastguard Worker                 break;
194*61046927SAndroid Build Coastguard Worker         }
195*61046927SAndroid Build Coastguard Worker 
196*61046927SAndroid Build Coastguard Worker         default:
197*61046927SAndroid Build Coastguard Worker                 unreachable("unknown texture source");
198*61046927SAndroid Build Coastguard Worker         }
199*61046927SAndroid Build Coastguard Worker }
200*61046927SAndroid Build Coastguard Worker 
201*61046927SAndroid Build Coastguard Worker static void
vir_tex_handle_srcs(struct v3d_compile * c,nir_tex_instr * instr,struct V3D42_TMU_CONFIG_PARAMETER_2 * p2_unpacked,struct qreg * s,unsigned * tmu_writes)202*61046927SAndroid Build Coastguard Worker vir_tex_handle_srcs(struct v3d_compile *c,
203*61046927SAndroid Build Coastguard Worker                     nir_tex_instr *instr,
204*61046927SAndroid Build Coastguard Worker                     struct V3D42_TMU_CONFIG_PARAMETER_2 *p2_unpacked,
205*61046927SAndroid Build Coastguard Worker                     struct qreg *s,
206*61046927SAndroid Build Coastguard Worker                     unsigned *tmu_writes)
207*61046927SAndroid Build Coastguard Worker {
208*61046927SAndroid Build Coastguard Worker         unsigned non_array_components = instr->op != nir_texop_lod ?
209*61046927SAndroid Build Coastguard Worker                 instr->coord_components - instr->is_array :
210*61046927SAndroid Build Coastguard Worker                 instr->coord_components;
211*61046927SAndroid Build Coastguard Worker 
212*61046927SAndroid Build Coastguard Worker         for (unsigned i = 0; i < instr->num_srcs; i++) {
213*61046927SAndroid Build Coastguard Worker                 handle_tex_src(c, instr, i, non_array_components,
214*61046927SAndroid Build Coastguard Worker                                p2_unpacked, s, tmu_writes);
215*61046927SAndroid Build Coastguard Worker         }
216*61046927SAndroid Build Coastguard Worker }
217*61046927SAndroid Build Coastguard Worker 
218*61046927SAndroid Build Coastguard Worker static unsigned
get_required_tex_tmu_writes(struct v3d_compile * c,nir_tex_instr * instr)219*61046927SAndroid Build Coastguard Worker get_required_tex_tmu_writes(struct v3d_compile *c, nir_tex_instr *instr)
220*61046927SAndroid Build Coastguard Worker {
221*61046927SAndroid Build Coastguard Worker         unsigned tmu_writes = 0;
222*61046927SAndroid Build Coastguard Worker         vir_tex_handle_srcs(c, instr, NULL, NULL, &tmu_writes);
223*61046927SAndroid Build Coastguard Worker         return tmu_writes;
224*61046927SAndroid Build Coastguard Worker }
225*61046927SAndroid Build Coastguard Worker 
226*61046927SAndroid Build Coastguard Worker void
v3d_vir_emit_tex(struct v3d_compile * c,nir_tex_instr * instr)227*61046927SAndroid Build Coastguard Worker v3d_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr)
228*61046927SAndroid Build Coastguard Worker {
229*61046927SAndroid Build Coastguard Worker         unsigned texture_idx = instr->texture_index;
230*61046927SAndroid Build Coastguard Worker 
231*61046927SAndroid Build Coastguard Worker         /* For instructions that don't have a sampler (i.e. txf) we bind
232*61046927SAndroid Build Coastguard Worker          * default sampler state via the backend_flags to handle precision.
233*61046927SAndroid Build Coastguard Worker          */
234*61046927SAndroid Build Coastguard Worker         unsigned sampler_idx = nir_tex_instr_need_sampler(instr) ?
235*61046927SAndroid Build Coastguard Worker                                instr->sampler_index : instr->backend_flags;
236*61046927SAndroid Build Coastguard Worker 
237*61046927SAndroid Build Coastguard Worker         /* Even if the texture operation doesn't need a sampler by
238*61046927SAndroid Build Coastguard Worker          * itself, we still need to add the sampler configuration
239*61046927SAndroid Build Coastguard Worker          * parameter if the output is 32 bit
240*61046927SAndroid Build Coastguard Worker          */
241*61046927SAndroid Build Coastguard Worker         assert(sampler_idx < c->key->num_samplers_used);
242*61046927SAndroid Build Coastguard Worker         bool output_type_32_bit =
243*61046927SAndroid Build Coastguard Worker                 c->key->sampler[sampler_idx].return_size == 32;
244*61046927SAndroid Build Coastguard Worker 
245*61046927SAndroid Build Coastguard Worker         struct V3D42_TMU_CONFIG_PARAMETER_0 p0_unpacked = {
246*61046927SAndroid Build Coastguard Worker         };
247*61046927SAndroid Build Coastguard Worker 
248*61046927SAndroid Build Coastguard Worker         /* Limit the number of channels returned to both how many the NIR
249*61046927SAndroid Build Coastguard Worker          * instruction writes and how many the instruction could produce.
250*61046927SAndroid Build Coastguard Worker          */
251*61046927SAndroid Build Coastguard Worker         uint32_t components_read = nir_def_components_read(&instr->def);
252*61046927SAndroid Build Coastguard Worker         p0_unpacked.return_words_of_texture_data = output_type_32_bit ?
253*61046927SAndroid Build Coastguard Worker                 (components_read & 0xf): (components_read & 0x3);
254*61046927SAndroid Build Coastguard Worker         assert(p0_unpacked.return_words_of_texture_data != 0);
255*61046927SAndroid Build Coastguard Worker 
256*61046927SAndroid Build Coastguard Worker         struct V3D42_TMU_CONFIG_PARAMETER_2 p2_unpacked = {
257*61046927SAndroid Build Coastguard Worker                 .op = V3D_TMU_OP_REGULAR,
258*61046927SAndroid Build Coastguard Worker                 .gather_mode = instr->op == nir_texop_tg4,
259*61046927SAndroid Build Coastguard Worker                 .gather_component = instr->component,
260*61046927SAndroid Build Coastguard Worker                 .coefficient_mode = instr->op == nir_texop_txd,
261*61046927SAndroid Build Coastguard Worker                 .disable_autolod = instr->op == nir_texop_tg4,
262*61046927SAndroid Build Coastguard Worker                 .lod_query = instr->op == nir_texop_lod,
263*61046927SAndroid Build Coastguard Worker         };
264*61046927SAndroid Build Coastguard Worker 
265*61046927SAndroid Build Coastguard Worker         const unsigned tmu_writes = get_required_tex_tmu_writes(c, instr);
266*61046927SAndroid Build Coastguard Worker 
267*61046927SAndroid Build Coastguard Worker         /* The input FIFO has 16 slots across all threads so if we require
268*61046927SAndroid Build Coastguard Worker          * more than that we need to lower thread count.
269*61046927SAndroid Build Coastguard Worker          */
270*61046927SAndroid Build Coastguard Worker         while (tmu_writes > 16 / c->threads)
271*61046927SAndroid Build Coastguard Worker                 c->threads /= 2;
272*61046927SAndroid Build Coastguard Worker 
273*61046927SAndroid Build Coastguard Worker        /* If pipelining this TMU operation would overflow TMU fifos, we need
274*61046927SAndroid Build Coastguard Worker         * to flush any outstanding TMU operations.
275*61046927SAndroid Build Coastguard Worker         */
276*61046927SAndroid Build Coastguard Worker         const unsigned dest_components =
277*61046927SAndroid Build Coastguard Worker            util_bitcount(p0_unpacked.return_words_of_texture_data);
278*61046927SAndroid Build Coastguard Worker         if (ntq_tmu_fifo_overflow(c, dest_components))
279*61046927SAndroid Build Coastguard Worker                 ntq_flush_tmu(c);
280*61046927SAndroid Build Coastguard Worker 
281*61046927SAndroid Build Coastguard Worker         /* Process tex sources emitting corresponding TMU writes */
282*61046927SAndroid Build Coastguard Worker         struct qreg s = { };
283*61046927SAndroid Build Coastguard Worker         vir_tex_handle_srcs(c, instr, &p2_unpacked, &s, NULL);
284*61046927SAndroid Build Coastguard Worker 
285*61046927SAndroid Build Coastguard Worker         uint32_t p0_packed;
286*61046927SAndroid Build Coastguard Worker         V3D42_TMU_CONFIG_PARAMETER_0_pack(NULL,
287*61046927SAndroid Build Coastguard Worker                                           (uint8_t *)&p0_packed,
288*61046927SAndroid Build Coastguard Worker                                           &p0_unpacked);
289*61046927SAndroid Build Coastguard Worker 
290*61046927SAndroid Build Coastguard Worker         uint32_t p2_packed;
291*61046927SAndroid Build Coastguard Worker         V3D42_TMU_CONFIG_PARAMETER_2_pack(NULL,
292*61046927SAndroid Build Coastguard Worker                                           (uint8_t *)&p2_packed,
293*61046927SAndroid Build Coastguard Worker                                           &p2_unpacked);
294*61046927SAndroid Build Coastguard Worker 
295*61046927SAndroid Build Coastguard Worker         /* Load texture_idx number into the high bits of the texture address field,
296*61046927SAndroid Build Coastguard Worker          * which will be used by the driver to decide which texture to put
297*61046927SAndroid Build Coastguard Worker          * in the actual address field.
298*61046927SAndroid Build Coastguard Worker          */
299*61046927SAndroid Build Coastguard Worker         p0_packed |= texture_idx << 24;
300*61046927SAndroid Build Coastguard Worker 
301*61046927SAndroid Build Coastguard Worker         vir_WRTMUC(c, QUNIFORM_TMU_CONFIG_P0, p0_packed);
302*61046927SAndroid Build Coastguard Worker 
303*61046927SAndroid Build Coastguard Worker         /* p1 is optional, but we can skip it only if p2 can be skipped too */
304*61046927SAndroid Build Coastguard Worker         bool needs_p2_config =
305*61046927SAndroid Build Coastguard Worker                 (instr->op == nir_texop_lod ||
306*61046927SAndroid Build Coastguard Worker                  memcmp(&p2_unpacked, &p2_unpacked_default,
307*61046927SAndroid Build Coastguard Worker                         sizeof(p2_unpacked)) != 0);
308*61046927SAndroid Build Coastguard Worker 
309*61046927SAndroid Build Coastguard Worker         /* To handle the cases where we can't just use p1_unpacked_default */
310*61046927SAndroid Build Coastguard Worker         bool non_default_p1_config = nir_tex_instr_need_sampler(instr) ||
311*61046927SAndroid Build Coastguard Worker                 output_type_32_bit;
312*61046927SAndroid Build Coastguard Worker 
313*61046927SAndroid Build Coastguard Worker         if (non_default_p1_config) {
314*61046927SAndroid Build Coastguard Worker                 struct V3D42_TMU_CONFIG_PARAMETER_1 p1_unpacked = {
315*61046927SAndroid Build Coastguard Worker                         .output_type_32_bit = output_type_32_bit,
316*61046927SAndroid Build Coastguard Worker 
317*61046927SAndroid Build Coastguard Worker                         .unnormalized_coordinates = (instr->sampler_dim ==
318*61046927SAndroid Build Coastguard Worker                                                      GLSL_SAMPLER_DIM_RECT),
319*61046927SAndroid Build Coastguard Worker                 };
320*61046927SAndroid Build Coastguard Worker 
321*61046927SAndroid Build Coastguard Worker                 /* Word enables can't ask for more channels than the
322*61046927SAndroid Build Coastguard Worker                  * output type could provide (2 for f16, 4 for
323*61046927SAndroid Build Coastguard Worker                  * 32-bit).
324*61046927SAndroid Build Coastguard Worker                  */
325*61046927SAndroid Build Coastguard Worker                 assert(!p1_unpacked.output_type_32_bit ||
326*61046927SAndroid Build Coastguard Worker                        p0_unpacked.return_words_of_texture_data < (1 << 4));
327*61046927SAndroid Build Coastguard Worker                 assert(p1_unpacked.output_type_32_bit ||
328*61046927SAndroid Build Coastguard Worker                        p0_unpacked.return_words_of_texture_data < (1 << 2));
329*61046927SAndroid Build Coastguard Worker 
330*61046927SAndroid Build Coastguard Worker                 uint32_t p1_packed;
331*61046927SAndroid Build Coastguard Worker                 V3D42_TMU_CONFIG_PARAMETER_1_pack(NULL,
332*61046927SAndroid Build Coastguard Worker                                                   (uint8_t *)&p1_packed,
333*61046927SAndroid Build Coastguard Worker                                                   &p1_unpacked);
334*61046927SAndroid Build Coastguard Worker 
335*61046927SAndroid Build Coastguard Worker                 if (nir_tex_instr_need_sampler(instr)) {
336*61046927SAndroid Build Coastguard Worker                         /* Load sampler_idx number into the high bits of the
337*61046927SAndroid Build Coastguard Worker                          * sampler address field, which will be used by the
338*61046927SAndroid Build Coastguard Worker                          * driver to decide which sampler to put in the actual
339*61046927SAndroid Build Coastguard Worker                          * address field.
340*61046927SAndroid Build Coastguard Worker                          */
341*61046927SAndroid Build Coastguard Worker                         p1_packed |= sampler_idx << 24;
342*61046927SAndroid Build Coastguard Worker 
343*61046927SAndroid Build Coastguard Worker                         vir_WRTMUC(c, QUNIFORM_TMU_CONFIG_P1, p1_packed);
344*61046927SAndroid Build Coastguard Worker                 } else {
345*61046927SAndroid Build Coastguard Worker                         /* In this case, we don't need to merge in any
346*61046927SAndroid Build Coastguard Worker                          * sampler state from the API and can just use
347*61046927SAndroid Build Coastguard Worker                          * our packed bits */
348*61046927SAndroid Build Coastguard Worker                         vir_WRTMUC(c, QUNIFORM_CONSTANT, p1_packed);
349*61046927SAndroid Build Coastguard Worker                 }
350*61046927SAndroid Build Coastguard Worker         } else if (needs_p2_config) {
351*61046927SAndroid Build Coastguard Worker                 /* Configuration parameters need to be set up in
352*61046927SAndroid Build Coastguard Worker                  * order, and if P2 is needed, you need to set up P1
353*61046927SAndroid Build Coastguard Worker                  * too even if sampler info is not needed by the
354*61046927SAndroid Build Coastguard Worker                  * texture operation. But we can set up default info,
355*61046927SAndroid Build Coastguard Worker                  * and avoid asking the driver for the sampler state
356*61046927SAndroid Build Coastguard Worker                  * address
357*61046927SAndroid Build Coastguard Worker                  */
358*61046927SAndroid Build Coastguard Worker                 uint32_t p1_packed_default;
359*61046927SAndroid Build Coastguard Worker                 V3D42_TMU_CONFIG_PARAMETER_1_pack(NULL,
360*61046927SAndroid Build Coastguard Worker                                                   (uint8_t *)&p1_packed_default,
361*61046927SAndroid Build Coastguard Worker                                                   &p1_unpacked_default);
362*61046927SAndroid Build Coastguard Worker                 vir_WRTMUC(c, QUNIFORM_CONSTANT, p1_packed_default);
363*61046927SAndroid Build Coastguard Worker         }
364*61046927SAndroid Build Coastguard Worker 
365*61046927SAndroid Build Coastguard Worker         if (needs_p2_config)
366*61046927SAndroid Build Coastguard Worker                 vir_WRTMUC(c, QUNIFORM_CONSTANT, p2_packed);
367*61046927SAndroid Build Coastguard Worker 
368*61046927SAndroid Build Coastguard Worker         /* Emit retiring TMU write */
369*61046927SAndroid Build Coastguard Worker         struct qinst *retiring;
370*61046927SAndroid Build Coastguard Worker         if (instr->op == nir_texop_txf) {
371*61046927SAndroid Build Coastguard Worker                 assert(instr->sampler_dim != GLSL_SAMPLER_DIM_CUBE);
372*61046927SAndroid Build Coastguard Worker                 retiring = vir_TMU_WRITE(c, V3D_QPU_WADDR_TMUSF, s);
373*61046927SAndroid Build Coastguard Worker         } else if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
374*61046927SAndroid Build Coastguard Worker                 retiring = vir_TMU_WRITE(c, V3D_QPU_WADDR_TMUSCM, s);
375*61046927SAndroid Build Coastguard Worker         } else if (instr->op == nir_texop_txl) {
376*61046927SAndroid Build Coastguard Worker                 retiring = vir_TMU_WRITE(c, V3D_QPU_WADDR_TMUSLOD, s);
377*61046927SAndroid Build Coastguard Worker         } else {
378*61046927SAndroid Build Coastguard Worker                 retiring = vir_TMU_WRITE(c, V3D_QPU_WADDR_TMUS, s);
379*61046927SAndroid Build Coastguard Worker         }
380*61046927SAndroid Build Coastguard Worker 
381*61046927SAndroid Build Coastguard Worker         retiring->ldtmu_count = p0_unpacked.return_words_of_texture_data;
382*61046927SAndroid Build Coastguard Worker         ntq_add_pending_tmu_flush(c, &instr->def,
383*61046927SAndroid Build Coastguard Worker                                   p0_unpacked.return_words_of_texture_data);
384*61046927SAndroid Build Coastguard Worker }
385*61046927SAndroid Build Coastguard Worker 
386*61046927SAndroid Build Coastguard Worker static uint32_t
v3d_image_atomic_tmu_op(nir_intrinsic_instr * instr)387*61046927SAndroid Build Coastguard Worker v3d_image_atomic_tmu_op(nir_intrinsic_instr *instr)
388*61046927SAndroid Build Coastguard Worker {
389*61046927SAndroid Build Coastguard Worker         nir_atomic_op atomic_op = nir_intrinsic_atomic_op(instr);
390*61046927SAndroid Build Coastguard Worker         switch (atomic_op) {
391*61046927SAndroid Build Coastguard Worker         case nir_atomic_op_iadd:    return v3d_get_op_for_atomic_add(instr, 3);
392*61046927SAndroid Build Coastguard Worker         case nir_atomic_op_imin:    return V3D_TMU_OP_WRITE_SMIN;
393*61046927SAndroid Build Coastguard Worker         case nir_atomic_op_umin:    return V3D_TMU_OP_WRITE_UMIN_FULL_L1_CLEAR;
394*61046927SAndroid Build Coastguard Worker         case nir_atomic_op_imax:    return V3D_TMU_OP_WRITE_SMAX;
395*61046927SAndroid Build Coastguard Worker         case nir_atomic_op_umax:    return V3D_TMU_OP_WRITE_UMAX;
396*61046927SAndroid Build Coastguard Worker         case nir_atomic_op_iand:    return V3D_TMU_OP_WRITE_AND_READ_INC;
397*61046927SAndroid Build Coastguard Worker         case nir_atomic_op_ior:     return V3D_TMU_OP_WRITE_OR_READ_DEC;
398*61046927SAndroid Build Coastguard Worker         case nir_atomic_op_ixor:    return V3D_TMU_OP_WRITE_XOR_READ_NOT;
399*61046927SAndroid Build Coastguard Worker         case nir_atomic_op_xchg:    return V3D_TMU_OP_WRITE_XCHG_READ_FLUSH;
400*61046927SAndroid Build Coastguard Worker         case nir_atomic_op_cmpxchg: return V3D_TMU_OP_WRITE_CMPXCHG_READ_FLUSH;
401*61046927SAndroid Build Coastguard Worker         default:                    unreachable("unknown atomic op");
402*61046927SAndroid Build Coastguard Worker         }
403*61046927SAndroid Build Coastguard Worker }
404*61046927SAndroid Build Coastguard Worker 
405*61046927SAndroid Build Coastguard Worker static uint32_t
v3d_image_load_store_tmu_op(nir_intrinsic_instr * instr)406*61046927SAndroid Build Coastguard Worker v3d_image_load_store_tmu_op(nir_intrinsic_instr *instr)
407*61046927SAndroid Build Coastguard Worker {
408*61046927SAndroid Build Coastguard Worker         switch (instr->intrinsic) {
409*61046927SAndroid Build Coastguard Worker         case nir_intrinsic_image_load:
410*61046927SAndroid Build Coastguard Worker         case nir_intrinsic_image_store:
411*61046927SAndroid Build Coastguard Worker                 return V3D_TMU_OP_REGULAR;
412*61046927SAndroid Build Coastguard Worker 
413*61046927SAndroid Build Coastguard Worker         case nir_intrinsic_image_atomic:
414*61046927SAndroid Build Coastguard Worker         case nir_intrinsic_image_atomic_swap:
415*61046927SAndroid Build Coastguard Worker                 return v3d_image_atomic_tmu_op(instr);
416*61046927SAndroid Build Coastguard Worker 
417*61046927SAndroid Build Coastguard Worker         default:
418*61046927SAndroid Build Coastguard Worker                 unreachable("unknown image intrinsic");
419*61046927SAndroid Build Coastguard Worker         };
420*61046927SAndroid Build Coastguard Worker }
421*61046927SAndroid Build Coastguard Worker 
422*61046927SAndroid Build Coastguard Worker /**
423*61046927SAndroid Build Coastguard Worker  * If 'tmu_writes' is not NULL, then it just counts required register writes,
424*61046927SAndroid Build Coastguard Worker  * otherwise, it emits the actual register writes.
425*61046927SAndroid Build Coastguard Worker  *
426*61046927SAndroid Build Coastguard Worker  * It is important to notice that emitting register writes for the current
427*61046927SAndroid Build Coastguard Worker  * TMU operation may trigger a TMU flush, since it is possible that any
428*61046927SAndroid Build Coastguard Worker  * of the inputs required for the register writes is the result of a pending
429*61046927SAndroid Build Coastguard Worker  * TMU operation. If that happens we need to make sure that it doesn't happen
430*61046927SAndroid Build Coastguard Worker  * in the middle of the TMU register writes for the current TMU operation,
431*61046927SAndroid Build Coastguard Worker  * which is why we always call ntq_get_src() even if we are only interested in
432*61046927SAndroid Build Coastguard Worker  * register write counts.
433*61046927SAndroid Build Coastguard Worker  */
434*61046927SAndroid Build Coastguard Worker static struct qinst *
vir_image_emit_register_writes(struct v3d_compile * c,nir_intrinsic_instr * instr,bool atomic_add_replaced,uint32_t * tmu_writes)435*61046927SAndroid Build Coastguard Worker vir_image_emit_register_writes(struct v3d_compile *c,
436*61046927SAndroid Build Coastguard Worker                                nir_intrinsic_instr *instr,
437*61046927SAndroid Build Coastguard Worker                                bool atomic_add_replaced,
438*61046927SAndroid Build Coastguard Worker                                uint32_t *tmu_writes)
439*61046927SAndroid Build Coastguard Worker {
440*61046927SAndroid Build Coastguard Worker         if (tmu_writes)
441*61046927SAndroid Build Coastguard Worker                 *tmu_writes = 0;
442*61046927SAndroid Build Coastguard Worker 
443*61046927SAndroid Build Coastguard Worker         bool is_1d = false;
444*61046927SAndroid Build Coastguard Worker         switch (nir_intrinsic_image_dim(instr)) {
445*61046927SAndroid Build Coastguard Worker         case GLSL_SAMPLER_DIM_1D:
446*61046927SAndroid Build Coastguard Worker                 is_1d = true;
447*61046927SAndroid Build Coastguard Worker                 break;
448*61046927SAndroid Build Coastguard Worker         case GLSL_SAMPLER_DIM_BUF:
449*61046927SAndroid Build Coastguard Worker                 break;
450*61046927SAndroid Build Coastguard Worker         case GLSL_SAMPLER_DIM_2D:
451*61046927SAndroid Build Coastguard Worker         case GLSL_SAMPLER_DIM_RECT:
452*61046927SAndroid Build Coastguard Worker         case GLSL_SAMPLER_DIM_CUBE: {
453*61046927SAndroid Build Coastguard Worker                 struct qreg src = ntq_get_src(c, instr->src[1], 1);
454*61046927SAndroid Build Coastguard Worker                 vir_TMU_WRITE_or_count(c, V3D_QPU_WADDR_TMUT, src, tmu_writes);
455*61046927SAndroid Build Coastguard Worker                 break;
456*61046927SAndroid Build Coastguard Worker         }
457*61046927SAndroid Build Coastguard Worker         case GLSL_SAMPLER_DIM_3D: {
458*61046927SAndroid Build Coastguard Worker                 struct qreg src_1_1 = ntq_get_src(c, instr->src[1], 1);
459*61046927SAndroid Build Coastguard Worker                 struct qreg src_1_2 = ntq_get_src(c, instr->src[1], 2);
460*61046927SAndroid Build Coastguard Worker                 vir_TMU_WRITE_or_count(c, V3D_QPU_WADDR_TMUT, src_1_1, tmu_writes);
461*61046927SAndroid Build Coastguard Worker                 vir_TMU_WRITE_or_count(c, V3D_QPU_WADDR_TMUR, src_1_2, tmu_writes);
462*61046927SAndroid Build Coastguard Worker                 break;
463*61046927SAndroid Build Coastguard Worker         }
464*61046927SAndroid Build Coastguard Worker         default:
465*61046927SAndroid Build Coastguard Worker                 unreachable("bad image sampler dim");
466*61046927SAndroid Build Coastguard Worker         }
467*61046927SAndroid Build Coastguard Worker 
468*61046927SAndroid Build Coastguard Worker         /* In order to fetch on a cube map, we need to interpret it as
469*61046927SAndroid Build Coastguard Worker          * 2D arrays, where the third coord would be the face index.
470*61046927SAndroid Build Coastguard Worker          */
471*61046927SAndroid Build Coastguard Worker         if (nir_intrinsic_image_dim(instr) == GLSL_SAMPLER_DIM_CUBE ||
472*61046927SAndroid Build Coastguard Worker             nir_intrinsic_image_array(instr)) {
473*61046927SAndroid Build Coastguard Worker                 struct qreg src = ntq_get_src(c, instr->src[1], is_1d ? 1 : 2);
474*61046927SAndroid Build Coastguard Worker                 vir_TMU_WRITE_or_count(c, V3D_QPU_WADDR_TMUI, src, tmu_writes);
475*61046927SAndroid Build Coastguard Worker         }
476*61046927SAndroid Build Coastguard Worker 
477*61046927SAndroid Build Coastguard Worker         /* Emit the data writes for atomics or image store. */
478*61046927SAndroid Build Coastguard Worker         if (instr->intrinsic != nir_intrinsic_image_load &&
479*61046927SAndroid Build Coastguard Worker             !atomic_add_replaced) {
480*61046927SAndroid Build Coastguard Worker                 for (int i = 0; i < nir_intrinsic_src_components(instr, 3); i++) {
481*61046927SAndroid Build Coastguard Worker                         struct qreg src_3_i = ntq_get_src(c, instr->src[3], i);
482*61046927SAndroid Build Coastguard Worker                         vir_TMU_WRITE_or_count(c, V3D_QPU_WADDR_TMUD, src_3_i,
483*61046927SAndroid Build Coastguard Worker                                                tmu_writes);
484*61046927SAndroid Build Coastguard Worker                 }
485*61046927SAndroid Build Coastguard Worker 
486*61046927SAndroid Build Coastguard Worker                 /* Second atomic argument */
487*61046927SAndroid Build Coastguard Worker                 if (instr->intrinsic == nir_intrinsic_image_atomic_swap &&
488*61046927SAndroid Build Coastguard Worker                     nir_intrinsic_atomic_op(instr) == nir_atomic_op_cmpxchg) {
489*61046927SAndroid Build Coastguard Worker                         struct qreg src_4_0 = ntq_get_src(c, instr->src[4], 0);
490*61046927SAndroid Build Coastguard Worker                         vir_TMU_WRITE_or_count(c, V3D_QPU_WADDR_TMUD, src_4_0,
491*61046927SAndroid Build Coastguard Worker                                                tmu_writes);
492*61046927SAndroid Build Coastguard Worker                 }
493*61046927SAndroid Build Coastguard Worker         }
494*61046927SAndroid Build Coastguard Worker 
495*61046927SAndroid Build Coastguard Worker         struct qreg src_1_0 = ntq_get_src(c, instr->src[1], 0);
496*61046927SAndroid Build Coastguard Worker         if (!tmu_writes && vir_in_nonuniform_control_flow(c) &&
497*61046927SAndroid Build Coastguard Worker             instr->intrinsic != nir_intrinsic_image_load) {
498*61046927SAndroid Build Coastguard Worker                 vir_set_pf(c, vir_MOV_dest(c, vir_nop_reg(), c->execute),
499*61046927SAndroid Build Coastguard Worker                            V3D_QPU_PF_PUSHZ);
500*61046927SAndroid Build Coastguard Worker         }
501*61046927SAndroid Build Coastguard Worker 
502*61046927SAndroid Build Coastguard Worker         struct qinst *retiring =
503*61046927SAndroid Build Coastguard Worker                 vir_TMU_WRITE_or_count(c, V3D_QPU_WADDR_TMUSF, src_1_0, tmu_writes);
504*61046927SAndroid Build Coastguard Worker 
505*61046927SAndroid Build Coastguard Worker         if (!tmu_writes && vir_in_nonuniform_control_flow(c) &&
506*61046927SAndroid Build Coastguard Worker             instr->intrinsic != nir_intrinsic_image_load) {
507*61046927SAndroid Build Coastguard Worker                 struct qinst *last_inst =
508*61046927SAndroid Build Coastguard Worker                         (struct  qinst *)c->cur_block->instructions.prev;
509*61046927SAndroid Build Coastguard Worker                 vir_set_cond(last_inst, V3D_QPU_COND_IFA);
510*61046927SAndroid Build Coastguard Worker         }
511*61046927SAndroid Build Coastguard Worker 
512*61046927SAndroid Build Coastguard Worker         return retiring;
513*61046927SAndroid Build Coastguard Worker }
514*61046927SAndroid Build Coastguard Worker 
515*61046927SAndroid Build Coastguard Worker static unsigned
get_required_image_tmu_writes(struct v3d_compile * c,nir_intrinsic_instr * instr,bool atomic_add_replaced)516*61046927SAndroid Build Coastguard Worker get_required_image_tmu_writes(struct v3d_compile *c,
517*61046927SAndroid Build Coastguard Worker                               nir_intrinsic_instr *instr,
518*61046927SAndroid Build Coastguard Worker                               bool atomic_add_replaced)
519*61046927SAndroid Build Coastguard Worker {
520*61046927SAndroid Build Coastguard Worker         unsigned tmu_writes;
521*61046927SAndroid Build Coastguard Worker         vir_image_emit_register_writes(c, instr, atomic_add_replaced,
522*61046927SAndroid Build Coastguard Worker                                        &tmu_writes);
523*61046927SAndroid Build Coastguard Worker         return tmu_writes;
524*61046927SAndroid Build Coastguard Worker }
525*61046927SAndroid Build Coastguard Worker 
526*61046927SAndroid Build Coastguard Worker static uint32_t
return_channels_required(nir_intrinsic_instr * instr,bool is_32bit)527*61046927SAndroid Build Coastguard Worker return_channels_required(nir_intrinsic_instr *instr, bool is_32bit)
528*61046927SAndroid Build Coastguard Worker {
529*61046927SAndroid Build Coastguard Worker         if (nir_intrinsic_dest_components(instr) == 0)
530*61046927SAndroid Build Coastguard Worker                 return 0;
531*61046927SAndroid Build Coastguard Worker 
532*61046927SAndroid Build Coastguard Worker         /* V3D requires that atomic operations always return data even if the
533*61046927SAndroid Build Coastguard Worker          * shader doesn't use it.
534*61046927SAndroid Build Coastguard Worker          */
535*61046927SAndroid Build Coastguard Worker         if (instr->intrinsic == nir_intrinsic_image_atomic ||
536*61046927SAndroid Build Coastguard Worker             instr->intrinsic == nir_intrinsic_image_atomic_swap) {
537*61046927SAndroid Build Coastguard Worker                 return 1;
538*61046927SAndroid Build Coastguard Worker         }
539*61046927SAndroid Build Coastguard Worker 
540*61046927SAndroid Build Coastguard Worker         /* Otherwise limit the number of words to read based on the components
541*61046927SAndroid Build Coastguard Worker          * actually used by the shader, limited to the maximum allowed based
542*61046927SAndroid Build Coastguard Worker          * on the output size.
543*61046927SAndroid Build Coastguard Worker          */
544*61046927SAndroid Build Coastguard Worker         nir_component_mask_t read_mask = nir_def_components_read(&instr->def);
545*61046927SAndroid Build Coastguard Worker         read_mask &= is_32bit ? 0xf : 0x3;
546*61046927SAndroid Build Coastguard Worker         assert(read_mask);
547*61046927SAndroid Build Coastguard Worker 
548*61046927SAndroid Build Coastguard Worker         if (read_mask & 0x8)
549*61046927SAndroid Build Coastguard Worker                 return 4;
550*61046927SAndroid Build Coastguard Worker         if (read_mask & 0x4)
551*61046927SAndroid Build Coastguard Worker                 return 3;
552*61046927SAndroid Build Coastguard Worker         if (read_mask & 0x2)
553*61046927SAndroid Build Coastguard Worker                 return 2;
554*61046927SAndroid Build Coastguard Worker         else
555*61046927SAndroid Build Coastguard Worker                 return 1;
556*61046927SAndroid Build Coastguard Worker }
557*61046927SAndroid Build Coastguard Worker 
558*61046927SAndroid Build Coastguard Worker void
v3d_vir_emit_image_load_store(struct v3d_compile * c,nir_intrinsic_instr * instr)559*61046927SAndroid Build Coastguard Worker v3d_vir_emit_image_load_store(struct v3d_compile *c,
560*61046927SAndroid Build Coastguard Worker                               nir_intrinsic_instr *instr)
561*61046927SAndroid Build Coastguard Worker {
562*61046927SAndroid Build Coastguard Worker         unsigned format = nir_intrinsic_format(instr);
563*61046927SAndroid Build Coastguard Worker         unsigned unit = nir_src_as_uint(instr->src[0]);
564*61046927SAndroid Build Coastguard Worker 
565*61046927SAndroid Build Coastguard Worker         struct V3D42_TMU_CONFIG_PARAMETER_0 p0_unpacked = {
566*61046927SAndroid Build Coastguard Worker         };
567*61046927SAndroid Build Coastguard Worker 
568*61046927SAndroid Build Coastguard Worker         struct V3D42_TMU_CONFIG_PARAMETER_1 p1_unpacked = {
569*61046927SAndroid Build Coastguard Worker                 .per_pixel_mask_enable = true,
570*61046927SAndroid Build Coastguard Worker                 .output_type_32_bit = v3d_gl_format_is_return_32(format),
571*61046927SAndroid Build Coastguard Worker         };
572*61046927SAndroid Build Coastguard Worker 
573*61046927SAndroid Build Coastguard Worker         struct V3D42_TMU_CONFIG_PARAMETER_2 p2_unpacked = { 0 };
574*61046927SAndroid Build Coastguard Worker 
575*61046927SAndroid Build Coastguard Worker         /* Limit the number of channels to those that are actually used */
576*61046927SAndroid Build Coastguard Worker         uint32_t return_channels =
577*61046927SAndroid Build Coastguard Worker                 return_channels_required(instr, p1_unpacked.output_type_32_bit);
578*61046927SAndroid Build Coastguard Worker         assert(return_channels <= nir_intrinsic_dest_components(instr));
579*61046927SAndroid Build Coastguard Worker         p0_unpacked.return_words_of_texture_data =
580*61046927SAndroid Build Coastguard Worker                 (1 << return_channels) - 1;
581*61046927SAndroid Build Coastguard Worker 
582*61046927SAndroid Build Coastguard Worker         p2_unpacked.op = v3d_image_load_store_tmu_op(instr);
583*61046927SAndroid Build Coastguard Worker 
584*61046927SAndroid Build Coastguard Worker         /* If we were able to replace atomic_add for an inc/dec, then we
585*61046927SAndroid Build Coastguard Worker          * need/can to do things slightly different, like not loading the
586*61046927SAndroid Build Coastguard Worker          * amount to add/sub, as that is implicit.
587*61046927SAndroid Build Coastguard Worker          */
588*61046927SAndroid Build Coastguard Worker         bool atomic_add_replaced =
589*61046927SAndroid Build Coastguard Worker                 instr->intrinsic == nir_intrinsic_image_atomic &&
590*61046927SAndroid Build Coastguard Worker                 nir_intrinsic_atomic_op(instr) == nir_atomic_op_iadd &&
591*61046927SAndroid Build Coastguard Worker                 (p2_unpacked.op == V3D_TMU_OP_WRITE_AND_READ_INC ||
592*61046927SAndroid Build Coastguard Worker                  p2_unpacked.op == V3D_TMU_OP_WRITE_OR_READ_DEC);
593*61046927SAndroid Build Coastguard Worker 
594*61046927SAndroid Build Coastguard Worker         uint32_t p0_packed;
595*61046927SAndroid Build Coastguard Worker         V3D42_TMU_CONFIG_PARAMETER_0_pack(NULL,
596*61046927SAndroid Build Coastguard Worker                                           (uint8_t *)&p0_packed,
597*61046927SAndroid Build Coastguard Worker                                           &p0_unpacked);
598*61046927SAndroid Build Coastguard Worker 
599*61046927SAndroid Build Coastguard Worker         /* Load unit number into the high bits of the texture or sampler
600*61046927SAndroid Build Coastguard Worker          * address field, which will be be used by the driver to decide which
601*61046927SAndroid Build Coastguard Worker          * texture to put in the actual address field.
602*61046927SAndroid Build Coastguard Worker          */
603*61046927SAndroid Build Coastguard Worker         p0_packed |= unit << 24;
604*61046927SAndroid Build Coastguard Worker 
605*61046927SAndroid Build Coastguard Worker         uint32_t p1_packed;
606*61046927SAndroid Build Coastguard Worker         V3D42_TMU_CONFIG_PARAMETER_1_pack(NULL,
607*61046927SAndroid Build Coastguard Worker                                           (uint8_t *)&p1_packed,
608*61046927SAndroid Build Coastguard Worker                                           &p1_unpacked);
609*61046927SAndroid Build Coastguard Worker 
610*61046927SAndroid Build Coastguard Worker         uint32_t p2_packed;
611*61046927SAndroid Build Coastguard Worker         V3D42_TMU_CONFIG_PARAMETER_2_pack(NULL,
612*61046927SAndroid Build Coastguard Worker                                           (uint8_t *)&p2_packed,
613*61046927SAndroid Build Coastguard Worker                                           &p2_unpacked);
614*61046927SAndroid Build Coastguard Worker 
615*61046927SAndroid Build Coastguard Worker         if (instr->intrinsic != nir_intrinsic_image_load)
616*61046927SAndroid Build Coastguard Worker                 c->tmu_dirty_rcl = true;
617*61046927SAndroid Build Coastguard Worker 
618*61046927SAndroid Build Coastguard Worker 
619*61046927SAndroid Build Coastguard Worker         const uint32_t tmu_writes =
620*61046927SAndroid Build Coastguard Worker                 get_required_image_tmu_writes(c, instr, atomic_add_replaced);
621*61046927SAndroid Build Coastguard Worker 
622*61046927SAndroid Build Coastguard Worker         /* The input FIFO has 16 slots across all threads so if we require
623*61046927SAndroid Build Coastguard Worker          * more than that we need to lower thread count.
624*61046927SAndroid Build Coastguard Worker          */
625*61046927SAndroid Build Coastguard Worker         while (tmu_writes > 16 / c->threads)
626*61046927SAndroid Build Coastguard Worker                 c->threads /= 2;
627*61046927SAndroid Build Coastguard Worker 
628*61046927SAndroid Build Coastguard Worker        /* If pipelining this TMU operation would overflow TMU fifos, we need
629*61046927SAndroid Build Coastguard Worker         * to flush any outstanding TMU operations.
630*61046927SAndroid Build Coastguard Worker         */
631*61046927SAndroid Build Coastguard Worker         if (ntq_tmu_fifo_overflow(c, return_channels))
632*61046927SAndroid Build Coastguard Worker                 ntq_flush_tmu(c);
633*61046927SAndroid Build Coastguard Worker 
634*61046927SAndroid Build Coastguard Worker         vir_WRTMUC(c, QUNIFORM_IMAGE_TMU_CONFIG_P0, p0_packed);
635*61046927SAndroid Build Coastguard Worker         if (memcmp(&p1_unpacked, &p1_unpacked_default, sizeof(p1_unpacked)))
636*61046927SAndroid Build Coastguard Worker                    vir_WRTMUC(c, QUNIFORM_CONSTANT, p1_packed);
637*61046927SAndroid Build Coastguard Worker         if (memcmp(&p2_unpacked, &p2_unpacked_default, sizeof(p2_unpacked)))
638*61046927SAndroid Build Coastguard Worker                    vir_WRTMUC(c, QUNIFORM_CONSTANT, p2_packed);
639*61046927SAndroid Build Coastguard Worker 
640*61046927SAndroid Build Coastguard Worker         struct qinst *retiring =
641*61046927SAndroid Build Coastguard Worker                 vir_image_emit_register_writes(c, instr, atomic_add_replaced, NULL);
642*61046927SAndroid Build Coastguard Worker         retiring->ldtmu_count = p0_unpacked.return_words_of_texture_data;
643*61046927SAndroid Build Coastguard Worker         ntq_add_pending_tmu_flush(c, &instr->def,
644*61046927SAndroid Build Coastguard Worker                                   p0_unpacked.return_words_of_texture_data);
645*61046927SAndroid Build Coastguard Worker }
646