/*
 * Copyright (C) 2020 Collabora Ltd.
 * Copyright (C) 2022 Alyssa Rosenzweig <[email protected]>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors (Collabora):
 *    Alyssa Rosenzweig <[email protected]>
 */
27*61046927SAndroid Build Coastguard Worker
28*61046927SAndroid Build Coastguard Worker #include "compiler/glsl/glsl_to_nir.h"
29*61046927SAndroid Build Coastguard Worker #include "compiler/glsl_types.h"
30*61046927SAndroid Build Coastguard Worker #include "compiler/nir/nir_builder.h"
31*61046927SAndroid Build Coastguard Worker #include "util/u_debug.h"
32*61046927SAndroid Build Coastguard Worker
33*61046927SAndroid Build Coastguard Worker #include "bifrost/disassemble.h"
34*61046927SAndroid Build Coastguard Worker #include "panfrost/lib/pan_props.h"
35*61046927SAndroid Build Coastguard Worker #include "valhall/disassemble.h"
36*61046927SAndroid Build Coastguard Worker #include "valhall/va_compiler.h"
37*61046927SAndroid Build Coastguard Worker #include "bi_builder.h"
38*61046927SAndroid Build Coastguard Worker #include "bi_quirks.h"
39*61046927SAndroid Build Coastguard Worker #include "bifrost_compile.h"
40*61046927SAndroid Build Coastguard Worker #include "bifrost_nir.h"
41*61046927SAndroid Build Coastguard Worker #include "compiler.h"
42*61046927SAndroid Build Coastguard Worker
43*61046927SAndroid Build Coastguard Worker /* clang-format off */
44*61046927SAndroid Build Coastguard Worker static const struct debug_named_value bifrost_debug_options[] = {
45*61046927SAndroid Build Coastguard Worker {"msgs", BIFROST_DBG_MSGS, "Print debug messages"},
46*61046927SAndroid Build Coastguard Worker {"shaders", BIFROST_DBG_SHADERS, "Dump shaders in NIR and MIR"},
47*61046927SAndroid Build Coastguard Worker {"shaderdb", BIFROST_DBG_SHADERDB, "Print statistics"},
48*61046927SAndroid Build Coastguard Worker {"verbose", BIFROST_DBG_VERBOSE, "Disassemble verbosely"},
49*61046927SAndroid Build Coastguard Worker {"internal", BIFROST_DBG_INTERNAL, "Dump even internal shaders"},
50*61046927SAndroid Build Coastguard Worker {"nosched", BIFROST_DBG_NOSCHED, "Force trivial bundling"},
51*61046927SAndroid Build Coastguard Worker {"nopsched", BIFROST_DBG_NOPSCHED, "Disable scheduling for pressure"},
52*61046927SAndroid Build Coastguard Worker {"inorder", BIFROST_DBG_INORDER, "Force in-order bundling"},
53*61046927SAndroid Build Coastguard Worker {"novalidate", BIFROST_DBG_NOVALIDATE, "Skip IR validation"},
54*61046927SAndroid Build Coastguard Worker {"noopt", BIFROST_DBG_NOOPT, "Skip optimization passes"},
55*61046927SAndroid Build Coastguard Worker {"noidvs", BIFROST_DBG_NOIDVS, "Disable IDVS"},
56*61046927SAndroid Build Coastguard Worker {"nosb", BIFROST_DBG_NOSB, "Disable scoreboarding"},
57*61046927SAndroid Build Coastguard Worker {"nopreload", BIFROST_DBG_NOPRELOAD, "Disable message preloading"},
58*61046927SAndroid Build Coastguard Worker {"spill", BIFROST_DBG_SPILL, "Test register spilling"},
59*61046927SAndroid Build Coastguard Worker DEBUG_NAMED_VALUE_END
60*61046927SAndroid Build Coastguard Worker };
61*61046927SAndroid Build Coastguard Worker /* clang-format on */
62*61046927SAndroid Build Coastguard Worker
63*61046927SAndroid Build Coastguard Worker DEBUG_GET_ONCE_FLAGS_OPTION(bifrost_debug, "BIFROST_MESA_DEBUG",
64*61046927SAndroid Build Coastguard Worker bifrost_debug_options, 0)
65*61046927SAndroid Build Coastguard Worker
66*61046927SAndroid Build Coastguard Worker /* How many bytes are prefetched by the Bifrost shader core. From the final
67*61046927SAndroid Build Coastguard Worker * clause of the shader, this range must be valid instructions or zero. */
68*61046927SAndroid Build Coastguard Worker #define BIFROST_SHADER_PREFETCH 128
69*61046927SAndroid Build Coastguard Worker
70*61046927SAndroid Build Coastguard Worker int bifrost_debug = 0;
71*61046927SAndroid Build Coastguard Worker
72*61046927SAndroid Build Coastguard Worker #define DBG(fmt, ...) \
73*61046927SAndroid Build Coastguard Worker do { \
74*61046927SAndroid Build Coastguard Worker if (bifrost_debug & BIFROST_DBG_MSGS) \
75*61046927SAndroid Build Coastguard Worker fprintf(stderr, "%s:%d: " fmt, __func__, __LINE__, ##__VA_ARGS__); \
76*61046927SAndroid Build Coastguard Worker } while (0)
77*61046927SAndroid Build Coastguard Worker
78*61046927SAndroid Build Coastguard Worker static bi_block *emit_cf_list(bi_context *ctx, struct exec_list *list);
79*61046927SAndroid Build Coastguard Worker
80*61046927SAndroid Build Coastguard Worker static bi_index
bi_preload(bi_builder * b,unsigned reg)81*61046927SAndroid Build Coastguard Worker bi_preload(bi_builder *b, unsigned reg)
82*61046927SAndroid Build Coastguard Worker {
83*61046927SAndroid Build Coastguard Worker if (bi_is_null(b->shader->preloaded[reg])) {
84*61046927SAndroid Build Coastguard Worker /* Insert at the beginning of the shader */
85*61046927SAndroid Build Coastguard Worker bi_builder b_ = *b;
86*61046927SAndroid Build Coastguard Worker b_.cursor = bi_before_block(bi_start_block(&b->shader->blocks));
87*61046927SAndroid Build Coastguard Worker
88*61046927SAndroid Build Coastguard Worker /* Cache the result */
89*61046927SAndroid Build Coastguard Worker b->shader->preloaded[reg] = bi_mov_i32(&b_, bi_register(reg));
90*61046927SAndroid Build Coastguard Worker }
91*61046927SAndroid Build Coastguard Worker
92*61046927SAndroid Build Coastguard Worker return b->shader->preloaded[reg];
93*61046927SAndroid Build Coastguard Worker }
94*61046927SAndroid Build Coastguard Worker
95*61046927SAndroid Build Coastguard Worker static bi_index
bi_coverage(bi_builder * b)96*61046927SAndroid Build Coastguard Worker bi_coverage(bi_builder *b)
97*61046927SAndroid Build Coastguard Worker {
98*61046927SAndroid Build Coastguard Worker if (bi_is_null(b->shader->coverage))
99*61046927SAndroid Build Coastguard Worker b->shader->coverage = bi_preload(b, 60);
100*61046927SAndroid Build Coastguard Worker
101*61046927SAndroid Build Coastguard Worker return b->shader->coverage;
102*61046927SAndroid Build Coastguard Worker }
103*61046927SAndroid Build Coastguard Worker
104*61046927SAndroid Build Coastguard Worker /*
105*61046927SAndroid Build Coastguard Worker * Vertex ID and Instance ID are preloaded registers. Where they are preloaded
106*61046927SAndroid Build Coastguard Worker * changed from Bifrost to Valhall. Provide helpers that smooth over the
107*61046927SAndroid Build Coastguard Worker * architectural difference.
108*61046927SAndroid Build Coastguard Worker */
109*61046927SAndroid Build Coastguard Worker static inline bi_index
bi_vertex_id(bi_builder * b)110*61046927SAndroid Build Coastguard Worker bi_vertex_id(bi_builder *b)
111*61046927SAndroid Build Coastguard Worker {
112*61046927SAndroid Build Coastguard Worker return bi_preload(b, (b->shader->arch >= 9) ? 60 : 61);
113*61046927SAndroid Build Coastguard Worker }
114*61046927SAndroid Build Coastguard Worker
115*61046927SAndroid Build Coastguard Worker static inline bi_index
bi_instance_id(bi_builder * b)116*61046927SAndroid Build Coastguard Worker bi_instance_id(bi_builder *b)
117*61046927SAndroid Build Coastguard Worker {
118*61046927SAndroid Build Coastguard Worker return bi_preload(b, (b->shader->arch >= 9) ? 61 : 62);
119*61046927SAndroid Build Coastguard Worker }
120*61046927SAndroid Build Coastguard Worker
121*61046927SAndroid Build Coastguard Worker static inline bi_index
bi_draw_id(bi_builder * b)122*61046927SAndroid Build Coastguard Worker bi_draw_id(bi_builder *b)
123*61046927SAndroid Build Coastguard Worker {
124*61046927SAndroid Build Coastguard Worker assert(b->shader->arch >= 9);
125*61046927SAndroid Build Coastguard Worker return bi_preload(b, 62);
126*61046927SAndroid Build Coastguard Worker }
127*61046927SAndroid Build Coastguard Worker
128*61046927SAndroid Build Coastguard Worker static void
bi_emit_jump(bi_builder * b,nir_jump_instr * instr)129*61046927SAndroid Build Coastguard Worker bi_emit_jump(bi_builder *b, nir_jump_instr *instr)
130*61046927SAndroid Build Coastguard Worker {
131*61046927SAndroid Build Coastguard Worker bi_instr *branch = bi_jump(b, bi_zero());
132*61046927SAndroid Build Coastguard Worker
133*61046927SAndroid Build Coastguard Worker switch (instr->type) {
134*61046927SAndroid Build Coastguard Worker case nir_jump_break:
135*61046927SAndroid Build Coastguard Worker branch->branch_target = b->shader->break_block;
136*61046927SAndroid Build Coastguard Worker break;
137*61046927SAndroid Build Coastguard Worker case nir_jump_continue:
138*61046927SAndroid Build Coastguard Worker branch->branch_target = b->shader->continue_block;
139*61046927SAndroid Build Coastguard Worker break;
140*61046927SAndroid Build Coastguard Worker default:
141*61046927SAndroid Build Coastguard Worker unreachable("Unhandled jump type");
142*61046927SAndroid Build Coastguard Worker }
143*61046927SAndroid Build Coastguard Worker
144*61046927SAndroid Build Coastguard Worker bi_block_add_successor(b->shader->current_block, branch->branch_target);
145*61046927SAndroid Build Coastguard Worker b->shader->current_block->unconditional_jumps = true;
146*61046927SAndroid Build Coastguard Worker }
147*61046927SAndroid Build Coastguard Worker
148*61046927SAndroid Build Coastguard Worker /* Builds a 64-bit hash table key for an index */
149*61046927SAndroid Build Coastguard Worker static uint64_t
bi_index_to_key(bi_index idx)150*61046927SAndroid Build Coastguard Worker bi_index_to_key(bi_index idx)
151*61046927SAndroid Build Coastguard Worker {
152*61046927SAndroid Build Coastguard Worker static_assert(sizeof(idx) <= sizeof(uint64_t), "too much padding");
153*61046927SAndroid Build Coastguard Worker
154*61046927SAndroid Build Coastguard Worker uint64_t key = 0;
155*61046927SAndroid Build Coastguard Worker memcpy(&key, &idx, sizeof(idx));
156*61046927SAndroid Build Coastguard Worker return key;
157*61046927SAndroid Build Coastguard Worker }
158*61046927SAndroid Build Coastguard Worker
159*61046927SAndroid Build Coastguard Worker /*
160*61046927SAndroid Build Coastguard Worker * Extract a single channel out of a vector source. We split vectors with SPLIT
161*61046927SAndroid Build Coastguard Worker * so we can use the split components directly, without emitting an extract.
162*61046927SAndroid Build Coastguard Worker * This has advantages of RA, as the split can usually be optimized away.
163*61046927SAndroid Build Coastguard Worker */
164*61046927SAndroid Build Coastguard Worker static bi_index
bi_extract(bi_builder * b,bi_index vec,unsigned channel)165*61046927SAndroid Build Coastguard Worker bi_extract(bi_builder *b, bi_index vec, unsigned channel)
166*61046927SAndroid Build Coastguard Worker {
167*61046927SAndroid Build Coastguard Worker bi_index *components = _mesa_hash_table_u64_search(b->shader->allocated_vec,
168*61046927SAndroid Build Coastguard Worker bi_index_to_key(vec));
169*61046927SAndroid Build Coastguard Worker
170*61046927SAndroid Build Coastguard Worker /* No extract needed for scalars.
171*61046927SAndroid Build Coastguard Worker *
172*61046927SAndroid Build Coastguard Worker * This is a bit imprecise, but actual bugs (missing splits for vectors)
173*61046927SAndroid Build Coastguard Worker * should be caught by the following assertion. It is too difficult to
174*61046927SAndroid Build Coastguard Worker * ensure bi_extract is only called for real vectors.
175*61046927SAndroid Build Coastguard Worker */
176*61046927SAndroid Build Coastguard Worker if (components == NULL && channel == 0)
177*61046927SAndroid Build Coastguard Worker return vec;
178*61046927SAndroid Build Coastguard Worker
179*61046927SAndroid Build Coastguard Worker assert(components != NULL && "missing bi_cache_collect()");
180*61046927SAndroid Build Coastguard Worker return components[channel];
181*61046927SAndroid Build Coastguard Worker }
182*61046927SAndroid Build Coastguard Worker
183*61046927SAndroid Build Coastguard Worker static void
bi_cache_collect(bi_builder * b,bi_index dst,bi_index * s,unsigned n)184*61046927SAndroid Build Coastguard Worker bi_cache_collect(bi_builder *b, bi_index dst, bi_index *s, unsigned n)
185*61046927SAndroid Build Coastguard Worker {
186*61046927SAndroid Build Coastguard Worker /* Lifetime of a hash table entry has to be at least as long as the table */
187*61046927SAndroid Build Coastguard Worker bi_index *channels = ralloc_array(b->shader, bi_index, n);
188*61046927SAndroid Build Coastguard Worker memcpy(channels, s, sizeof(bi_index) * n);
189*61046927SAndroid Build Coastguard Worker
190*61046927SAndroid Build Coastguard Worker _mesa_hash_table_u64_insert(b->shader->allocated_vec, bi_index_to_key(dst),
191*61046927SAndroid Build Coastguard Worker channels);
192*61046927SAndroid Build Coastguard Worker }
193*61046927SAndroid Build Coastguard Worker
194*61046927SAndroid Build Coastguard Worker /*
195*61046927SAndroid Build Coastguard Worker * Splits an n-component vector (vec) into n scalar destinations (dests) using a
196*61046927SAndroid Build Coastguard Worker * split pseudo-instruction.
197*61046927SAndroid Build Coastguard Worker *
198*61046927SAndroid Build Coastguard Worker * Pre-condition: dests is filled with bi_null().
199*61046927SAndroid Build Coastguard Worker */
200*61046927SAndroid Build Coastguard Worker static void
bi_emit_split_i32(bi_builder * b,bi_index dests[4],bi_index vec,unsigned n)201*61046927SAndroid Build Coastguard Worker bi_emit_split_i32(bi_builder *b, bi_index dests[4], bi_index vec, unsigned n)
202*61046927SAndroid Build Coastguard Worker {
203*61046927SAndroid Build Coastguard Worker /* Setup the destinations */
204*61046927SAndroid Build Coastguard Worker for (unsigned i = 0; i < n; ++i) {
205*61046927SAndroid Build Coastguard Worker dests[i] = bi_temp(b->shader);
206*61046927SAndroid Build Coastguard Worker }
207*61046927SAndroid Build Coastguard Worker
208*61046927SAndroid Build Coastguard Worker /* Emit the split */
209*61046927SAndroid Build Coastguard Worker if (n == 1) {
210*61046927SAndroid Build Coastguard Worker bi_mov_i32_to(b, dests[0], vec);
211*61046927SAndroid Build Coastguard Worker } else {
212*61046927SAndroid Build Coastguard Worker bi_instr *I = bi_split_i32_to(b, n, vec);
213*61046927SAndroid Build Coastguard Worker
214*61046927SAndroid Build Coastguard Worker bi_foreach_dest(I, j)
215*61046927SAndroid Build Coastguard Worker I->dest[j] = dests[j];
216*61046927SAndroid Build Coastguard Worker }
217*61046927SAndroid Build Coastguard Worker }
218*61046927SAndroid Build Coastguard Worker
219*61046927SAndroid Build Coastguard Worker static void
bi_emit_cached_split_i32(bi_builder * b,bi_index vec,unsigned n)220*61046927SAndroid Build Coastguard Worker bi_emit_cached_split_i32(bi_builder *b, bi_index vec, unsigned n)
221*61046927SAndroid Build Coastguard Worker {
222*61046927SAndroid Build Coastguard Worker bi_index dests[4] = {bi_null(), bi_null(), bi_null(), bi_null()};
223*61046927SAndroid Build Coastguard Worker bi_emit_split_i32(b, dests, vec, n);
224*61046927SAndroid Build Coastguard Worker bi_cache_collect(b, vec, dests, n);
225*61046927SAndroid Build Coastguard Worker }
226*61046927SAndroid Build Coastguard Worker
227*61046927SAndroid Build Coastguard Worker /*
228*61046927SAndroid Build Coastguard Worker * Emit and cache a split for a vector of a given bitsize. The vector may not be
229*61046927SAndroid Build Coastguard Worker * composed of 32-bit words, but it will be split at 32-bit word boundaries.
230*61046927SAndroid Build Coastguard Worker */
231*61046927SAndroid Build Coastguard Worker static void
bi_emit_cached_split(bi_builder * b,bi_index vec,unsigned bits)232*61046927SAndroid Build Coastguard Worker bi_emit_cached_split(bi_builder *b, bi_index vec, unsigned bits)
233*61046927SAndroid Build Coastguard Worker {
234*61046927SAndroid Build Coastguard Worker bi_emit_cached_split_i32(b, vec, DIV_ROUND_UP(bits, 32));
235*61046927SAndroid Build Coastguard Worker }
236*61046927SAndroid Build Coastguard Worker
237*61046927SAndroid Build Coastguard Worker static void
bi_split_def(bi_builder * b,nir_def * def)238*61046927SAndroid Build Coastguard Worker bi_split_def(bi_builder *b, nir_def *def)
239*61046927SAndroid Build Coastguard Worker {
240*61046927SAndroid Build Coastguard Worker bi_emit_cached_split(b, bi_def_index(def),
241*61046927SAndroid Build Coastguard Worker def->bit_size * def->num_components);
242*61046927SAndroid Build Coastguard Worker }
243*61046927SAndroid Build Coastguard Worker
244*61046927SAndroid Build Coastguard Worker static bi_instr *
bi_emit_collect_to(bi_builder * b,bi_index dst,bi_index * chan,unsigned n)245*61046927SAndroid Build Coastguard Worker bi_emit_collect_to(bi_builder *b, bi_index dst, bi_index *chan, unsigned n)
246*61046927SAndroid Build Coastguard Worker {
247*61046927SAndroid Build Coastguard Worker /* Special case: COLLECT of a single value is a scalar move */
248*61046927SAndroid Build Coastguard Worker if (n == 1)
249*61046927SAndroid Build Coastguard Worker return bi_mov_i32_to(b, dst, chan[0]);
250*61046927SAndroid Build Coastguard Worker
251*61046927SAndroid Build Coastguard Worker bi_instr *I = bi_collect_i32_to(b, dst, n);
252*61046927SAndroid Build Coastguard Worker
253*61046927SAndroid Build Coastguard Worker bi_foreach_src(I, i)
254*61046927SAndroid Build Coastguard Worker I->src[i] = chan[i];
255*61046927SAndroid Build Coastguard Worker
256*61046927SAndroid Build Coastguard Worker bi_cache_collect(b, dst, chan, n);
257*61046927SAndroid Build Coastguard Worker return I;
258*61046927SAndroid Build Coastguard Worker }
259*61046927SAndroid Build Coastguard Worker
260*61046927SAndroid Build Coastguard Worker static bi_instr *
bi_collect_v2i32_to(bi_builder * b,bi_index dst,bi_index s0,bi_index s1)261*61046927SAndroid Build Coastguard Worker bi_collect_v2i32_to(bi_builder *b, bi_index dst, bi_index s0, bi_index s1)
262*61046927SAndroid Build Coastguard Worker {
263*61046927SAndroid Build Coastguard Worker return bi_emit_collect_to(b, dst, (bi_index[]){s0, s1}, 2);
264*61046927SAndroid Build Coastguard Worker }
265*61046927SAndroid Build Coastguard Worker
266*61046927SAndroid Build Coastguard Worker static bi_instr *
bi_collect_v3i32_to(bi_builder * b,bi_index dst,bi_index s0,bi_index s1,bi_index s2)267*61046927SAndroid Build Coastguard Worker bi_collect_v3i32_to(bi_builder *b, bi_index dst, bi_index s0, bi_index s1,
268*61046927SAndroid Build Coastguard Worker bi_index s2)
269*61046927SAndroid Build Coastguard Worker {
270*61046927SAndroid Build Coastguard Worker return bi_emit_collect_to(b, dst, (bi_index[]){s0, s1, s2}, 3);
271*61046927SAndroid Build Coastguard Worker }
272*61046927SAndroid Build Coastguard Worker
273*61046927SAndroid Build Coastguard Worker static bi_index
bi_collect_v2i32(bi_builder * b,bi_index s0,bi_index s1)274*61046927SAndroid Build Coastguard Worker bi_collect_v2i32(bi_builder *b, bi_index s0, bi_index s1)
275*61046927SAndroid Build Coastguard Worker {
276*61046927SAndroid Build Coastguard Worker bi_index dst = bi_temp(b->shader);
277*61046927SAndroid Build Coastguard Worker bi_collect_v2i32_to(b, dst, s0, s1);
278*61046927SAndroid Build Coastguard Worker return dst;
279*61046927SAndroid Build Coastguard Worker }
280*61046927SAndroid Build Coastguard Worker
281*61046927SAndroid Build Coastguard Worker static bi_index
bi_varying_src0_for_barycentric(bi_builder * b,nir_intrinsic_instr * intr)282*61046927SAndroid Build Coastguard Worker bi_varying_src0_for_barycentric(bi_builder *b, nir_intrinsic_instr *intr)
283*61046927SAndroid Build Coastguard Worker {
284*61046927SAndroid Build Coastguard Worker switch (intr->intrinsic) {
285*61046927SAndroid Build Coastguard Worker case nir_intrinsic_load_barycentric_centroid:
286*61046927SAndroid Build Coastguard Worker case nir_intrinsic_load_barycentric_sample:
287*61046927SAndroid Build Coastguard Worker return bi_preload(b, 61);
288*61046927SAndroid Build Coastguard Worker
289*61046927SAndroid Build Coastguard Worker /* Need to put the sample ID in the top 16-bits */
290*61046927SAndroid Build Coastguard Worker case nir_intrinsic_load_barycentric_at_sample:
291*61046927SAndroid Build Coastguard Worker return bi_mkvec_v2i16(b, bi_half(bi_dontcare(b), false),
292*61046927SAndroid Build Coastguard Worker bi_half(bi_src_index(&intr->src[0]), false));
293*61046927SAndroid Build Coastguard Worker
294*61046927SAndroid Build Coastguard Worker /* Interpret as 8:8 signed fixed point positions in pixels along X and
295*61046927SAndroid Build Coastguard Worker * Y axes respectively, relative to top-left of pixel. In NIR, (0, 0)
296*61046927SAndroid Build Coastguard Worker * is the center of the pixel so we first fixup and then convert. For
297*61046927SAndroid Build Coastguard Worker * fp16 input:
298*61046927SAndroid Build Coastguard Worker *
299*61046927SAndroid Build Coastguard Worker * f2i16(((x, y) + (0.5, 0.5)) * 2**8) =
300*61046927SAndroid Build Coastguard Worker * f2i16((256 * (x, y)) + (128, 128)) =
301*61046927SAndroid Build Coastguard Worker * V2F16_TO_V2S16(FMA.v2f16((x, y), #256, #128))
302*61046927SAndroid Build Coastguard Worker *
303*61046927SAndroid Build Coastguard Worker * For fp32 input, that lacks enough precision for MSAA 16x, but the
304*61046927SAndroid Build Coastguard Worker * idea is the same. FIXME: still doesn't pass
305*61046927SAndroid Build Coastguard Worker */
306*61046927SAndroid Build Coastguard Worker case nir_intrinsic_load_barycentric_at_offset: {
307*61046927SAndroid Build Coastguard Worker bi_index offset = bi_src_index(&intr->src[0]);
308*61046927SAndroid Build Coastguard Worker bi_index f16 = bi_null();
309*61046927SAndroid Build Coastguard Worker unsigned sz = nir_src_bit_size(intr->src[0]);
310*61046927SAndroid Build Coastguard Worker
311*61046927SAndroid Build Coastguard Worker if (sz == 16) {
312*61046927SAndroid Build Coastguard Worker f16 = bi_fma_v2f16(b, offset, bi_imm_f16(256.0), bi_imm_f16(128.0));
313*61046927SAndroid Build Coastguard Worker } else {
314*61046927SAndroid Build Coastguard Worker assert(sz == 32);
315*61046927SAndroid Build Coastguard Worker bi_index f[2];
316*61046927SAndroid Build Coastguard Worker for (unsigned i = 0; i < 2; ++i) {
317*61046927SAndroid Build Coastguard Worker f[i] =
318*61046927SAndroid Build Coastguard Worker bi_fadd_rscale_f32(b, bi_extract(b, offset, i), bi_imm_f32(0.5),
319*61046927SAndroid Build Coastguard Worker bi_imm_u32(8), BI_SPECIAL_NONE);
320*61046927SAndroid Build Coastguard Worker }
321*61046927SAndroid Build Coastguard Worker
322*61046927SAndroid Build Coastguard Worker f16 = bi_v2f32_to_v2f16(b, f[0], f[1]);
323*61046927SAndroid Build Coastguard Worker }
324*61046927SAndroid Build Coastguard Worker
325*61046927SAndroid Build Coastguard Worker return bi_v2f16_to_v2s16(b, f16);
326*61046927SAndroid Build Coastguard Worker }
327*61046927SAndroid Build Coastguard Worker
328*61046927SAndroid Build Coastguard Worker case nir_intrinsic_load_barycentric_pixel:
329*61046927SAndroid Build Coastguard Worker default:
330*61046927SAndroid Build Coastguard Worker return b->shader->arch >= 9 ? bi_preload(b, 61) : bi_dontcare(b);
331*61046927SAndroid Build Coastguard Worker }
332*61046927SAndroid Build Coastguard Worker }
333*61046927SAndroid Build Coastguard Worker
334*61046927SAndroid Build Coastguard Worker static enum bi_sample
bi_interp_for_intrinsic(nir_intrinsic_op op)335*61046927SAndroid Build Coastguard Worker bi_interp_for_intrinsic(nir_intrinsic_op op)
336*61046927SAndroid Build Coastguard Worker {
337*61046927SAndroid Build Coastguard Worker switch (op) {
338*61046927SAndroid Build Coastguard Worker case nir_intrinsic_load_barycentric_centroid:
339*61046927SAndroid Build Coastguard Worker return BI_SAMPLE_CENTROID;
340*61046927SAndroid Build Coastguard Worker case nir_intrinsic_load_barycentric_sample:
341*61046927SAndroid Build Coastguard Worker case nir_intrinsic_load_barycentric_at_sample:
342*61046927SAndroid Build Coastguard Worker return BI_SAMPLE_SAMPLE;
343*61046927SAndroid Build Coastguard Worker case nir_intrinsic_load_barycentric_at_offset:
344*61046927SAndroid Build Coastguard Worker return BI_SAMPLE_EXPLICIT;
345*61046927SAndroid Build Coastguard Worker case nir_intrinsic_load_barycentric_pixel:
346*61046927SAndroid Build Coastguard Worker default:
347*61046927SAndroid Build Coastguard Worker return BI_SAMPLE_CENTER;
348*61046927SAndroid Build Coastguard Worker }
349*61046927SAndroid Build Coastguard Worker }
350*61046927SAndroid Build Coastguard Worker
351*61046927SAndroid Build Coastguard Worker /* auto, 64-bit omitted */
352*61046927SAndroid Build Coastguard Worker static enum bi_register_format
bi_reg_fmt_for_nir(nir_alu_type T)353*61046927SAndroid Build Coastguard Worker bi_reg_fmt_for_nir(nir_alu_type T)
354*61046927SAndroid Build Coastguard Worker {
355*61046927SAndroid Build Coastguard Worker switch (T) {
356*61046927SAndroid Build Coastguard Worker case nir_type_float16:
357*61046927SAndroid Build Coastguard Worker return BI_REGISTER_FORMAT_F16;
358*61046927SAndroid Build Coastguard Worker case nir_type_float32:
359*61046927SAndroid Build Coastguard Worker return BI_REGISTER_FORMAT_F32;
360*61046927SAndroid Build Coastguard Worker case nir_type_int16:
361*61046927SAndroid Build Coastguard Worker return BI_REGISTER_FORMAT_S16;
362*61046927SAndroid Build Coastguard Worker case nir_type_uint16:
363*61046927SAndroid Build Coastguard Worker return BI_REGISTER_FORMAT_U16;
364*61046927SAndroid Build Coastguard Worker case nir_type_int32:
365*61046927SAndroid Build Coastguard Worker return BI_REGISTER_FORMAT_S32;
366*61046927SAndroid Build Coastguard Worker case nir_type_uint32:
367*61046927SAndroid Build Coastguard Worker return BI_REGISTER_FORMAT_U32;
368*61046927SAndroid Build Coastguard Worker default:
369*61046927SAndroid Build Coastguard Worker unreachable("Invalid type for register format");
370*61046927SAndroid Build Coastguard Worker }
371*61046927SAndroid Build Coastguard Worker }
372*61046927SAndroid Build Coastguard Worker
373*61046927SAndroid Build Coastguard Worker static bool
va_is_valid_const_narrow_index(bi_index idx)374*61046927SAndroid Build Coastguard Worker va_is_valid_const_narrow_index(bi_index idx)
375*61046927SAndroid Build Coastguard Worker {
376*61046927SAndroid Build Coastguard Worker if (idx.type != BI_INDEX_CONSTANT)
377*61046927SAndroid Build Coastguard Worker return false;
378*61046927SAndroid Build Coastguard Worker
379*61046927SAndroid Build Coastguard Worker unsigned index = pan_res_handle_get_index(idx.value);
380*61046927SAndroid Build Coastguard Worker unsigned table_index = pan_res_handle_get_table(idx.value);
381*61046927SAndroid Build Coastguard Worker
382*61046927SAndroid Build Coastguard Worker return index < 1024 && va_is_valid_const_table(table_index);
383*61046927SAndroid Build Coastguard Worker }
384*61046927SAndroid Build Coastguard Worker
385*61046927SAndroid Build Coastguard Worker /* Checks if the _IMM variant of an intrinsic can be used, returning in imm the
386*61046927SAndroid Build Coastguard Worker * immediate to be used (which applies even if _IMM can't be used) */
387*61046927SAndroid Build Coastguard Worker
388*61046927SAndroid Build Coastguard Worker static bool
bi_is_intr_immediate(nir_intrinsic_instr * instr,unsigned * immediate,unsigned max)389*61046927SAndroid Build Coastguard Worker bi_is_intr_immediate(nir_intrinsic_instr *instr, unsigned *immediate,
390*61046927SAndroid Build Coastguard Worker unsigned max)
391*61046927SAndroid Build Coastguard Worker {
392*61046927SAndroid Build Coastguard Worker nir_src *offset = nir_get_io_offset_src(instr);
393*61046927SAndroid Build Coastguard Worker
394*61046927SAndroid Build Coastguard Worker if (!nir_src_is_const(*offset))
395*61046927SAndroid Build Coastguard Worker return false;
396*61046927SAndroid Build Coastguard Worker
397*61046927SAndroid Build Coastguard Worker *immediate = nir_intrinsic_base(instr) + nir_src_as_uint(*offset);
398*61046927SAndroid Build Coastguard Worker return (*immediate) < max;
399*61046927SAndroid Build Coastguard Worker }
400*61046927SAndroid Build Coastguard Worker
401*61046927SAndroid Build Coastguard Worker static bool
bi_is_imm_desc_handle(bi_builder * b,nir_intrinsic_instr * instr,uint32_t * immediate,unsigned max)402*61046927SAndroid Build Coastguard Worker bi_is_imm_desc_handle(bi_builder *b, nir_intrinsic_instr *instr,
403*61046927SAndroid Build Coastguard Worker uint32_t *immediate, unsigned max)
404*61046927SAndroid Build Coastguard Worker {
405*61046927SAndroid Build Coastguard Worker nir_src *offset = nir_get_io_offset_src(instr);
406*61046927SAndroid Build Coastguard Worker
407*61046927SAndroid Build Coastguard Worker if (!nir_src_is_const(*offset))
408*61046927SAndroid Build Coastguard Worker return false;
409*61046927SAndroid Build Coastguard Worker
410*61046927SAndroid Build Coastguard Worker if (b->shader->arch >= 9) {
411*61046927SAndroid Build Coastguard Worker uint32_t res_handle =
412*61046927SAndroid Build Coastguard Worker nir_intrinsic_base(instr) + nir_src_as_uint(*offset);
413*61046927SAndroid Build Coastguard Worker uint32_t table_index = pan_res_handle_get_table(res_handle);
414*61046927SAndroid Build Coastguard Worker uint32_t res_index = pan_res_handle_get_index(res_handle);
415*61046927SAndroid Build Coastguard Worker
416*61046927SAndroid Build Coastguard Worker if (!va_is_valid_const_table(table_index) || res_index >= max)
417*61046927SAndroid Build Coastguard Worker return false;
418*61046927SAndroid Build Coastguard Worker
419*61046927SAndroid Build Coastguard Worker *immediate = res_handle;
420*61046927SAndroid Build Coastguard Worker return true;
421*61046927SAndroid Build Coastguard Worker }
422*61046927SAndroid Build Coastguard Worker
423*61046927SAndroid Build Coastguard Worker return bi_is_intr_immediate(instr, immediate, max);
424*61046927SAndroid Build Coastguard Worker }
425*61046927SAndroid Build Coastguard Worker
426*61046927SAndroid Build Coastguard Worker static bool
bi_is_imm_var_desc_handle(bi_builder * b,nir_intrinsic_instr * instr,uint32_t * immediate)427*61046927SAndroid Build Coastguard Worker bi_is_imm_var_desc_handle(bi_builder *b, nir_intrinsic_instr *instr,
428*61046927SAndroid Build Coastguard Worker uint32_t *immediate)
429*61046927SAndroid Build Coastguard Worker {
430*61046927SAndroid Build Coastguard Worker unsigned max = b->shader->arch >= 9 ? 256 : 20;
431*61046927SAndroid Build Coastguard Worker
432*61046927SAndroid Build Coastguard Worker return bi_is_imm_desc_handle(b, instr, immediate, max);
433*61046927SAndroid Build Coastguard Worker }
434*61046927SAndroid Build Coastguard Worker
435*61046927SAndroid Build Coastguard Worker static void bi_make_vec_to(bi_builder *b, bi_index final_dst, bi_index *src,
436*61046927SAndroid Build Coastguard Worker unsigned *channel, unsigned count, unsigned bitsize);
437*61046927SAndroid Build Coastguard Worker
438*61046927SAndroid Build Coastguard Worker /* Bifrost's load instructions lack a component offset despite operating in
439*61046927SAndroid Build Coastguard Worker * terms of vec4 slots. Usually I/O vectorization avoids nonzero components,
440*61046927SAndroid Build Coastguard Worker * but they may be unavoidable with separate shaders in use. To solve this, we
441*61046927SAndroid Build Coastguard Worker * lower to a larger load and an explicit copy of the desired components. */
442*61046927SAndroid Build Coastguard Worker
443*61046927SAndroid Build Coastguard Worker static void
bi_copy_component(bi_builder * b,nir_intrinsic_instr * instr,bi_index tmp)444*61046927SAndroid Build Coastguard Worker bi_copy_component(bi_builder *b, nir_intrinsic_instr *instr, bi_index tmp)
445*61046927SAndroid Build Coastguard Worker {
446*61046927SAndroid Build Coastguard Worker unsigned component = nir_intrinsic_component(instr);
447*61046927SAndroid Build Coastguard Worker unsigned nr = instr->num_components;
448*61046927SAndroid Build Coastguard Worker unsigned total = nr + component;
449*61046927SAndroid Build Coastguard Worker unsigned bitsize = instr->def.bit_size;
450*61046927SAndroid Build Coastguard Worker
451*61046927SAndroid Build Coastguard Worker assert(total <= 4 && "should be vec4");
452*61046927SAndroid Build Coastguard Worker bi_emit_cached_split(b, tmp, total * bitsize);
453*61046927SAndroid Build Coastguard Worker
454*61046927SAndroid Build Coastguard Worker if (component == 0)
455*61046927SAndroid Build Coastguard Worker return;
456*61046927SAndroid Build Coastguard Worker
457*61046927SAndroid Build Coastguard Worker bi_index srcs[] = {tmp, tmp, tmp};
458*61046927SAndroid Build Coastguard Worker unsigned channels[] = {component, component + 1, component + 2};
459*61046927SAndroid Build Coastguard Worker
460*61046927SAndroid Build Coastguard Worker bi_make_vec_to(b, bi_def_index(&instr->def), srcs, channels, nr,
461*61046927SAndroid Build Coastguard Worker instr->def.bit_size);
462*61046927SAndroid Build Coastguard Worker }
463*61046927SAndroid Build Coastguard Worker
464*61046927SAndroid Build Coastguard Worker static void
bi_emit_load_attr(bi_builder * b,nir_intrinsic_instr * instr)465*61046927SAndroid Build Coastguard Worker bi_emit_load_attr(bi_builder *b, nir_intrinsic_instr *instr)
466*61046927SAndroid Build Coastguard Worker {
467*61046927SAndroid Build Coastguard Worker /* Disregard the signedness of an integer, since loading 32-bits into a
468*61046927SAndroid Build Coastguard Worker * 32-bit register should be bit exact so should not incur any clamping.
469*61046927SAndroid Build Coastguard Worker *
470*61046927SAndroid Build Coastguard Worker * If we are reading as a u32, then it must be paired with an integer (u32 or
471*61046927SAndroid Build Coastguard Worker * s32) source, so use .auto32 to disregard.
472*61046927SAndroid Build Coastguard Worker */
473*61046927SAndroid Build Coastguard Worker nir_alu_type T = nir_intrinsic_dest_type(instr);
474*61046927SAndroid Build Coastguard Worker assert(T == nir_type_uint32 || T == nir_type_int32 || T == nir_type_float32);
475*61046927SAndroid Build Coastguard Worker enum bi_register_format regfmt =
476*61046927SAndroid Build Coastguard Worker T == nir_type_float32 ? BI_REGISTER_FORMAT_F32 : BI_REGISTER_FORMAT_AUTO;
477*61046927SAndroid Build Coastguard Worker
478*61046927SAndroid Build Coastguard Worker nir_src *offset = nir_get_io_offset_src(instr);
479*61046927SAndroid Build Coastguard Worker unsigned component = nir_intrinsic_component(instr);
480*61046927SAndroid Build Coastguard Worker enum bi_vecsize vecsize = (instr->num_components + component - 1);
481*61046927SAndroid Build Coastguard Worker unsigned imm_index = 0;
482*61046927SAndroid Build Coastguard Worker unsigned base = nir_intrinsic_base(instr);
483*61046927SAndroid Build Coastguard Worker bool constant = nir_src_is_const(*offset);
484*61046927SAndroid Build Coastguard Worker bool immediate = bi_is_imm_desc_handle(b, instr, &imm_index, 16);
485*61046927SAndroid Build Coastguard Worker bi_index dest =
486*61046927SAndroid Build Coastguard Worker (component == 0) ? bi_def_index(&instr->def) : bi_temp(b->shader);
487*61046927SAndroid Build Coastguard Worker bi_instr *I;
488*61046927SAndroid Build Coastguard Worker
489*61046927SAndroid Build Coastguard Worker if (immediate) {
490*61046927SAndroid Build Coastguard Worker I = bi_ld_attr_imm_to(b, dest, bi_vertex_id(b), bi_instance_id(b), regfmt,
491*61046927SAndroid Build Coastguard Worker vecsize, pan_res_handle_get_index(imm_index));
492*61046927SAndroid Build Coastguard Worker
493*61046927SAndroid Build Coastguard Worker if (b->shader->arch >= 9)
494*61046927SAndroid Build Coastguard Worker I->table = va_res_fold_table_idx(pan_res_handle_get_table(base));
495*61046927SAndroid Build Coastguard Worker } else {
496*61046927SAndroid Build Coastguard Worker bi_index idx = bi_src_index(&instr->src[0]);
497*61046927SAndroid Build Coastguard Worker
498*61046927SAndroid Build Coastguard Worker if (constant)
499*61046927SAndroid Build Coastguard Worker idx = bi_imm_u32(imm_index);
500*61046927SAndroid Build Coastguard Worker else if (base != 0)
501*61046927SAndroid Build Coastguard Worker idx = bi_iadd_u32(b, idx, bi_imm_u32(base), false);
502*61046927SAndroid Build Coastguard Worker
503*61046927SAndroid Build Coastguard Worker I = bi_ld_attr_to(b, dest, bi_vertex_id(b), bi_instance_id(b), idx,
504*61046927SAndroid Build Coastguard Worker regfmt, vecsize);
505*61046927SAndroid Build Coastguard Worker }
506*61046927SAndroid Build Coastguard Worker
507*61046927SAndroid Build Coastguard Worker bi_copy_component(b, instr, dest);
508*61046927SAndroid Build Coastguard Worker }
509*61046927SAndroid Build Coastguard Worker
510*61046927SAndroid Build Coastguard Worker /*
511*61046927SAndroid Build Coastguard Worker * ABI: Special (desktop GL) slots come first, tightly packed. General varyings
512*61046927SAndroid Build Coastguard Worker * come later, sparsely packed. This handles both linked and separable shaders
513*61046927SAndroid Build Coastguard Worker * with a common code path, with minimal keying only for desktop GL. Each slot
514*61046927SAndroid Build Coastguard Worker * consumes 16 bytes (TODO: fp16, partial vectors).
515*61046927SAndroid Build Coastguard Worker */
516*61046927SAndroid Build Coastguard Worker static unsigned
bi_varying_base_bytes(bi_context * ctx,nir_intrinsic_instr * intr)517*61046927SAndroid Build Coastguard Worker bi_varying_base_bytes(bi_context *ctx, nir_intrinsic_instr *intr)
518*61046927SAndroid Build Coastguard Worker {
519*61046927SAndroid Build Coastguard Worker nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
520*61046927SAndroid Build Coastguard Worker uint32_t mask = ctx->inputs->fixed_varying_mask;
521*61046927SAndroid Build Coastguard Worker
522*61046927SAndroid Build Coastguard Worker if (sem.location >= VARYING_SLOT_VAR0) {
523*61046927SAndroid Build Coastguard Worker unsigned nr_special = util_bitcount(mask);
524*61046927SAndroid Build Coastguard Worker unsigned general_index = (sem.location - VARYING_SLOT_VAR0);
525*61046927SAndroid Build Coastguard Worker
526*61046927SAndroid Build Coastguard Worker return 16 * (nr_special + general_index);
527*61046927SAndroid Build Coastguard Worker } else {
528*61046927SAndroid Build Coastguard Worker return 16 * (util_bitcount(mask & BITFIELD_MASK(sem.location)));
529*61046927SAndroid Build Coastguard Worker }
530*61046927SAndroid Build Coastguard Worker }
531*61046927SAndroid Build Coastguard Worker
532*61046927SAndroid Build Coastguard Worker /*
533*61046927SAndroid Build Coastguard Worker * Compute the offset in bytes of a varying with an immediate offset, adding the
534*61046927SAndroid Build Coastguard Worker * offset to the base computed above. Convenience method.
535*61046927SAndroid Build Coastguard Worker */
536*61046927SAndroid Build Coastguard Worker static unsigned
bi_varying_offset(bi_context * ctx,nir_intrinsic_instr * intr)537*61046927SAndroid Build Coastguard Worker bi_varying_offset(bi_context *ctx, nir_intrinsic_instr *intr)
538*61046927SAndroid Build Coastguard Worker {
539*61046927SAndroid Build Coastguard Worker nir_src *src = nir_get_io_offset_src(intr);
540*61046927SAndroid Build Coastguard Worker assert(nir_src_is_const(*src) && "assumes immediate offset");
541*61046927SAndroid Build Coastguard Worker
542*61046927SAndroid Build Coastguard Worker return bi_varying_base_bytes(ctx, intr) + (nir_src_as_uint(*src) * 16);
543*61046927SAndroid Build Coastguard Worker }
544*61046927SAndroid Build Coastguard Worker
545*61046927SAndroid Build Coastguard Worker static void
bi_emit_load_vary(bi_builder * b,nir_intrinsic_instr * instr)546*61046927SAndroid Build Coastguard Worker bi_emit_load_vary(bi_builder *b, nir_intrinsic_instr *instr)
547*61046927SAndroid Build Coastguard Worker {
548*61046927SAndroid Build Coastguard Worker enum bi_sample sample = BI_SAMPLE_CENTER;
549*61046927SAndroid Build Coastguard Worker enum bi_update update = BI_UPDATE_STORE;
550*61046927SAndroid Build Coastguard Worker enum bi_register_format regfmt = BI_REGISTER_FORMAT_AUTO;
551*61046927SAndroid Build Coastguard Worker bool smooth = instr->intrinsic == nir_intrinsic_load_interpolated_input;
552*61046927SAndroid Build Coastguard Worker bi_index src0 = bi_null();
553*61046927SAndroid Build Coastguard Worker
554*61046927SAndroid Build Coastguard Worker unsigned component = nir_intrinsic_component(instr);
555*61046927SAndroid Build Coastguard Worker enum bi_vecsize vecsize = (instr->num_components + component - 1);
556*61046927SAndroid Build Coastguard Worker bi_index dest =
557*61046927SAndroid Build Coastguard Worker (component == 0) ? bi_def_index(&instr->def) : bi_temp(b->shader);
558*61046927SAndroid Build Coastguard Worker
559*61046927SAndroid Build Coastguard Worker unsigned sz = instr->def.bit_size;
560*61046927SAndroid Build Coastguard Worker
561*61046927SAndroid Build Coastguard Worker if (smooth) {
562*61046927SAndroid Build Coastguard Worker nir_intrinsic_instr *parent = nir_src_as_intrinsic(instr->src[0]);
563*61046927SAndroid Build Coastguard Worker assert(parent);
564*61046927SAndroid Build Coastguard Worker
565*61046927SAndroid Build Coastguard Worker sample = bi_interp_for_intrinsic(parent->intrinsic);
566*61046927SAndroid Build Coastguard Worker src0 = bi_varying_src0_for_barycentric(b, parent);
567*61046927SAndroid Build Coastguard Worker
568*61046927SAndroid Build Coastguard Worker assert(sz == 16 || sz == 32);
569*61046927SAndroid Build Coastguard Worker regfmt = (sz == 16) ? BI_REGISTER_FORMAT_F16 : BI_REGISTER_FORMAT_F32;
570*61046927SAndroid Build Coastguard Worker } else {
571*61046927SAndroid Build Coastguard Worker assert(sz == 32);
572*61046927SAndroid Build Coastguard Worker regfmt = BI_REGISTER_FORMAT_U32;
573*61046927SAndroid Build Coastguard Worker
574*61046927SAndroid Build Coastguard Worker /* Valhall can't have bi_null() here, although the source is
575*61046927SAndroid Build Coastguard Worker * logically unused for flat varyings
576*61046927SAndroid Build Coastguard Worker */
577*61046927SAndroid Build Coastguard Worker if (b->shader->arch >= 9)
578*61046927SAndroid Build Coastguard Worker src0 = bi_preload(b, 61);
579*61046927SAndroid Build Coastguard Worker
580*61046927SAndroid Build Coastguard Worker /* Gather info as we go */
581*61046927SAndroid Build Coastguard Worker b->shader->info.bifrost->uses_flat_shading = true;
582*61046927SAndroid Build Coastguard Worker }
583*61046927SAndroid Build Coastguard Worker
584*61046927SAndroid Build Coastguard Worker enum bi_source_format source_format =
585*61046927SAndroid Build Coastguard Worker smooth ? BI_SOURCE_FORMAT_F32 : BI_SOURCE_FORMAT_FLAT32;
586*61046927SAndroid Build Coastguard Worker
587*61046927SAndroid Build Coastguard Worker nir_src *offset = nir_get_io_offset_src(instr);
588*61046927SAndroid Build Coastguard Worker unsigned imm_index = 0;
589*61046927SAndroid Build Coastguard Worker bool immediate = bi_is_imm_var_desc_handle(b, instr, &imm_index);
590*61046927SAndroid Build Coastguard Worker unsigned base = nir_intrinsic_base(instr);
591*61046927SAndroid Build Coastguard Worker
592*61046927SAndroid Build Coastguard Worker /* On Valhall, ensure the table and index are valid for usage with immediate
593*61046927SAndroid Build Coastguard Worker * form when IDVS isn't used */
594*61046927SAndroid Build Coastguard Worker if (b->shader->arch >= 9 && !b->shader->malloc_idvs)
595*61046927SAndroid Build Coastguard Worker immediate &= va_is_valid_const_table(pan_res_handle_get_table(base)) &&
596*61046927SAndroid Build Coastguard Worker pan_res_handle_get_index(base) < 256;
597*61046927SAndroid Build Coastguard Worker
598*61046927SAndroid Build Coastguard Worker if (b->shader->malloc_idvs && immediate) {
599*61046927SAndroid Build Coastguard Worker /* Immediate index given in bytes. */
600*61046927SAndroid Build Coastguard Worker bi_ld_var_buf_imm_to(b, sz, dest, src0, regfmt, sample, source_format,
601*61046927SAndroid Build Coastguard Worker update, vecsize,
602*61046927SAndroid Build Coastguard Worker bi_varying_offset(b->shader, instr));
603*61046927SAndroid Build Coastguard Worker } else if (immediate) {
604*61046927SAndroid Build Coastguard Worker bi_instr *I;
605*61046927SAndroid Build Coastguard Worker
606*61046927SAndroid Build Coastguard Worker if (smooth) {
607*61046927SAndroid Build Coastguard Worker I = bi_ld_var_imm_to(b, dest, src0, regfmt, sample, update, vecsize,
608*61046927SAndroid Build Coastguard Worker pan_res_handle_get_index(imm_index));
609*61046927SAndroid Build Coastguard Worker } else {
610*61046927SAndroid Build Coastguard Worker I = bi_ld_var_flat_imm_to(b, dest, BI_FUNCTION_NONE, regfmt, vecsize,
611*61046927SAndroid Build Coastguard Worker pan_res_handle_get_index(imm_index));
612*61046927SAndroid Build Coastguard Worker }
613*61046927SAndroid Build Coastguard Worker
614*61046927SAndroid Build Coastguard Worker /* Valhall usually uses machine-allocated IDVS. If this is disabled,
615*61046927SAndroid Build Coastguard Worker * use a simple Midgard-style ABI.
616*61046927SAndroid Build Coastguard Worker */
617*61046927SAndroid Build Coastguard Worker if (b->shader->arch >= 9)
618*61046927SAndroid Build Coastguard Worker I->table = va_res_fold_table_idx(pan_res_handle_get_table(base));
619*61046927SAndroid Build Coastguard Worker } else {
620*61046927SAndroid Build Coastguard Worker bi_index idx = bi_src_index(offset);
621*61046927SAndroid Build Coastguard Worker
622*61046927SAndroid Build Coastguard Worker if (b->shader->malloc_idvs) {
623*61046927SAndroid Build Coastguard Worker /* Index needs to be in bytes, but NIR gives the index
624*61046927SAndroid Build Coastguard Worker * in slots. For now assume 16 bytes per element.
625*61046927SAndroid Build Coastguard Worker */
626*61046927SAndroid Build Coastguard Worker bi_index idx_bytes = bi_lshift_or_i32(b, idx, bi_zero(), bi_imm_u8(4));
627*61046927SAndroid Build Coastguard Worker unsigned vbase = bi_varying_base_bytes(b->shader, instr);
628*61046927SAndroid Build Coastguard Worker
629*61046927SAndroid Build Coastguard Worker if (vbase != 0)
630*61046927SAndroid Build Coastguard Worker idx_bytes = bi_iadd_u32(b, idx, bi_imm_u32(vbase), false);
631*61046927SAndroid Build Coastguard Worker
632*61046927SAndroid Build Coastguard Worker bi_ld_var_buf_to(b, sz, dest, src0, idx_bytes, regfmt, sample,
633*61046927SAndroid Build Coastguard Worker source_format, update, vecsize);
634*61046927SAndroid Build Coastguard Worker } else {
635*61046927SAndroid Build Coastguard Worker if (base != 0)
636*61046927SAndroid Build Coastguard Worker idx = bi_iadd_u32(b, idx, bi_imm_u32(base), false);
637*61046927SAndroid Build Coastguard Worker
638*61046927SAndroid Build Coastguard Worker if (smooth)
639*61046927SAndroid Build Coastguard Worker bi_ld_var_to(b, dest, src0, idx, regfmt, sample, update, vecsize);
640*61046927SAndroid Build Coastguard Worker else
641*61046927SAndroid Build Coastguard Worker bi_ld_var_flat_to(b, dest, idx, BI_FUNCTION_NONE, regfmt, vecsize);
642*61046927SAndroid Build Coastguard Worker }
643*61046927SAndroid Build Coastguard Worker }
644*61046927SAndroid Build Coastguard Worker
645*61046927SAndroid Build Coastguard Worker bi_copy_component(b, instr, dest);
646*61046927SAndroid Build Coastguard Worker }
647*61046927SAndroid Build Coastguard Worker
648*61046927SAndroid Build Coastguard Worker static bi_index
bi_make_vec8_helper(bi_builder * b,bi_index * src,unsigned * channel,unsigned count)649*61046927SAndroid Build Coastguard Worker bi_make_vec8_helper(bi_builder *b, bi_index *src, unsigned *channel,
650*61046927SAndroid Build Coastguard Worker unsigned count)
651*61046927SAndroid Build Coastguard Worker {
652*61046927SAndroid Build Coastguard Worker assert(1 <= count && count <= 4);
653*61046927SAndroid Build Coastguard Worker
654*61046927SAndroid Build Coastguard Worker bi_index bytes[4] = {bi_imm_u8(0), bi_imm_u8(0), bi_imm_u8(0), bi_imm_u8(0)};
655*61046927SAndroid Build Coastguard Worker
656*61046927SAndroid Build Coastguard Worker for (unsigned i = 0; i < count; ++i) {
657*61046927SAndroid Build Coastguard Worker unsigned chan = channel ? channel[i] : 0;
658*61046927SAndroid Build Coastguard Worker unsigned lane = chan & 3;
659*61046927SAndroid Build Coastguard Worker bi_index raw_data = bi_extract(b, src[i], chan >> 2);
660*61046927SAndroid Build Coastguard Worker
661*61046927SAndroid Build Coastguard Worker /* On Bifrost, MKVEC.v4i8 cannot select b1 or b3 */
662*61046927SAndroid Build Coastguard Worker if (b->shader->arch < 9 && lane != 0 && lane != 2) {
663*61046927SAndroid Build Coastguard Worker bytes[i] = bi_byte(bi_rshift_or(b, 32, raw_data, bi_zero(),
664*61046927SAndroid Build Coastguard Worker bi_imm_u8(lane * 8), false),
665*61046927SAndroid Build Coastguard Worker 0);
666*61046927SAndroid Build Coastguard Worker } else {
667*61046927SAndroid Build Coastguard Worker bytes[i] = bi_byte(raw_data, lane);
668*61046927SAndroid Build Coastguard Worker }
669*61046927SAndroid Build Coastguard Worker
670*61046927SAndroid Build Coastguard Worker assert(b->shader->arch >= 9 || bytes[i].swizzle == BI_SWIZZLE_B0000 ||
671*61046927SAndroid Build Coastguard Worker bytes[i].swizzle == BI_SWIZZLE_B2222);
672*61046927SAndroid Build Coastguard Worker }
673*61046927SAndroid Build Coastguard Worker
674*61046927SAndroid Build Coastguard Worker if (b->shader->arch >= 9) {
675*61046927SAndroid Build Coastguard Worker bi_index vec = bi_zero();
676*61046927SAndroid Build Coastguard Worker
677*61046927SAndroid Build Coastguard Worker if (count >= 3)
678*61046927SAndroid Build Coastguard Worker vec = bi_mkvec_v2i8(b, bytes[2], bytes[3], vec);
679*61046927SAndroid Build Coastguard Worker
680*61046927SAndroid Build Coastguard Worker return bi_mkvec_v2i8(b, bytes[0], bytes[1], vec);
681*61046927SAndroid Build Coastguard Worker } else {
682*61046927SAndroid Build Coastguard Worker return bi_mkvec_v4i8(b, bytes[0], bytes[1], bytes[2], bytes[3]);
683*61046927SAndroid Build Coastguard Worker }
684*61046927SAndroid Build Coastguard Worker }
685*61046927SAndroid Build Coastguard Worker
686*61046927SAndroid Build Coastguard Worker static bi_index
bi_make_vec16_helper(bi_builder * b,bi_index * src,unsigned * channel,unsigned count)687*61046927SAndroid Build Coastguard Worker bi_make_vec16_helper(bi_builder *b, bi_index *src, unsigned *channel,
688*61046927SAndroid Build Coastguard Worker unsigned count)
689*61046927SAndroid Build Coastguard Worker {
690*61046927SAndroid Build Coastguard Worker unsigned chan0 = channel ? channel[0] : 0;
691*61046927SAndroid Build Coastguard Worker bi_index w0 = bi_extract(b, src[0], chan0 >> 1);
692*61046927SAndroid Build Coastguard Worker bi_index h0 = bi_half(w0, chan0 & 1);
693*61046927SAndroid Build Coastguard Worker
694*61046927SAndroid Build Coastguard Worker /* Zero extend */
695*61046927SAndroid Build Coastguard Worker if (count == 1)
696*61046927SAndroid Build Coastguard Worker return bi_mkvec_v2i16(b, h0, bi_imm_u16(0));
697*61046927SAndroid Build Coastguard Worker
698*61046927SAndroid Build Coastguard Worker /* Else, create a vector */
699*61046927SAndroid Build Coastguard Worker assert(count == 2);
700*61046927SAndroid Build Coastguard Worker
701*61046927SAndroid Build Coastguard Worker unsigned chan1 = channel ? channel[1] : 0;
702*61046927SAndroid Build Coastguard Worker bi_index w1 = bi_extract(b, src[1], chan1 >> 1);
703*61046927SAndroid Build Coastguard Worker bi_index h1 = bi_half(w1, chan1 & 1);
704*61046927SAndroid Build Coastguard Worker
705*61046927SAndroid Build Coastguard Worker if (bi_is_word_equiv(w0, w1) && (chan0 & 1) == 0 && ((chan1 & 1) == 1))
706*61046927SAndroid Build Coastguard Worker return bi_mov_i32(b, w0);
707*61046927SAndroid Build Coastguard Worker else if (bi_is_word_equiv(w0, w1))
708*61046927SAndroid Build Coastguard Worker return bi_swz_v2i16(b, bi_swz_16(w0, chan0 & 1, chan1 & 1));
709*61046927SAndroid Build Coastguard Worker else
710*61046927SAndroid Build Coastguard Worker return bi_mkvec_v2i16(b, h0, h1);
711*61046927SAndroid Build Coastguard Worker }
712*61046927SAndroid Build Coastguard Worker
713*61046927SAndroid Build Coastguard Worker static void
bi_make_vec_to(bi_builder * b,bi_index dst,bi_index * src,unsigned * channel,unsigned count,unsigned bitsize)714*61046927SAndroid Build Coastguard Worker bi_make_vec_to(bi_builder *b, bi_index dst, bi_index *src, unsigned *channel,
715*61046927SAndroid Build Coastguard Worker unsigned count, unsigned bitsize)
716*61046927SAndroid Build Coastguard Worker {
717*61046927SAndroid Build Coastguard Worker assert(bitsize == 8 || bitsize == 16 || bitsize == 32);
718*61046927SAndroid Build Coastguard Worker unsigned shift = (bitsize == 32) ? 0 : (bitsize == 16) ? 1 : 2;
719*61046927SAndroid Build Coastguard Worker unsigned chan_per_word = 1 << shift;
720*61046927SAndroid Build Coastguard Worker
721*61046927SAndroid Build Coastguard Worker assert(DIV_ROUND_UP(count * bitsize, 32) <= BI_MAX_SRCS &&
722*61046927SAndroid Build Coastguard Worker "unnecessarily large vector should have been lowered");
723*61046927SAndroid Build Coastguard Worker
724*61046927SAndroid Build Coastguard Worker bi_index srcs[BI_MAX_VEC];
725*61046927SAndroid Build Coastguard Worker
726*61046927SAndroid Build Coastguard Worker for (unsigned i = 0; i < count; i += chan_per_word) {
727*61046927SAndroid Build Coastguard Worker unsigned rem = MIN2(count - i, chan_per_word);
728*61046927SAndroid Build Coastguard Worker unsigned *channel_offset = channel ? (channel + i) : NULL;
729*61046927SAndroid Build Coastguard Worker
730*61046927SAndroid Build Coastguard Worker if (bitsize == 32)
731*61046927SAndroid Build Coastguard Worker srcs[i] = bi_extract(b, src[i], channel_offset ? *channel_offset : 0);
732*61046927SAndroid Build Coastguard Worker else if (bitsize == 16)
733*61046927SAndroid Build Coastguard Worker srcs[i >> 1] = bi_make_vec16_helper(b, src + i, channel_offset, rem);
734*61046927SAndroid Build Coastguard Worker else
735*61046927SAndroid Build Coastguard Worker srcs[i >> 2] = bi_make_vec8_helper(b, src + i, channel_offset, rem);
736*61046927SAndroid Build Coastguard Worker }
737*61046927SAndroid Build Coastguard Worker
738*61046927SAndroid Build Coastguard Worker bi_emit_collect_to(b, dst, srcs, DIV_ROUND_UP(count, chan_per_word));
739*61046927SAndroid Build Coastguard Worker }
740*61046927SAndroid Build Coastguard Worker
741*61046927SAndroid Build Coastguard Worker static inline bi_instr *
bi_load_ubo_to(bi_builder * b,unsigned bitsize,bi_index dest0,bi_index src0,bi_index src1)742*61046927SAndroid Build Coastguard Worker bi_load_ubo_to(bi_builder *b, unsigned bitsize, bi_index dest0, bi_index src0,
743*61046927SAndroid Build Coastguard Worker bi_index src1)
744*61046927SAndroid Build Coastguard Worker {
745*61046927SAndroid Build Coastguard Worker bi_instr *I;
746*61046927SAndroid Build Coastguard Worker
747*61046927SAndroid Build Coastguard Worker if (b->shader->arch >= 9) {
748*61046927SAndroid Build Coastguard Worker I = bi_ld_buffer_to(b, bitsize, dest0, src0, src1);
749*61046927SAndroid Build Coastguard Worker I->seg = BI_SEG_UBO;
750*61046927SAndroid Build Coastguard Worker } else {
751*61046927SAndroid Build Coastguard Worker I = bi_load_to(b, bitsize, dest0, src0, src1, BI_SEG_UBO, 0);
752*61046927SAndroid Build Coastguard Worker }
753*61046927SAndroid Build Coastguard Worker
754*61046927SAndroid Build Coastguard Worker bi_emit_cached_split(b, dest0, bitsize);
755*61046927SAndroid Build Coastguard Worker return I;
756*61046927SAndroid Build Coastguard Worker }
757*61046927SAndroid Build Coastguard Worker
758*61046927SAndroid Build Coastguard Worker static void
bi_load_sample_id_to(bi_builder * b,bi_index dst)759*61046927SAndroid Build Coastguard Worker bi_load_sample_id_to(bi_builder *b, bi_index dst)
760*61046927SAndroid Build Coastguard Worker {
761*61046927SAndroid Build Coastguard Worker /* r61[16:23] contains the sampleID, mask it out. Upper bits
762*61046927SAndroid Build Coastguard Worker * seem to read garbage (despite being architecturally defined
763*61046927SAndroid Build Coastguard Worker * as zero), so use a 5-bit mask instead of 8-bits */
764*61046927SAndroid Build Coastguard Worker
765*61046927SAndroid Build Coastguard Worker bi_rshift_and_i32_to(b, dst, bi_preload(b, 61), bi_imm_u32(0x1f),
766*61046927SAndroid Build Coastguard Worker bi_imm_u8(16), false);
767*61046927SAndroid Build Coastguard Worker }
768*61046927SAndroid Build Coastguard Worker
769*61046927SAndroid Build Coastguard Worker static bi_index
bi_load_sample_id(bi_builder * b)770*61046927SAndroid Build Coastguard Worker bi_load_sample_id(bi_builder *b)
771*61046927SAndroid Build Coastguard Worker {
772*61046927SAndroid Build Coastguard Worker bi_index sample_id = bi_temp(b->shader);
773*61046927SAndroid Build Coastguard Worker bi_load_sample_id_to(b, sample_id);
774*61046927SAndroid Build Coastguard Worker return sample_id;
775*61046927SAndroid Build Coastguard Worker }
776*61046927SAndroid Build Coastguard Worker
777*61046927SAndroid Build Coastguard Worker static bi_index
bi_pixel_indices(bi_builder * b,unsigned rt)778*61046927SAndroid Build Coastguard Worker bi_pixel_indices(bi_builder *b, unsigned rt)
779*61046927SAndroid Build Coastguard Worker {
780*61046927SAndroid Build Coastguard Worker /* We want to load the current pixel. */
781*61046927SAndroid Build Coastguard Worker struct bifrost_pixel_indices pix = {.y = BIFROST_CURRENT_PIXEL, .rt = rt};
782*61046927SAndroid Build Coastguard Worker
783*61046927SAndroid Build Coastguard Worker uint32_t indices_u32 = 0;
784*61046927SAndroid Build Coastguard Worker memcpy(&indices_u32, &pix, sizeof(indices_u32));
785*61046927SAndroid Build Coastguard Worker bi_index indices = bi_imm_u32(indices_u32);
786*61046927SAndroid Build Coastguard Worker
787*61046927SAndroid Build Coastguard Worker /* Sample index above is left as zero. For multisampling, we need to
788*61046927SAndroid Build Coastguard Worker * fill in the actual sample ID in the lower byte */
789*61046927SAndroid Build Coastguard Worker
790*61046927SAndroid Build Coastguard Worker if (b->shader->inputs->blend.nr_samples > 1)
791*61046927SAndroid Build Coastguard Worker indices = bi_iadd_u32(b, indices, bi_load_sample_id(b), false);
792*61046927SAndroid Build Coastguard Worker
793*61046927SAndroid Build Coastguard Worker return indices;
794*61046927SAndroid Build Coastguard Worker }
795*61046927SAndroid Build Coastguard Worker
796*61046927SAndroid Build Coastguard Worker /* Source color is passed through r0-r3, or r4-r7 for the second source when
797*61046927SAndroid Build Coastguard Worker * dual-source blending. Preload the corresponding vector.
798*61046927SAndroid Build Coastguard Worker */
799*61046927SAndroid Build Coastguard Worker static void
bi_emit_load_blend_input(bi_builder * b,nir_intrinsic_instr * instr)800*61046927SAndroid Build Coastguard Worker bi_emit_load_blend_input(bi_builder *b, nir_intrinsic_instr *instr)
801*61046927SAndroid Build Coastguard Worker {
802*61046927SAndroid Build Coastguard Worker nir_io_semantics sem = nir_intrinsic_io_semantics(instr);
803*61046927SAndroid Build Coastguard Worker unsigned base = (sem.location == VARYING_SLOT_VAR0) ? 4 : 0;
804*61046927SAndroid Build Coastguard Worker unsigned size = nir_alu_type_get_type_size(nir_intrinsic_dest_type(instr));
805*61046927SAndroid Build Coastguard Worker assert(size == 16 || size == 32);
806*61046927SAndroid Build Coastguard Worker
807*61046927SAndroid Build Coastguard Worker bi_index srcs[] = {bi_preload(b, base + 0), bi_preload(b, base + 1),
808*61046927SAndroid Build Coastguard Worker bi_preload(b, base + 2), bi_preload(b, base + 3)};
809*61046927SAndroid Build Coastguard Worker
810*61046927SAndroid Build Coastguard Worker bi_emit_collect_to(b, bi_def_index(&instr->def), srcs, size == 32 ? 4 : 2);
811*61046927SAndroid Build Coastguard Worker }
812*61046927SAndroid Build Coastguard Worker
813*61046927SAndroid Build Coastguard Worker static void
bi_emit_blend_op(bi_builder * b,bi_index rgba,nir_alu_type T,bi_index rgba2,nir_alu_type T2,unsigned rt)814*61046927SAndroid Build Coastguard Worker bi_emit_blend_op(bi_builder *b, bi_index rgba, nir_alu_type T, bi_index rgba2,
815*61046927SAndroid Build Coastguard Worker nir_alu_type T2, unsigned rt)
816*61046927SAndroid Build Coastguard Worker {
817*61046927SAndroid Build Coastguard Worker /* Reads 2 or 4 staging registers to cover the input */
818*61046927SAndroid Build Coastguard Worker unsigned size = nir_alu_type_get_type_size(T);
819*61046927SAndroid Build Coastguard Worker unsigned size_2 = nir_alu_type_get_type_size(T2);
820*61046927SAndroid Build Coastguard Worker unsigned sr_count = (size <= 16) ? 2 : 4;
821*61046927SAndroid Build Coastguard Worker unsigned sr_count_2 = (size_2 <= 16) ? 2 : 4;
822*61046927SAndroid Build Coastguard Worker const struct panfrost_compile_inputs *inputs = b->shader->inputs;
823*61046927SAndroid Build Coastguard Worker uint64_t blend_desc = inputs->blend.bifrost_blend_desc;
824*61046927SAndroid Build Coastguard Worker enum bi_register_format regfmt = bi_reg_fmt_for_nir(T);
825*61046927SAndroid Build Coastguard Worker
826*61046927SAndroid Build Coastguard Worker /* Workaround for NIR-to-TGSI */
827*61046927SAndroid Build Coastguard Worker if (b->shader->nir->info.fs.untyped_color_outputs)
828*61046927SAndroid Build Coastguard Worker regfmt = BI_REGISTER_FORMAT_AUTO;
829*61046927SAndroid Build Coastguard Worker
830*61046927SAndroid Build Coastguard Worker if (inputs->is_blend && inputs->blend.nr_samples > 1) {
831*61046927SAndroid Build Coastguard Worker /* Conversion descriptor comes from the compile inputs, pixel
832*61046927SAndroid Build Coastguard Worker * indices derived at run time based on sample ID */
833*61046927SAndroid Build Coastguard Worker bi_st_tile(b, rgba, bi_pixel_indices(b, rt), bi_coverage(b),
834*61046927SAndroid Build Coastguard Worker bi_imm_u32(blend_desc >> 32), regfmt, BI_VECSIZE_V4);
835*61046927SAndroid Build Coastguard Worker } else if (b->shader->inputs->is_blend) {
836*61046927SAndroid Build Coastguard Worker uint64_t blend_desc = b->shader->inputs->blend.bifrost_blend_desc;
837*61046927SAndroid Build Coastguard Worker
838*61046927SAndroid Build Coastguard Worker /* Blend descriptor comes from the compile inputs */
839*61046927SAndroid Build Coastguard Worker /* Put the result in r0 */
840*61046927SAndroid Build Coastguard Worker
841*61046927SAndroid Build Coastguard Worker bi_blend_to(b, bi_temp(b->shader), rgba, bi_coverage(b),
842*61046927SAndroid Build Coastguard Worker bi_imm_u32(blend_desc), bi_imm_u32(blend_desc >> 32),
843*61046927SAndroid Build Coastguard Worker bi_null(), regfmt, sr_count, 0);
844*61046927SAndroid Build Coastguard Worker } else {
845*61046927SAndroid Build Coastguard Worker /* Blend descriptor comes from the FAU RAM. By convention, the
846*61046927SAndroid Build Coastguard Worker * return address on Bifrost is stored in r48 and will be used
847*61046927SAndroid Build Coastguard Worker * by the blend shader to jump back to the fragment shader */
848*61046927SAndroid Build Coastguard Worker
849*61046927SAndroid Build Coastguard Worker bi_blend_to(b, bi_temp(b->shader), rgba, bi_coverage(b),
850*61046927SAndroid Build Coastguard Worker bi_fau(BIR_FAU_BLEND_0 + rt, false),
851*61046927SAndroid Build Coastguard Worker bi_fau(BIR_FAU_BLEND_0 + rt, true), rgba2, regfmt, sr_count,
852*61046927SAndroid Build Coastguard Worker sr_count_2);
853*61046927SAndroid Build Coastguard Worker }
854*61046927SAndroid Build Coastguard Worker
855*61046927SAndroid Build Coastguard Worker assert(rt < 8);
856*61046927SAndroid Build Coastguard Worker b->shader->info.bifrost->blend[rt].type = T;
857*61046927SAndroid Build Coastguard Worker
858*61046927SAndroid Build Coastguard Worker if (T2)
859*61046927SAndroid Build Coastguard Worker b->shader->info.bifrost->blend_src1_type = T2;
860*61046927SAndroid Build Coastguard Worker }
861*61046927SAndroid Build Coastguard Worker
862*61046927SAndroid Build Coastguard Worker /* Blend shaders do not need to run ATEST since they are dependent on a
863*61046927SAndroid Build Coastguard Worker * fragment shader that runs it. Blit shaders may not need to run ATEST, since
864*61046927SAndroid Build Coastguard Worker * ATEST is not needed if early-z is forced, alpha-to-coverage is disabled, and
865*61046927SAndroid Build Coastguard Worker * there are no writes to the coverage mask. The latter two are satisfied for
866*61046927SAndroid Build Coastguard Worker * all blit shaders, so we just care about early-z, which blit shaders force
867*61046927SAndroid Build Coastguard Worker * iff they do not write depth or stencil */
868*61046927SAndroid Build Coastguard Worker
869*61046927SAndroid Build Coastguard Worker static bool
bi_skip_atest(bi_context * ctx,bool emit_zs)870*61046927SAndroid Build Coastguard Worker bi_skip_atest(bi_context *ctx, bool emit_zs)
871*61046927SAndroid Build Coastguard Worker {
872*61046927SAndroid Build Coastguard Worker return (ctx->inputs->is_blit && !emit_zs) || ctx->inputs->is_blend;
873*61046927SAndroid Build Coastguard Worker }
874*61046927SAndroid Build Coastguard Worker
875*61046927SAndroid Build Coastguard Worker static void
bi_emit_atest(bi_builder * b,bi_index alpha)876*61046927SAndroid Build Coastguard Worker bi_emit_atest(bi_builder *b, bi_index alpha)
877*61046927SAndroid Build Coastguard Worker {
878*61046927SAndroid Build Coastguard Worker b->shader->coverage =
879*61046927SAndroid Build Coastguard Worker bi_atest(b, bi_coverage(b), alpha, bi_fau(BIR_FAU_ATEST_PARAM, false));
880*61046927SAndroid Build Coastguard Worker b->shader->emitted_atest = true;
881*61046927SAndroid Build Coastguard Worker }
882*61046927SAndroid Build Coastguard Worker
883*61046927SAndroid Build Coastguard Worker static bi_index
bi_src_color_vec4(bi_builder * b,nir_src * src,nir_alu_type T)884*61046927SAndroid Build Coastguard Worker bi_src_color_vec4(bi_builder *b, nir_src *src, nir_alu_type T)
885*61046927SAndroid Build Coastguard Worker {
886*61046927SAndroid Build Coastguard Worker unsigned num_components = nir_src_num_components(*src);
887*61046927SAndroid Build Coastguard Worker bi_index base = bi_src_index(src);
888*61046927SAndroid Build Coastguard Worker
889*61046927SAndroid Build Coastguard Worker /* short-circuit the common case */
890*61046927SAndroid Build Coastguard Worker if (num_components == 4)
891*61046927SAndroid Build Coastguard Worker return base;
892*61046927SAndroid Build Coastguard Worker
893*61046927SAndroid Build Coastguard Worker unsigned size = nir_alu_type_get_type_size(T);
894*61046927SAndroid Build Coastguard Worker assert(size == 16 || size == 32);
895*61046927SAndroid Build Coastguard Worker
896*61046927SAndroid Build Coastguard Worker bi_index src_vals[4];
897*61046927SAndroid Build Coastguard Worker
898*61046927SAndroid Build Coastguard Worker unsigned i;
899*61046927SAndroid Build Coastguard Worker for (i = 0; i < num_components; i++)
900*61046927SAndroid Build Coastguard Worker src_vals[i] = bi_extract(b, base, i);
901*61046927SAndroid Build Coastguard Worker
902*61046927SAndroid Build Coastguard Worker for (; i < 3; i++)
903*61046927SAndroid Build Coastguard Worker src_vals[i] = (size == 16) ? bi_imm_f16(0.0) : bi_imm_f32(0.0);
904*61046927SAndroid Build Coastguard Worker src_vals[3] = (size == 16) ? bi_imm_f16(1.0) : bi_imm_f32(1.0);
905*61046927SAndroid Build Coastguard Worker bi_index temp = bi_temp(b->shader);
906*61046927SAndroid Build Coastguard Worker bi_make_vec_to(b, temp, src_vals, NULL, 4, size);
907*61046927SAndroid Build Coastguard Worker return temp;
908*61046927SAndroid Build Coastguard Worker }
909*61046927SAndroid Build Coastguard Worker
910*61046927SAndroid Build Coastguard Worker static void
bi_emit_fragment_out(bi_builder * b,nir_intrinsic_instr * instr)911*61046927SAndroid Build Coastguard Worker bi_emit_fragment_out(bi_builder *b, nir_intrinsic_instr *instr)
912*61046927SAndroid Build Coastguard Worker {
913*61046927SAndroid Build Coastguard Worker bool combined = instr->intrinsic == nir_intrinsic_store_combined_output_pan;
914*61046927SAndroid Build Coastguard Worker
915*61046927SAndroid Build Coastguard Worker unsigned writeout =
916*61046927SAndroid Build Coastguard Worker combined ? nir_intrinsic_component(instr) : PAN_WRITEOUT_C;
917*61046927SAndroid Build Coastguard Worker
918*61046927SAndroid Build Coastguard Worker bool emit_blend = writeout & (PAN_WRITEOUT_C);
919*61046927SAndroid Build Coastguard Worker bool emit_zs = writeout & (PAN_WRITEOUT_Z | PAN_WRITEOUT_S);
920*61046927SAndroid Build Coastguard Worker
921*61046927SAndroid Build Coastguard Worker unsigned loc = nir_intrinsic_io_semantics(instr).location;
922*61046927SAndroid Build Coastguard Worker bi_index src0 = bi_src_index(&instr->src[0]);
923*61046927SAndroid Build Coastguard Worker
924*61046927SAndroid Build Coastguard Worker /* By ISA convention, the coverage mask is stored in R60. The store
925*61046927SAndroid Build Coastguard Worker * itself will be handled by a subsequent ATEST instruction */
926*61046927SAndroid Build Coastguard Worker if (loc == FRAG_RESULT_SAMPLE_MASK) {
927*61046927SAndroid Build Coastguard Worker b->shader->coverage = bi_extract(b, src0, 0);
928*61046927SAndroid Build Coastguard Worker return;
929*61046927SAndroid Build Coastguard Worker }
930*61046927SAndroid Build Coastguard Worker
931*61046927SAndroid Build Coastguard Worker /* Emit ATEST if we have to, note ATEST requires a floating-point alpha
932*61046927SAndroid Build Coastguard Worker * value, but render target #0 might not be floating point. However the
933*61046927SAndroid Build Coastguard Worker * alpha value is only used for alpha-to-coverage, a stage which is
934*61046927SAndroid Build Coastguard Worker * skipped for pure integer framebuffers, so the issue is moot. */
935*61046927SAndroid Build Coastguard Worker
936*61046927SAndroid Build Coastguard Worker if (!b->shader->emitted_atest && !bi_skip_atest(b->shader, emit_zs)) {
937*61046927SAndroid Build Coastguard Worker nir_alu_type T = nir_intrinsic_src_type(instr);
938*61046927SAndroid Build Coastguard Worker
939*61046927SAndroid Build Coastguard Worker bi_index rgba = bi_src_index(&instr->src[0]);
940*61046927SAndroid Build Coastguard Worker bi_index alpha;
941*61046927SAndroid Build Coastguard Worker
942*61046927SAndroid Build Coastguard Worker if (nir_src_num_components(instr->src[0]) < 4) {
943*61046927SAndroid Build Coastguard Worker /* Don't read out-of-bounds */
944*61046927SAndroid Build Coastguard Worker alpha = bi_imm_f32(1.0);
945*61046927SAndroid Build Coastguard Worker } else if (T == nir_type_float16) {
946*61046927SAndroid Build Coastguard Worker alpha = bi_half(bi_extract(b, rgba, 1), true);
947*61046927SAndroid Build Coastguard Worker } else if (T == nir_type_float32) {
948*61046927SAndroid Build Coastguard Worker alpha = bi_extract(b, rgba, 3);
949*61046927SAndroid Build Coastguard Worker } else {
950*61046927SAndroid Build Coastguard Worker alpha = bi_dontcare(b);
951*61046927SAndroid Build Coastguard Worker }
952*61046927SAndroid Build Coastguard Worker bi_emit_atest(b, alpha);
953*61046927SAndroid Build Coastguard Worker }
954*61046927SAndroid Build Coastguard Worker
955*61046927SAndroid Build Coastguard Worker if (emit_zs) {
956*61046927SAndroid Build Coastguard Worker bi_index z = bi_dontcare(b), s = bi_dontcare(b);
957*61046927SAndroid Build Coastguard Worker
958*61046927SAndroid Build Coastguard Worker if (writeout & PAN_WRITEOUT_Z)
959*61046927SAndroid Build Coastguard Worker z = bi_src_index(&instr->src[2]);
960*61046927SAndroid Build Coastguard Worker
961*61046927SAndroid Build Coastguard Worker if (writeout & PAN_WRITEOUT_S)
962*61046927SAndroid Build Coastguard Worker s = bi_src_index(&instr->src[3]);
963*61046927SAndroid Build Coastguard Worker
964*61046927SAndroid Build Coastguard Worker b->shader->coverage =
965*61046927SAndroid Build Coastguard Worker bi_zs_emit(b, z, s, bi_coverage(b), writeout & PAN_WRITEOUT_S,
966*61046927SAndroid Build Coastguard Worker writeout & PAN_WRITEOUT_Z);
967*61046927SAndroid Build Coastguard Worker }
968*61046927SAndroid Build Coastguard Worker
969*61046927SAndroid Build Coastguard Worker if (emit_blend) {
970*61046927SAndroid Build Coastguard Worker unsigned rt = loc ? (loc - FRAG_RESULT_DATA0) : 0;
971*61046927SAndroid Build Coastguard Worker bool dual = (writeout & PAN_WRITEOUT_2);
972*61046927SAndroid Build Coastguard Worker nir_alu_type T = nir_intrinsic_src_type(instr);
973*61046927SAndroid Build Coastguard Worker nir_alu_type T2 = dual ? nir_intrinsic_dest_type(instr) : 0;
974*61046927SAndroid Build Coastguard Worker bi_index color = bi_src_color_vec4(b, &instr->src[0], T);
975*61046927SAndroid Build Coastguard Worker bi_index color2 =
976*61046927SAndroid Build Coastguard Worker dual ? bi_src_color_vec4(b, &instr->src[4], T2) : bi_null();
977*61046927SAndroid Build Coastguard Worker
978*61046927SAndroid Build Coastguard Worker if (instr->intrinsic == nir_intrinsic_store_output &&
979*61046927SAndroid Build Coastguard Worker loc >= FRAG_RESULT_DATA0 && loc <= FRAG_RESULT_DATA7) {
980*61046927SAndroid Build Coastguard Worker assert(nir_src_is_const(instr->src[1]) && "no indirect outputs");
981*61046927SAndroid Build Coastguard Worker
982*61046927SAndroid Build Coastguard Worker unsigned rt_offs = nir_src_as_uint(instr->src[1]);
983*61046927SAndroid Build Coastguard Worker
984*61046927SAndroid Build Coastguard Worker assert(rt + rt_offs < 8 && "RT not in the [0-7] range");
985*61046927SAndroid Build Coastguard Worker rt += rt_offs;
986*61046927SAndroid Build Coastguard Worker }
987*61046927SAndroid Build Coastguard Worker
988*61046927SAndroid Build Coastguard Worker /* Explicit copy since BLEND inputs are precoloured to R0-R3,
989*61046927SAndroid Build Coastguard Worker * TODO: maybe schedule around this or implement in RA as a
990*61046927SAndroid Build Coastguard Worker * spill */
991*61046927SAndroid Build Coastguard Worker bool has_mrt =
992*61046927SAndroid Build Coastguard Worker (b->shader->nir->info.outputs_written >> FRAG_RESULT_DATA1);
993*61046927SAndroid Build Coastguard Worker
994*61046927SAndroid Build Coastguard Worker if (has_mrt) {
995*61046927SAndroid Build Coastguard Worker bi_index srcs[4] = {color, color, color, color};
996*61046927SAndroid Build Coastguard Worker unsigned channels[4] = {0, 1, 2, 3};
997*61046927SAndroid Build Coastguard Worker color = bi_temp(b->shader);
998*61046927SAndroid Build Coastguard Worker bi_make_vec_to(
999*61046927SAndroid Build Coastguard Worker b, color, srcs, channels, nir_src_num_components(instr->src[0]),
1000*61046927SAndroid Build Coastguard Worker nir_alu_type_get_type_size(nir_intrinsic_src_type(instr)));
1001*61046927SAndroid Build Coastguard Worker }
1002*61046927SAndroid Build Coastguard Worker
1003*61046927SAndroid Build Coastguard Worker bi_emit_blend_op(b, color, nir_intrinsic_src_type(instr), color2, T2, rt);
1004*61046927SAndroid Build Coastguard Worker }
1005*61046927SAndroid Build Coastguard Worker
1006*61046927SAndroid Build Coastguard Worker if (b->shader->inputs->is_blend) {
1007*61046927SAndroid Build Coastguard Worker /* Jump back to the fragment shader, return address is stored
1008*61046927SAndroid Build Coastguard Worker * in r48 (see above). On Valhall, only jump if the address is
1009*61046927SAndroid Build Coastguard Worker * nonzero. The check is free there and it implements the "jump
1010*61046927SAndroid Build Coastguard Worker * to 0 terminates the blend shader" that's automatic on
1011*61046927SAndroid Build Coastguard Worker * Bifrost.
1012*61046927SAndroid Build Coastguard Worker */
1013*61046927SAndroid Build Coastguard Worker if (b->shader->arch >= 8)
1014*61046927SAndroid Build Coastguard Worker bi_branchzi(b, bi_preload(b, 48), bi_preload(b, 48), BI_CMPF_NE);
1015*61046927SAndroid Build Coastguard Worker else
1016*61046927SAndroid Build Coastguard Worker bi_jump(b, bi_preload(b, 48));
1017*61046927SAndroid Build Coastguard Worker }
1018*61046927SAndroid Build Coastguard Worker }
1019*61046927SAndroid Build Coastguard Worker
1020*61046927SAndroid Build Coastguard Worker /**
1021*61046927SAndroid Build Coastguard Worker * In a vertex shader, is the specified variable a position output? These kinds
1022*61046927SAndroid Build Coastguard Worker * of outputs are written from position shaders when IDVS is enabled. All other
1023*61046927SAndroid Build Coastguard Worker * outputs are written from the varying shader.
1024*61046927SAndroid Build Coastguard Worker */
1025*61046927SAndroid Build Coastguard Worker static bool
bi_should_remove_store(nir_intrinsic_instr * intr,enum bi_idvs_mode idvs)1026*61046927SAndroid Build Coastguard Worker bi_should_remove_store(nir_intrinsic_instr *intr, enum bi_idvs_mode idvs)
1027*61046927SAndroid Build Coastguard Worker {
1028*61046927SAndroid Build Coastguard Worker nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
1029*61046927SAndroid Build Coastguard Worker
1030*61046927SAndroid Build Coastguard Worker switch (sem.location) {
1031*61046927SAndroid Build Coastguard Worker case VARYING_SLOT_POS:
1032*61046927SAndroid Build Coastguard Worker case VARYING_SLOT_PSIZ:
1033*61046927SAndroid Build Coastguard Worker case VARYING_SLOT_LAYER:
1034*61046927SAndroid Build Coastguard Worker return idvs == BI_IDVS_VARYING;
1035*61046927SAndroid Build Coastguard Worker default:
1036*61046927SAndroid Build Coastguard Worker return idvs == BI_IDVS_POSITION;
1037*61046927SAndroid Build Coastguard Worker }
1038*61046927SAndroid Build Coastguard Worker }
1039*61046927SAndroid Build Coastguard Worker
1040*61046927SAndroid Build Coastguard Worker static bool
bifrost_nir_specialize_idvs(nir_builder * b,nir_instr * instr,void * data)1041*61046927SAndroid Build Coastguard Worker bifrost_nir_specialize_idvs(nir_builder *b, nir_instr *instr, void *data)
1042*61046927SAndroid Build Coastguard Worker {
1043*61046927SAndroid Build Coastguard Worker enum bi_idvs_mode *idvs = data;
1044*61046927SAndroid Build Coastguard Worker
1045*61046927SAndroid Build Coastguard Worker if (instr->type != nir_instr_type_intrinsic)
1046*61046927SAndroid Build Coastguard Worker return false;
1047*61046927SAndroid Build Coastguard Worker
1048*61046927SAndroid Build Coastguard Worker nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1049*61046927SAndroid Build Coastguard Worker
1050*61046927SAndroid Build Coastguard Worker if (intr->intrinsic != nir_intrinsic_store_output)
1051*61046927SAndroid Build Coastguard Worker return false;
1052*61046927SAndroid Build Coastguard Worker
1053*61046927SAndroid Build Coastguard Worker if (bi_should_remove_store(intr, *idvs)) {
1054*61046927SAndroid Build Coastguard Worker nir_instr_remove(instr);
1055*61046927SAndroid Build Coastguard Worker return true;
1056*61046927SAndroid Build Coastguard Worker }
1057*61046927SAndroid Build Coastguard Worker
1058*61046927SAndroid Build Coastguard Worker return false;
1059*61046927SAndroid Build Coastguard Worker }
1060*61046927SAndroid Build Coastguard Worker
1061*61046927SAndroid Build Coastguard Worker static void
bi_emit_store_vary(bi_builder * b,nir_intrinsic_instr * instr)1062*61046927SAndroid Build Coastguard Worker bi_emit_store_vary(bi_builder *b, nir_intrinsic_instr *instr)
1063*61046927SAndroid Build Coastguard Worker {
1064*61046927SAndroid Build Coastguard Worker /* In principle we can do better for 16-bit. At the moment we require
1065*61046927SAndroid Build Coastguard Worker * 32-bit to permit the use of .auto, in order to force .u32 for flat
1066*61046927SAndroid Build Coastguard Worker * varyings, to handle internal TGSI shaders that set flat in the VS
1067*61046927SAndroid Build Coastguard Worker * but smooth in the FS */
1068*61046927SAndroid Build Coastguard Worker
1069*61046927SAndroid Build Coastguard Worker ASSERTED nir_alu_type T = nir_intrinsic_src_type(instr);
1070*61046927SAndroid Build Coastguard Worker ASSERTED unsigned T_size = nir_alu_type_get_type_size(T);
1071*61046927SAndroid Build Coastguard Worker assert(T_size == 32 || (b->shader->arch >= 9 && T_size == 16));
1072*61046927SAndroid Build Coastguard Worker enum bi_register_format regfmt = BI_REGISTER_FORMAT_AUTO;
1073*61046927SAndroid Build Coastguard Worker
1074*61046927SAndroid Build Coastguard Worker unsigned imm_index = 0;
1075*61046927SAndroid Build Coastguard Worker bool immediate = bi_is_intr_immediate(instr, &imm_index, 16);
1076*61046927SAndroid Build Coastguard Worker
1077*61046927SAndroid Build Coastguard Worker /* Only look at the total components needed. In effect, we fill in all
1078*61046927SAndroid Build Coastguard Worker * the intermediate "holes" in the write mask, since we can't mask off
1079*61046927SAndroid Build Coastguard Worker * stores. Since nir_lower_io_to_temporaries ensures each varying is
1080*61046927SAndroid Build Coastguard Worker * written at most once, anything that's masked out is undefined, so it
1081*61046927SAndroid Build Coastguard Worker * doesn't matter what we write there. So we may as well do the
1082*61046927SAndroid Build Coastguard Worker * simplest thing possible. */
1083*61046927SAndroid Build Coastguard Worker unsigned nr = util_last_bit(nir_intrinsic_write_mask(instr));
1084*61046927SAndroid Build Coastguard Worker assert(nr > 0 && nr <= nir_intrinsic_src_components(instr, 0));
1085*61046927SAndroid Build Coastguard Worker
1086*61046927SAndroid Build Coastguard Worker bi_index data = bi_src_index(&instr->src[0]);
1087*61046927SAndroid Build Coastguard Worker
1088*61046927SAndroid Build Coastguard Worker /* To keep the vector dimensions consistent, we need to drop some
1089*61046927SAndroid Build Coastguard Worker * components. This should be coalesced.
1090*61046927SAndroid Build Coastguard Worker *
1091*61046927SAndroid Build Coastguard Worker * TODO: This is ugly and maybe inefficient. Would we rather
1092*61046927SAndroid Build Coastguard Worker * introduce a TRIM.i32 pseudoinstruction?
1093*61046927SAndroid Build Coastguard Worker */
1094*61046927SAndroid Build Coastguard Worker if (nr < nir_intrinsic_src_components(instr, 0)) {
1095*61046927SAndroid Build Coastguard Worker assert(T_size == 32 && "todo: 16-bit trim");
1096*61046927SAndroid Build Coastguard Worker
1097*61046927SAndroid Build Coastguard Worker bi_index chans[4] = {bi_null(), bi_null(), bi_null(), bi_null()};
1098*61046927SAndroid Build Coastguard Worker unsigned src_comps = nir_intrinsic_src_components(instr, 0);
1099*61046927SAndroid Build Coastguard Worker
1100*61046927SAndroid Build Coastguard Worker bi_emit_split_i32(b, chans, data, src_comps);
1101*61046927SAndroid Build Coastguard Worker
1102*61046927SAndroid Build Coastguard Worker bi_index tmp = bi_temp(b->shader);
1103*61046927SAndroid Build Coastguard Worker bi_instr *collect = bi_collect_i32_to(b, tmp, nr);
1104*61046927SAndroid Build Coastguard Worker
1105*61046927SAndroid Build Coastguard Worker bi_foreach_src(collect, w)
1106*61046927SAndroid Build Coastguard Worker collect->src[w] = chans[w];
1107*61046927SAndroid Build Coastguard Worker
1108*61046927SAndroid Build Coastguard Worker data = tmp;
1109*61046927SAndroid Build Coastguard Worker }
1110*61046927SAndroid Build Coastguard Worker
1111*61046927SAndroid Build Coastguard Worker bool psiz =
1112*61046927SAndroid Build Coastguard Worker (nir_intrinsic_io_semantics(instr).location == VARYING_SLOT_PSIZ);
1113*61046927SAndroid Build Coastguard Worker bool layer =
1114*61046927SAndroid Build Coastguard Worker (nir_intrinsic_io_semantics(instr).location == VARYING_SLOT_LAYER);
1115*61046927SAndroid Build Coastguard Worker
1116*61046927SAndroid Build Coastguard Worker bi_index a[4] = {bi_null()};
1117*61046927SAndroid Build Coastguard Worker
1118*61046927SAndroid Build Coastguard Worker if (b->shader->arch <= 8 && b->shader->idvs == BI_IDVS_POSITION) {
1119*61046927SAndroid Build Coastguard Worker /* Bifrost position shaders have a fast path */
1120*61046927SAndroid Build Coastguard Worker assert(T == nir_type_float16 || T == nir_type_float32);
1121*61046927SAndroid Build Coastguard Worker unsigned regfmt = (T == nir_type_float16) ? 0 : 1;
1122*61046927SAndroid Build Coastguard Worker unsigned identity = (b->shader->arch == 6) ? 0x688 : 0;
1123*61046927SAndroid Build Coastguard Worker unsigned snap4 = 0x5E;
1124*61046927SAndroid Build Coastguard Worker uint32_t format = identity | (snap4 << 12) | (regfmt << 24);
1125*61046927SAndroid Build Coastguard Worker
1126*61046927SAndroid Build Coastguard Worker bi_st_cvt(b, data, bi_preload(b, 58), bi_preload(b, 59),
1127*61046927SAndroid Build Coastguard Worker bi_imm_u32(format), regfmt, nr - 1);
1128*61046927SAndroid Build Coastguard Worker } else if (b->shader->arch >= 9 && b->shader->idvs != BI_IDVS_NONE) {
1129*61046927SAndroid Build Coastguard Worker bi_index index = bi_preload(b, 59);
1130*61046927SAndroid Build Coastguard Worker unsigned pos_attr_offset = 0;
1131*61046927SAndroid Build Coastguard Worker unsigned src_bit_sz = nir_src_bit_size(instr->src[0]);
1132*61046927SAndroid Build Coastguard Worker
1133*61046927SAndroid Build Coastguard Worker if (psiz || layer)
1134*61046927SAndroid Build Coastguard Worker index = bi_iadd_imm_i32(b, index, 4);
1135*61046927SAndroid Build Coastguard Worker
1136*61046927SAndroid Build Coastguard Worker if (layer) {
1137*61046927SAndroid Build Coastguard Worker assert(nr == 1 && src_bit_sz == 32);
1138*61046927SAndroid Build Coastguard Worker src_bit_sz = 8;
1139*61046927SAndroid Build Coastguard Worker pos_attr_offset = 2;
1140*61046927SAndroid Build Coastguard Worker data = bi_byte(data, 0);
1141*61046927SAndroid Build Coastguard Worker }
1142*61046927SAndroid Build Coastguard Worker
1143*61046927SAndroid Build Coastguard Worker if (psiz)
1144*61046927SAndroid Build Coastguard Worker assert(T_size == 16 && "should've been lowered");
1145*61046927SAndroid Build Coastguard Worker
1146*61046927SAndroid Build Coastguard Worker bi_index address = bi_lea_buf_imm(b, index);
1147*61046927SAndroid Build Coastguard Worker bi_emit_split_i32(b, a, address, 2);
1148*61046927SAndroid Build Coastguard Worker
1149*61046927SAndroid Build Coastguard Worker bool varying = (b->shader->idvs == BI_IDVS_VARYING);
1150*61046927SAndroid Build Coastguard Worker
1151*61046927SAndroid Build Coastguard Worker bi_store(b, nr * src_bit_sz, data, a[0], a[1],
1152*61046927SAndroid Build Coastguard Worker varying ? BI_SEG_VARY : BI_SEG_POS,
1153*61046927SAndroid Build Coastguard Worker varying ? bi_varying_offset(b->shader, instr) : pos_attr_offset);
1154*61046927SAndroid Build Coastguard Worker } else if (immediate) {
1155*61046927SAndroid Build Coastguard Worker bi_index address = bi_lea_attr_imm(b, bi_vertex_id(b), bi_instance_id(b),
1156*61046927SAndroid Build Coastguard Worker regfmt, imm_index);
1157*61046927SAndroid Build Coastguard Worker bi_emit_split_i32(b, a, address, 3);
1158*61046927SAndroid Build Coastguard Worker
1159*61046927SAndroid Build Coastguard Worker bi_st_cvt(b, data, a[0], a[1], a[2], regfmt, nr - 1);
1160*61046927SAndroid Build Coastguard Worker } else {
1161*61046927SAndroid Build Coastguard Worker bi_index idx = bi_iadd_u32(b, bi_src_index(nir_get_io_offset_src(instr)),
1162*61046927SAndroid Build Coastguard Worker bi_imm_u32(nir_intrinsic_base(instr)), false);
1163*61046927SAndroid Build Coastguard Worker bi_index address =
1164*61046927SAndroid Build Coastguard Worker bi_lea_attr(b, bi_vertex_id(b), bi_instance_id(b), idx, regfmt);
1165*61046927SAndroid Build Coastguard Worker bi_emit_split_i32(b, a, address, 3);
1166*61046927SAndroid Build Coastguard Worker
1167*61046927SAndroid Build Coastguard Worker bi_st_cvt(b, data, a[0], a[1], a[2], regfmt, nr - 1);
1168*61046927SAndroid Build Coastguard Worker }
1169*61046927SAndroid Build Coastguard Worker }
1170*61046927SAndroid Build Coastguard Worker
1171*61046927SAndroid Build Coastguard Worker static void
bi_emit_load_ubo(bi_builder * b,nir_intrinsic_instr * instr)1172*61046927SAndroid Build Coastguard Worker bi_emit_load_ubo(bi_builder *b, nir_intrinsic_instr *instr)
1173*61046927SAndroid Build Coastguard Worker {
1174*61046927SAndroid Build Coastguard Worker nir_src *offset = nir_get_io_offset_src(instr);
1175*61046927SAndroid Build Coastguard Worker
1176*61046927SAndroid Build Coastguard Worker bool offset_is_const = nir_src_is_const(*offset);
1177*61046927SAndroid Build Coastguard Worker bi_index dyn_offset = bi_src_index(offset);
1178*61046927SAndroid Build Coastguard Worker uint32_t const_offset = offset_is_const ? nir_src_as_uint(*offset) : 0;
1179*61046927SAndroid Build Coastguard Worker
1180*61046927SAndroid Build Coastguard Worker bi_load_ubo_to(b, instr->num_components * instr->def.bit_size,
1181*61046927SAndroid Build Coastguard Worker bi_def_index(&instr->def),
1182*61046927SAndroid Build Coastguard Worker offset_is_const ? bi_imm_u32(const_offset) : dyn_offset,
1183*61046927SAndroid Build Coastguard Worker bi_src_index(&instr->src[0]));
1184*61046927SAndroid Build Coastguard Worker }
1185*61046927SAndroid Build Coastguard Worker
1186*61046927SAndroid Build Coastguard Worker static void
bi_emit_load_push_constant(bi_builder * b,nir_intrinsic_instr * instr)1187*61046927SAndroid Build Coastguard Worker bi_emit_load_push_constant(bi_builder *b, nir_intrinsic_instr *instr)
1188*61046927SAndroid Build Coastguard Worker {
1189*61046927SAndroid Build Coastguard Worker assert(b->shader->inputs->no_ubo_to_push && "can't mix push constant forms");
1190*61046927SAndroid Build Coastguard Worker
1191*61046927SAndroid Build Coastguard Worker nir_src *offset = &instr->src[0];
1192*61046927SAndroid Build Coastguard Worker assert(nir_src_is_const(*offset) && "no indirect push constants");
1193*61046927SAndroid Build Coastguard Worker uint32_t base = nir_intrinsic_base(instr) + nir_src_as_uint(*offset);
1194*61046927SAndroid Build Coastguard Worker assert((base & 3) == 0 && "unaligned push constants");
1195*61046927SAndroid Build Coastguard Worker
1196*61046927SAndroid Build Coastguard Worker unsigned bits = instr->def.bit_size * instr->def.num_components;
1197*61046927SAndroid Build Coastguard Worker
1198*61046927SAndroid Build Coastguard Worker unsigned n = DIV_ROUND_UP(bits, 32);
1199*61046927SAndroid Build Coastguard Worker assert(n <= 4);
1200*61046927SAndroid Build Coastguard Worker bi_index channels[4] = {bi_null()};
1201*61046927SAndroid Build Coastguard Worker
1202*61046927SAndroid Build Coastguard Worker for (unsigned i = 0; i < n; ++i) {
1203*61046927SAndroid Build Coastguard Worker unsigned word = (base >> 2) + i;
1204*61046927SAndroid Build Coastguard Worker
1205*61046927SAndroid Build Coastguard Worker channels[i] = bi_fau(BIR_FAU_UNIFORM | (word >> 1), word & 1);
1206*61046927SAndroid Build Coastguard Worker }
1207*61046927SAndroid Build Coastguard Worker
1208*61046927SAndroid Build Coastguard Worker bi_emit_collect_to(b, bi_def_index(&instr->def), channels, n);
1209*61046927SAndroid Build Coastguard Worker
1210*61046927SAndroid Build Coastguard Worker /* Update push->count to report the highest push constant word being accessed
1211*61046927SAndroid Build Coastguard Worker * by this shader.
1212*61046927SAndroid Build Coastguard Worker */
1213*61046927SAndroid Build Coastguard Worker b->shader->info.push->count =
1214*61046927SAndroid Build Coastguard Worker MAX2((base / 4) + n, b->shader->info.push->count);
1215*61046927SAndroid Build Coastguard Worker }
1216*61046927SAndroid Build Coastguard Worker
1217*61046927SAndroid Build Coastguard Worker static bi_index
bi_addr_high(bi_builder * b,nir_src * src)1218*61046927SAndroid Build Coastguard Worker bi_addr_high(bi_builder *b, nir_src *src)
1219*61046927SAndroid Build Coastguard Worker {
1220*61046927SAndroid Build Coastguard Worker return (nir_src_bit_size(*src) == 64) ? bi_extract(b, bi_src_index(src), 1)
1221*61046927SAndroid Build Coastguard Worker : bi_zero();
1222*61046927SAndroid Build Coastguard Worker }
1223*61046927SAndroid Build Coastguard Worker
1224*61046927SAndroid Build Coastguard Worker static void
bi_handle_segment(bi_builder * b,bi_index * addr_lo,bi_index * addr_hi,enum bi_seg seg,int16_t * offset)1225*61046927SAndroid Build Coastguard Worker bi_handle_segment(bi_builder *b, bi_index *addr_lo, bi_index *addr_hi,
1226*61046927SAndroid Build Coastguard Worker enum bi_seg seg, int16_t *offset)
1227*61046927SAndroid Build Coastguard Worker {
1228*61046927SAndroid Build Coastguard Worker /* Not needed on Bifrost or for global accesses */
1229*61046927SAndroid Build Coastguard Worker if (b->shader->arch < 9 || seg == BI_SEG_NONE)
1230*61046927SAndroid Build Coastguard Worker return;
1231*61046927SAndroid Build Coastguard Worker
1232*61046927SAndroid Build Coastguard Worker /* There is no segment modifier on Valhall. Instead, we need to
1233*61046927SAndroid Build Coastguard Worker * emit the arithmetic ourselves. We do have an offset
1234*61046927SAndroid Build Coastguard Worker * available, which saves an instruction for constant offsets.
1235*61046927SAndroid Build Coastguard Worker */
1236*61046927SAndroid Build Coastguard Worker bool wls = (seg == BI_SEG_WLS);
1237*61046927SAndroid Build Coastguard Worker assert(wls || (seg == BI_SEG_TL));
1238*61046927SAndroid Build Coastguard Worker
1239*61046927SAndroid Build Coastguard Worker enum bir_fau fau = wls ? BIR_FAU_WLS_PTR : BIR_FAU_TLS_PTR;
1240*61046927SAndroid Build Coastguard Worker
1241*61046927SAndroid Build Coastguard Worker bi_index base_lo = bi_fau(fau, false);
1242*61046927SAndroid Build Coastguard Worker
1243*61046927SAndroid Build Coastguard Worker if (offset && addr_lo->type == BI_INDEX_CONSTANT &&
1244*61046927SAndroid Build Coastguard Worker addr_lo->value == (int16_t)addr_lo->value) {
1245*61046927SAndroid Build Coastguard Worker *offset = addr_lo->value;
1246*61046927SAndroid Build Coastguard Worker *addr_lo = base_lo;
1247*61046927SAndroid Build Coastguard Worker } else {
1248*61046927SAndroid Build Coastguard Worker *addr_lo = bi_iadd_u32(b, base_lo, *addr_lo, false);
1249*61046927SAndroid Build Coastguard Worker }
1250*61046927SAndroid Build Coastguard Worker
1251*61046927SAndroid Build Coastguard Worker /* Do not allow overflow for WLS or TLS */
1252*61046927SAndroid Build Coastguard Worker *addr_hi = bi_fau(fau, true);
1253*61046927SAndroid Build Coastguard Worker }
1254*61046927SAndroid Build Coastguard Worker
1255*61046927SAndroid Build Coastguard Worker static void
bi_emit_load(bi_builder * b,nir_intrinsic_instr * instr,enum bi_seg seg)1256*61046927SAndroid Build Coastguard Worker bi_emit_load(bi_builder *b, nir_intrinsic_instr *instr, enum bi_seg seg)
1257*61046927SAndroid Build Coastguard Worker {
1258*61046927SAndroid Build Coastguard Worker int16_t offset = 0;
1259*61046927SAndroid Build Coastguard Worker unsigned bits = instr->num_components * instr->def.bit_size;
1260*61046927SAndroid Build Coastguard Worker bi_index dest = bi_def_index(&instr->def);
1261*61046927SAndroid Build Coastguard Worker bi_index addr_lo = bi_extract(b, bi_src_index(&instr->src[0]), 0);
1262*61046927SAndroid Build Coastguard Worker bi_index addr_hi = bi_addr_high(b, &instr->src[0]);
1263*61046927SAndroid Build Coastguard Worker
1264*61046927SAndroid Build Coastguard Worker bi_handle_segment(b, &addr_lo, &addr_hi, seg, &offset);
1265*61046927SAndroid Build Coastguard Worker
1266*61046927SAndroid Build Coastguard Worker bi_load_to(b, bits, dest, addr_lo, addr_hi, seg, offset);
1267*61046927SAndroid Build Coastguard Worker bi_emit_cached_split(b, dest, bits);
1268*61046927SAndroid Build Coastguard Worker }
1269*61046927SAndroid Build Coastguard Worker
1270*61046927SAndroid Build Coastguard Worker static void
bi_emit_store(bi_builder * b,nir_intrinsic_instr * instr,enum bi_seg seg)1271*61046927SAndroid Build Coastguard Worker bi_emit_store(bi_builder *b, nir_intrinsic_instr *instr, enum bi_seg seg)
1272*61046927SAndroid Build Coastguard Worker {
1273*61046927SAndroid Build Coastguard Worker /* Require contiguous masks, gauranteed by nir_lower_wrmasks */
1274*61046927SAndroid Build Coastguard Worker assert(nir_intrinsic_write_mask(instr) ==
1275*61046927SAndroid Build Coastguard Worker BITFIELD_MASK(instr->num_components));
1276*61046927SAndroid Build Coastguard Worker
1277*61046927SAndroid Build Coastguard Worker int16_t offset = 0;
1278*61046927SAndroid Build Coastguard Worker bi_index addr_lo = bi_extract(b, bi_src_index(&instr->src[1]), 0);
1279*61046927SAndroid Build Coastguard Worker bi_index addr_hi = bi_addr_high(b, &instr->src[1]);
1280*61046927SAndroid Build Coastguard Worker
1281*61046927SAndroid Build Coastguard Worker bi_handle_segment(b, &addr_lo, &addr_hi, seg, &offset);
1282*61046927SAndroid Build Coastguard Worker
1283*61046927SAndroid Build Coastguard Worker bi_store(b, instr->num_components * nir_src_bit_size(instr->src[0]),
1284*61046927SAndroid Build Coastguard Worker bi_src_index(&instr->src[0]), addr_lo, addr_hi, seg, offset);
1285*61046927SAndroid Build Coastguard Worker }
1286*61046927SAndroid Build Coastguard Worker
1287*61046927SAndroid Build Coastguard Worker /* Exchanges the staging register with memory */
1288*61046927SAndroid Build Coastguard Worker
1289*61046927SAndroid Build Coastguard Worker static void
bi_emit_axchg_to(bi_builder * b,bi_index dst,bi_index addr,nir_src * arg,enum bi_seg seg)1290*61046927SAndroid Build Coastguard Worker bi_emit_axchg_to(bi_builder *b, bi_index dst, bi_index addr, nir_src *arg,
1291*61046927SAndroid Build Coastguard Worker enum bi_seg seg)
1292*61046927SAndroid Build Coastguard Worker {
1293*61046927SAndroid Build Coastguard Worker assert(seg == BI_SEG_NONE || seg == BI_SEG_WLS);
1294*61046927SAndroid Build Coastguard Worker
1295*61046927SAndroid Build Coastguard Worker unsigned sz = nir_src_bit_size(*arg);
1296*61046927SAndroid Build Coastguard Worker assert(sz == 32 || sz == 64);
1297*61046927SAndroid Build Coastguard Worker
1298*61046927SAndroid Build Coastguard Worker bi_index data = bi_src_index(arg);
1299*61046927SAndroid Build Coastguard Worker
1300*61046927SAndroid Build Coastguard Worker bi_index addr_hi = (seg == BI_SEG_WLS) ? bi_zero() : bi_extract(b, addr, 1);
1301*61046927SAndroid Build Coastguard Worker
1302*61046927SAndroid Build Coastguard Worker if (b->shader->arch >= 9)
1303*61046927SAndroid Build Coastguard Worker bi_handle_segment(b, &addr, &addr_hi, seg, NULL);
1304*61046927SAndroid Build Coastguard Worker else if (seg == BI_SEG_WLS)
1305*61046927SAndroid Build Coastguard Worker addr_hi = bi_zero();
1306*61046927SAndroid Build Coastguard Worker
1307*61046927SAndroid Build Coastguard Worker bi_axchg_to(b, sz, dst, data, bi_extract(b, addr, 0), addr_hi, seg);
1308*61046927SAndroid Build Coastguard Worker }
1309*61046927SAndroid Build Coastguard Worker
1310*61046927SAndroid Build Coastguard Worker /* Exchanges the second staging register with memory if comparison with first
1311*61046927SAndroid Build Coastguard Worker * staging register passes */
1312*61046927SAndroid Build Coastguard Worker
1313*61046927SAndroid Build Coastguard Worker static void
bi_emit_acmpxchg_to(bi_builder * b,bi_index dst,bi_index addr,nir_src * arg_1,nir_src * arg_2,enum bi_seg seg)1314*61046927SAndroid Build Coastguard Worker bi_emit_acmpxchg_to(bi_builder *b, bi_index dst, bi_index addr, nir_src *arg_1,
1315*61046927SAndroid Build Coastguard Worker nir_src *arg_2, enum bi_seg seg)
1316*61046927SAndroid Build Coastguard Worker {
1317*61046927SAndroid Build Coastguard Worker assert(seg == BI_SEG_NONE || seg == BI_SEG_WLS);
1318*61046927SAndroid Build Coastguard Worker
1319*61046927SAndroid Build Coastguard Worker /* hardware is swapped from NIR */
1320*61046927SAndroid Build Coastguard Worker bi_index src0 = bi_src_index(arg_2);
1321*61046927SAndroid Build Coastguard Worker bi_index src1 = bi_src_index(arg_1);
1322*61046927SAndroid Build Coastguard Worker
1323*61046927SAndroid Build Coastguard Worker unsigned sz = nir_src_bit_size(*arg_1);
1324*61046927SAndroid Build Coastguard Worker assert(sz == 32 || sz == 64);
1325*61046927SAndroid Build Coastguard Worker
1326*61046927SAndroid Build Coastguard Worker bi_index data_words[] = {
1327*61046927SAndroid Build Coastguard Worker bi_extract(b, src0, 0),
1328*61046927SAndroid Build Coastguard Worker sz == 32 ? bi_extract(b, src1, 0) : bi_extract(b, src0, 1),
1329*61046927SAndroid Build Coastguard Worker
1330*61046927SAndroid Build Coastguard Worker /* 64-bit */
1331*61046927SAndroid Build Coastguard Worker bi_extract(b, src1, 0),
1332*61046927SAndroid Build Coastguard Worker sz == 32 ? bi_extract(b, src1, 0) : bi_extract(b, src1, 1),
1333*61046927SAndroid Build Coastguard Worker };
1334*61046927SAndroid Build Coastguard Worker
1335*61046927SAndroid Build Coastguard Worker bi_index in = bi_temp(b->shader);
1336*61046927SAndroid Build Coastguard Worker bi_emit_collect_to(b, in, data_words, 2 * (sz / 32));
1337*61046927SAndroid Build Coastguard Worker bi_index addr_hi = (seg == BI_SEG_WLS) ? bi_zero() : bi_extract(b, addr, 1);
1338*61046927SAndroid Build Coastguard Worker
1339*61046927SAndroid Build Coastguard Worker if (b->shader->arch >= 9)
1340*61046927SAndroid Build Coastguard Worker bi_handle_segment(b, &addr, &addr_hi, seg, NULL);
1341*61046927SAndroid Build Coastguard Worker else if (seg == BI_SEG_WLS)
1342*61046927SAndroid Build Coastguard Worker addr_hi = bi_zero();
1343*61046927SAndroid Build Coastguard Worker
1344*61046927SAndroid Build Coastguard Worker bi_index out = bi_acmpxchg(b, sz, in, bi_extract(b, addr, 0), addr_hi, seg);
1345*61046927SAndroid Build Coastguard Worker bi_emit_cached_split(b, out, sz);
1346*61046927SAndroid Build Coastguard Worker
1347*61046927SAndroid Build Coastguard Worker bi_index inout_words[] = {bi_extract(b, out, 0),
1348*61046927SAndroid Build Coastguard Worker sz == 64 ? bi_extract(b, out, 1) : bi_null()};
1349*61046927SAndroid Build Coastguard Worker
1350*61046927SAndroid Build Coastguard Worker bi_make_vec_to(b, dst, inout_words, NULL, sz / 32, 32);
1351*61046927SAndroid Build Coastguard Worker }
1352*61046927SAndroid Build Coastguard Worker
1353*61046927SAndroid Build Coastguard Worker static enum bi_atom_opc
bi_atom_opc_for_nir(nir_atomic_op op)1354*61046927SAndroid Build Coastguard Worker bi_atom_opc_for_nir(nir_atomic_op op)
1355*61046927SAndroid Build Coastguard Worker {
1356*61046927SAndroid Build Coastguard Worker /* clang-format off */
1357*61046927SAndroid Build Coastguard Worker switch (op) {
1358*61046927SAndroid Build Coastguard Worker case nir_atomic_op_iadd: return BI_ATOM_OPC_AADD;
1359*61046927SAndroid Build Coastguard Worker case nir_atomic_op_imin: return BI_ATOM_OPC_ASMIN;
1360*61046927SAndroid Build Coastguard Worker case nir_atomic_op_umin: return BI_ATOM_OPC_AUMIN;
1361*61046927SAndroid Build Coastguard Worker case nir_atomic_op_imax: return BI_ATOM_OPC_ASMAX;
1362*61046927SAndroid Build Coastguard Worker case nir_atomic_op_umax: return BI_ATOM_OPC_AUMAX;
1363*61046927SAndroid Build Coastguard Worker case nir_atomic_op_iand: return BI_ATOM_OPC_AAND;
1364*61046927SAndroid Build Coastguard Worker case nir_atomic_op_ior: return BI_ATOM_OPC_AOR;
1365*61046927SAndroid Build Coastguard Worker case nir_atomic_op_ixor: return BI_ATOM_OPC_AXOR;
1366*61046927SAndroid Build Coastguard Worker default: unreachable("Unexpected computational atomic");
1367*61046927SAndroid Build Coastguard Worker }
1368*61046927SAndroid Build Coastguard Worker /* clang-format on */
1369*61046927SAndroid Build Coastguard Worker }
1370*61046927SAndroid Build Coastguard Worker
1371*61046927SAndroid Build Coastguard Worker /* Optimized unary atomics are available with an implied #1 argument */
1372*61046927SAndroid Build Coastguard Worker
1373*61046927SAndroid Build Coastguard Worker static bool
bi_promote_atom_c1(enum bi_atom_opc op,bi_index arg,enum bi_atom_opc * out)1374*61046927SAndroid Build Coastguard Worker bi_promote_atom_c1(enum bi_atom_opc op, bi_index arg, enum bi_atom_opc *out)
1375*61046927SAndroid Build Coastguard Worker {
1376*61046927SAndroid Build Coastguard Worker /* Check we have a compatible constant */
1377*61046927SAndroid Build Coastguard Worker if (arg.type != BI_INDEX_CONSTANT)
1378*61046927SAndroid Build Coastguard Worker return false;
1379*61046927SAndroid Build Coastguard Worker
1380*61046927SAndroid Build Coastguard Worker if (!(arg.value == 1 || (arg.value == -1 && op == BI_ATOM_OPC_AADD)))
1381*61046927SAndroid Build Coastguard Worker return false;
1382*61046927SAndroid Build Coastguard Worker
1383*61046927SAndroid Build Coastguard Worker /* Check for a compatible operation */
1384*61046927SAndroid Build Coastguard Worker switch (op) {
1385*61046927SAndroid Build Coastguard Worker case BI_ATOM_OPC_AADD:
1386*61046927SAndroid Build Coastguard Worker *out = (arg.value == 1) ? BI_ATOM_OPC_AINC : BI_ATOM_OPC_ADEC;
1387*61046927SAndroid Build Coastguard Worker return true;
1388*61046927SAndroid Build Coastguard Worker case BI_ATOM_OPC_ASMAX:
1389*61046927SAndroid Build Coastguard Worker *out = BI_ATOM_OPC_ASMAX1;
1390*61046927SAndroid Build Coastguard Worker return true;
1391*61046927SAndroid Build Coastguard Worker case BI_ATOM_OPC_AUMAX:
1392*61046927SAndroid Build Coastguard Worker *out = BI_ATOM_OPC_AUMAX1;
1393*61046927SAndroid Build Coastguard Worker return true;
1394*61046927SAndroid Build Coastguard Worker case BI_ATOM_OPC_AOR:
1395*61046927SAndroid Build Coastguard Worker *out = BI_ATOM_OPC_AOR1;
1396*61046927SAndroid Build Coastguard Worker return true;
1397*61046927SAndroid Build Coastguard Worker default:
1398*61046927SAndroid Build Coastguard Worker return false;
1399*61046927SAndroid Build Coastguard Worker }
1400*61046927SAndroid Build Coastguard Worker }
1401*61046927SAndroid Build Coastguard Worker
1402*61046927SAndroid Build Coastguard Worker /*
1403*61046927SAndroid Build Coastguard Worker * Coordinates are 16-bit integers in Bifrost but 32-bit in NIR. We need to
1404*61046927SAndroid Build Coastguard Worker * translate between these forms (with MKVEC.v2i16).
1405*61046927SAndroid Build Coastguard Worker *
1406*61046927SAndroid Build Coastguard Worker * Aditionally on Valhall, cube maps in the attribute pipe are treated as 2D
1407*61046927SAndroid Build Coastguard Worker * arrays. For uniform handling, we also treat 3D textures like 2D arrays.
1408*61046927SAndroid Build Coastguard Worker *
1409*61046927SAndroid Build Coastguard Worker * Our indexing needs to reflects this. Since Valhall and Bifrost are quite
1410*61046927SAndroid Build Coastguard Worker * different, we provide separate functions for these.
1411*61046927SAndroid Build Coastguard Worker */
1412*61046927SAndroid Build Coastguard Worker static bi_index
bi_emit_image_coord(bi_builder * b,bi_index coord,unsigned src_idx,unsigned coord_comps,bool is_array,bool is_msaa)1413*61046927SAndroid Build Coastguard Worker bi_emit_image_coord(bi_builder *b, bi_index coord, unsigned src_idx,
1414*61046927SAndroid Build Coastguard Worker unsigned coord_comps, bool is_array, bool is_msaa)
1415*61046927SAndroid Build Coastguard Worker {
1416*61046927SAndroid Build Coastguard Worker assert(coord_comps > 0 && coord_comps <= 3);
1417*61046927SAndroid Build Coastguard Worker
1418*61046927SAndroid Build Coastguard Worker /* MSAA load store should have been lowered */
1419*61046927SAndroid Build Coastguard Worker assert(!is_msaa);
1420*61046927SAndroid Build Coastguard Worker if (src_idx == 0) {
1421*61046927SAndroid Build Coastguard Worker if (coord_comps == 1 || (coord_comps == 2 && is_array))
1422*61046927SAndroid Build Coastguard Worker return bi_extract(b, coord, 0);
1423*61046927SAndroid Build Coastguard Worker else
1424*61046927SAndroid Build Coastguard Worker return bi_mkvec_v2i16(b, bi_half(bi_extract(b, coord, 0), false),
1425*61046927SAndroid Build Coastguard Worker bi_half(bi_extract(b, coord, 1), false));
1426*61046927SAndroid Build Coastguard Worker } else {
1427*61046927SAndroid Build Coastguard Worker if (coord_comps == 3)
1428*61046927SAndroid Build Coastguard Worker return bi_extract(b, coord, 2);
1429*61046927SAndroid Build Coastguard Worker else if (coord_comps == 2 && is_array)
1430*61046927SAndroid Build Coastguard Worker return bi_extract(b, coord, 1);
1431*61046927SAndroid Build Coastguard Worker else
1432*61046927SAndroid Build Coastguard Worker return bi_zero();
1433*61046927SAndroid Build Coastguard Worker }
1434*61046927SAndroid Build Coastguard Worker }
1435*61046927SAndroid Build Coastguard Worker
1436*61046927SAndroid Build Coastguard Worker static bi_index
va_emit_image_coord(bi_builder * b,bi_index coord,bi_index sample_index,unsigned src_idx,unsigned coord_comps,bool is_array,bool is_msaa)1437*61046927SAndroid Build Coastguard Worker va_emit_image_coord(bi_builder *b, bi_index coord, bi_index sample_index,
1438*61046927SAndroid Build Coastguard Worker unsigned src_idx, unsigned coord_comps, bool is_array,
1439*61046927SAndroid Build Coastguard Worker bool is_msaa)
1440*61046927SAndroid Build Coastguard Worker {
1441*61046927SAndroid Build Coastguard Worker assert(coord_comps > 0 && coord_comps <= 3);
1442*61046927SAndroid Build Coastguard Worker if (src_idx == 0) {
1443*61046927SAndroid Build Coastguard Worker if (coord_comps == 1 || (coord_comps == 2 && is_array))
1444*61046927SAndroid Build Coastguard Worker return bi_extract(b, coord, 0);
1445*61046927SAndroid Build Coastguard Worker else
1446*61046927SAndroid Build Coastguard Worker return bi_mkvec_v2i16(b, bi_half(bi_extract(b, coord, 0), false),
1447*61046927SAndroid Build Coastguard Worker bi_half(bi_extract(b, coord, 1), false));
1448*61046927SAndroid Build Coastguard Worker } else if (is_msaa) {
1449*61046927SAndroid Build Coastguard Worker bi_index array_idx = bi_extract(b, sample_index, 0);
1450*61046927SAndroid Build Coastguard Worker if (coord_comps == 3)
1451*61046927SAndroid Build Coastguard Worker return bi_mkvec_v2i16(b, bi_half(array_idx, false),
1452*61046927SAndroid Build Coastguard Worker bi_half(bi_extract(b, coord, 2), false));
1453*61046927SAndroid Build Coastguard Worker else if (coord_comps == 2)
1454*61046927SAndroid Build Coastguard Worker return array_idx;
1455*61046927SAndroid Build Coastguard Worker } else if (coord_comps == 3)
1456*61046927SAndroid Build Coastguard Worker return bi_mkvec_v2i16(b, bi_imm_u16(0),
1457*61046927SAndroid Build Coastguard Worker bi_half(bi_extract(b, coord, 2), false));
1458*61046927SAndroid Build Coastguard Worker else if (coord_comps == 2 && is_array)
1459*61046927SAndroid Build Coastguard Worker return bi_mkvec_v2i16(b, bi_imm_u16(0),
1460*61046927SAndroid Build Coastguard Worker bi_half(bi_extract(b, coord, 1), false));
1461*61046927SAndroid Build Coastguard Worker return bi_zero();
1462*61046927SAndroid Build Coastguard Worker }
1463*61046927SAndroid Build Coastguard Worker
1464*61046927SAndroid Build Coastguard Worker static void
bi_emit_image_load(bi_builder * b,nir_intrinsic_instr * instr)1465*61046927SAndroid Build Coastguard Worker bi_emit_image_load(bi_builder *b, nir_intrinsic_instr *instr)
1466*61046927SAndroid Build Coastguard Worker {
1467*61046927SAndroid Build Coastguard Worker enum glsl_sampler_dim dim = nir_intrinsic_image_dim(instr);
1468*61046927SAndroid Build Coastguard Worker unsigned coord_comps = nir_image_intrinsic_coord_components(instr);
1469*61046927SAndroid Build Coastguard Worker bool array = nir_intrinsic_image_array(instr);
1470*61046927SAndroid Build Coastguard Worker
1471*61046927SAndroid Build Coastguard Worker bi_index coords = bi_src_index(&instr->src[1]);
1472*61046927SAndroid Build Coastguard Worker bi_index indexvar = bi_src_index(&instr->src[2]);
1473*61046927SAndroid Build Coastguard Worker bi_index xy, zw;
1474*61046927SAndroid Build Coastguard Worker bool is_ms = (dim == GLSL_SAMPLER_DIM_MS);
1475*61046927SAndroid Build Coastguard Worker if (b->shader->arch < 9) {
1476*61046927SAndroid Build Coastguard Worker xy = bi_emit_image_coord(b, coords, 0, coord_comps, array, is_ms);
1477*61046927SAndroid Build Coastguard Worker zw = bi_emit_image_coord(b, coords, 1, coord_comps, array, is_ms);
1478*61046927SAndroid Build Coastguard Worker } else {
1479*61046927SAndroid Build Coastguard Worker xy =
1480*61046927SAndroid Build Coastguard Worker va_emit_image_coord(b, coords, indexvar, 0, coord_comps, array, is_ms);
1481*61046927SAndroid Build Coastguard Worker zw =
1482*61046927SAndroid Build Coastguard Worker va_emit_image_coord(b, coords, indexvar, 1, coord_comps, array, is_ms);
1483*61046927SAndroid Build Coastguard Worker }
1484*61046927SAndroid Build Coastguard Worker bi_index dest = bi_def_index(&instr->def);
1485*61046927SAndroid Build Coastguard Worker enum bi_register_format regfmt =
1486*61046927SAndroid Build Coastguard Worker bi_reg_fmt_for_nir(nir_intrinsic_dest_type(instr));
1487*61046927SAndroid Build Coastguard Worker enum bi_vecsize vecsize = instr->num_components - 1;
1488*61046927SAndroid Build Coastguard Worker
1489*61046927SAndroid Build Coastguard Worker if (b->shader->arch >= 9 && nir_src_is_const(instr->src[0])) {
1490*61046927SAndroid Build Coastguard Worker const unsigned raw_value = nir_src_as_uint(instr->src[0]);
1491*61046927SAndroid Build Coastguard Worker const unsigned table_index = pan_res_handle_get_table(raw_value);
1492*61046927SAndroid Build Coastguard Worker const unsigned texture_index = pan_res_handle_get_index(raw_value);
1493*61046927SAndroid Build Coastguard Worker
1494*61046927SAndroid Build Coastguard Worker if (texture_index < 16 && va_is_valid_const_table(table_index)) {
1495*61046927SAndroid Build Coastguard Worker bi_instr *I =
1496*61046927SAndroid Build Coastguard Worker bi_ld_tex_imm_to(b, dest, xy, zw, regfmt, vecsize, texture_index);
1497*61046927SAndroid Build Coastguard Worker I->table = va_res_fold_table_idx(table_index);
1498*61046927SAndroid Build Coastguard Worker } else {
1499*61046927SAndroid Build Coastguard Worker bi_ld_tex_to(b, dest, xy, zw, bi_src_index(&instr->src[0]), regfmt,
1500*61046927SAndroid Build Coastguard Worker vecsize);
1501*61046927SAndroid Build Coastguard Worker }
1502*61046927SAndroid Build Coastguard Worker } else if (b->shader->arch >= 9) {
1503*61046927SAndroid Build Coastguard Worker bi_ld_tex_to(b, dest, xy, zw, bi_src_index(&instr->src[0]), regfmt,
1504*61046927SAndroid Build Coastguard Worker vecsize);
1505*61046927SAndroid Build Coastguard Worker } else {
1506*61046927SAndroid Build Coastguard Worker bi_ld_attr_tex_to(b, dest, xy, zw, bi_src_index(&instr->src[0]), regfmt,
1507*61046927SAndroid Build Coastguard Worker vecsize);
1508*61046927SAndroid Build Coastguard Worker }
1509*61046927SAndroid Build Coastguard Worker
1510*61046927SAndroid Build Coastguard Worker bi_split_def(b, &instr->def);
1511*61046927SAndroid Build Coastguard Worker }
1512*61046927SAndroid Build Coastguard Worker
1513*61046927SAndroid Build Coastguard Worker static void
bi_emit_lea_image_to(bi_builder * b,bi_index dest,nir_intrinsic_instr * instr)1514*61046927SAndroid Build Coastguard Worker bi_emit_lea_image_to(bi_builder *b, bi_index dest, nir_intrinsic_instr *instr)
1515*61046927SAndroid Build Coastguard Worker {
1516*61046927SAndroid Build Coastguard Worker enum glsl_sampler_dim dim = nir_intrinsic_image_dim(instr);
1517*61046927SAndroid Build Coastguard Worker bool array = nir_intrinsic_image_array(instr);
1518*61046927SAndroid Build Coastguard Worker unsigned coord_comps = nir_image_intrinsic_coord_components(instr);
1519*61046927SAndroid Build Coastguard Worker
1520*61046927SAndroid Build Coastguard Worker enum bi_register_format type =
1521*61046927SAndroid Build Coastguard Worker (instr->intrinsic == nir_intrinsic_image_store)
1522*61046927SAndroid Build Coastguard Worker ? bi_reg_fmt_for_nir(nir_intrinsic_src_type(instr))
1523*61046927SAndroid Build Coastguard Worker : BI_REGISTER_FORMAT_AUTO;
1524*61046927SAndroid Build Coastguard Worker
1525*61046927SAndroid Build Coastguard Worker bi_index coords = bi_src_index(&instr->src[1]);
1526*61046927SAndroid Build Coastguard Worker bi_index indices = bi_src_index(&instr->src[2]);
1527*61046927SAndroid Build Coastguard Worker bi_index xy, zw;
1528*61046927SAndroid Build Coastguard Worker bool is_ms = dim == GLSL_SAMPLER_DIM_MS;
1529*61046927SAndroid Build Coastguard Worker if (b->shader->arch < 9) {
1530*61046927SAndroid Build Coastguard Worker xy = bi_emit_image_coord(b, coords, 0, coord_comps, array, is_ms);
1531*61046927SAndroid Build Coastguard Worker zw = bi_emit_image_coord(b, coords, 1, coord_comps, array, is_ms);
1532*61046927SAndroid Build Coastguard Worker } else {
1533*61046927SAndroid Build Coastguard Worker xy =
1534*61046927SAndroid Build Coastguard Worker va_emit_image_coord(b, coords, indices, 0, coord_comps, array, is_ms);
1535*61046927SAndroid Build Coastguard Worker zw =
1536*61046927SAndroid Build Coastguard Worker va_emit_image_coord(b, coords, indices, 1, coord_comps, array, is_ms);
1537*61046927SAndroid Build Coastguard Worker }
1538*61046927SAndroid Build Coastguard Worker
1539*61046927SAndroid Build Coastguard Worker if (b->shader->arch >= 9 && nir_src_is_const(instr->src[0])) {
1540*61046927SAndroid Build Coastguard Worker const unsigned raw_value = nir_src_as_uint(instr->src[0]);
1541*61046927SAndroid Build Coastguard Worker unsigned table_index = pan_res_handle_get_table(raw_value);
1542*61046927SAndroid Build Coastguard Worker unsigned texture_index = pan_res_handle_get_index(raw_value);
1543*61046927SAndroid Build Coastguard Worker
1544*61046927SAndroid Build Coastguard Worker if (texture_index < 16 && va_is_valid_const_table(table_index)) {
1545*61046927SAndroid Build Coastguard Worker bi_instr *I = bi_lea_tex_imm_to(b, dest, xy, zw, false, texture_index);
1546*61046927SAndroid Build Coastguard Worker I->table = va_res_fold_table_idx(table_index);
1547*61046927SAndroid Build Coastguard Worker } else {
1548*61046927SAndroid Build Coastguard Worker bi_lea_tex_to(b, dest, xy, zw, bi_src_index(&instr->src[0]), false);
1549*61046927SAndroid Build Coastguard Worker }
1550*61046927SAndroid Build Coastguard Worker } else if (b->shader->arch >= 9) {
1551*61046927SAndroid Build Coastguard Worker bi_lea_tex_to(b, dest, xy, zw, bi_src_index(&instr->src[0]), false);
1552*61046927SAndroid Build Coastguard Worker } else {
1553*61046927SAndroid Build Coastguard Worker bi_instr *I = bi_lea_attr_tex_to(b, dest, xy, zw,
1554*61046927SAndroid Build Coastguard Worker bi_src_index(&instr->src[0]), type);
1555*61046927SAndroid Build Coastguard Worker
1556*61046927SAndroid Build Coastguard Worker /* LEA_ATTR_TEX defaults to the secondary attribute table, but
1557*61046927SAndroid Build Coastguard Worker * our ABI has all images in the primary attribute table
1558*61046927SAndroid Build Coastguard Worker */
1559*61046927SAndroid Build Coastguard Worker I->table = BI_TABLE_ATTRIBUTE_1;
1560*61046927SAndroid Build Coastguard Worker }
1561*61046927SAndroid Build Coastguard Worker
1562*61046927SAndroid Build Coastguard Worker bi_emit_cached_split(b, dest, 3 * 32);
1563*61046927SAndroid Build Coastguard Worker }
1564*61046927SAndroid Build Coastguard Worker
1565*61046927SAndroid Build Coastguard Worker static bi_index
bi_emit_lea_image(bi_builder * b,nir_intrinsic_instr * instr)1566*61046927SAndroid Build Coastguard Worker bi_emit_lea_image(bi_builder *b, nir_intrinsic_instr *instr)
1567*61046927SAndroid Build Coastguard Worker {
1568*61046927SAndroid Build Coastguard Worker bi_index dest = bi_temp(b->shader);
1569*61046927SAndroid Build Coastguard Worker bi_emit_lea_image_to(b, dest, instr);
1570*61046927SAndroid Build Coastguard Worker return dest;
1571*61046927SAndroid Build Coastguard Worker }
1572*61046927SAndroid Build Coastguard Worker
1573*61046927SAndroid Build Coastguard Worker static void
bi_emit_image_store(bi_builder * b,nir_intrinsic_instr * instr)1574*61046927SAndroid Build Coastguard Worker bi_emit_image_store(bi_builder *b, nir_intrinsic_instr *instr)
1575*61046927SAndroid Build Coastguard Worker {
1576*61046927SAndroid Build Coastguard Worker bi_index a[4] = {bi_null()};
1577*61046927SAndroid Build Coastguard Worker bi_emit_split_i32(b, a, bi_emit_lea_image(b, instr), 3);
1578*61046927SAndroid Build Coastguard Worker
1579*61046927SAndroid Build Coastguard Worker /* Due to SPIR-V limitations, the source type is not fully reliable: it
1580*61046927SAndroid Build Coastguard Worker * reports uint32 even for write_imagei. This causes an incorrect
1581*61046927SAndroid Build Coastguard Worker * u32->s32->u32 roundtrip which incurs an unwanted clamping. Use auto32
1582*61046927SAndroid Build Coastguard Worker * instead, which will match per the OpenCL spec. Of course this does
1583*61046927SAndroid Build Coastguard Worker * not work for 16-bit stores, but those are not available in OpenCL.
1584*61046927SAndroid Build Coastguard Worker */
1585*61046927SAndroid Build Coastguard Worker nir_alu_type T = nir_intrinsic_src_type(instr);
1586*61046927SAndroid Build Coastguard Worker assert(nir_alu_type_get_type_size(T) == 32);
1587*61046927SAndroid Build Coastguard Worker
1588*61046927SAndroid Build Coastguard Worker bi_st_cvt(b, bi_src_index(&instr->src[3]), a[0], a[1], a[2],
1589*61046927SAndroid Build Coastguard Worker BI_REGISTER_FORMAT_AUTO, instr->num_components - 1);
1590*61046927SAndroid Build Coastguard Worker }
1591*61046927SAndroid Build Coastguard Worker
1592*61046927SAndroid Build Coastguard Worker static void
bi_emit_atomic_i32_to(bi_builder * b,bi_index dst,bi_index addr,bi_index arg,nir_atomic_op op)1593*61046927SAndroid Build Coastguard Worker bi_emit_atomic_i32_to(bi_builder *b, bi_index dst, bi_index addr, bi_index arg,
1594*61046927SAndroid Build Coastguard Worker nir_atomic_op op)
1595*61046927SAndroid Build Coastguard Worker {
1596*61046927SAndroid Build Coastguard Worker enum bi_atom_opc opc = bi_atom_opc_for_nir(op);
1597*61046927SAndroid Build Coastguard Worker enum bi_atom_opc post_opc = opc;
1598*61046927SAndroid Build Coastguard Worker bool bifrost = b->shader->arch <= 8;
1599*61046927SAndroid Build Coastguard Worker
1600*61046927SAndroid Build Coastguard Worker /* ATOM_C.i32 takes a vector with {arg, coalesced}, ATOM_C1.i32 doesn't
1601*61046927SAndroid Build Coastguard Worker * take any vector but can still output in RETURN mode */
1602*61046927SAndroid Build Coastguard Worker bi_index tmp_dest = bifrost ? bi_temp(b->shader) : dst;
1603*61046927SAndroid Build Coastguard Worker unsigned sr_count = bifrost ? 2 : 1;
1604*61046927SAndroid Build Coastguard Worker
1605*61046927SAndroid Build Coastguard Worker /* Generate either ATOM or ATOM1 as required */
1606*61046927SAndroid Build Coastguard Worker if (bi_promote_atom_c1(opc, arg, &opc)) {
1607*61046927SAndroid Build Coastguard Worker bi_atom1_return_i32_to(b, tmp_dest, bi_extract(b, addr, 0),
1608*61046927SAndroid Build Coastguard Worker bi_extract(b, addr, 1), opc, sr_count);
1609*61046927SAndroid Build Coastguard Worker } else {
1610*61046927SAndroid Build Coastguard Worker bi_atom_return_i32_to(b, tmp_dest, arg, bi_extract(b, addr, 0),
1611*61046927SAndroid Build Coastguard Worker bi_extract(b, addr, 1), opc, sr_count);
1612*61046927SAndroid Build Coastguard Worker }
1613*61046927SAndroid Build Coastguard Worker
1614*61046927SAndroid Build Coastguard Worker if (bifrost) {
1615*61046927SAndroid Build Coastguard Worker /* Post-process it */
1616*61046927SAndroid Build Coastguard Worker bi_emit_cached_split_i32(b, tmp_dest, 2);
1617*61046927SAndroid Build Coastguard Worker bi_atom_post_i32_to(b, dst, bi_extract(b, tmp_dest, 0),
1618*61046927SAndroid Build Coastguard Worker bi_extract(b, tmp_dest, 1), post_opc);
1619*61046927SAndroid Build Coastguard Worker }
1620*61046927SAndroid Build Coastguard Worker }
1621*61046927SAndroid Build Coastguard Worker
1622*61046927SAndroid Build Coastguard Worker static void
bi_emit_load_frag_coord_zw(bi_builder * b,bi_index dst,unsigned channel)1623*61046927SAndroid Build Coastguard Worker bi_emit_load_frag_coord_zw(bi_builder *b, bi_index dst, unsigned channel)
1624*61046927SAndroid Build Coastguard Worker {
1625*61046927SAndroid Build Coastguard Worker bi_ld_var_special_to(
1626*61046927SAndroid Build Coastguard Worker b, dst, bi_zero(), BI_REGISTER_FORMAT_F32, BI_SAMPLE_CENTER,
1627*61046927SAndroid Build Coastguard Worker BI_UPDATE_CLOBBER,
1628*61046927SAndroid Build Coastguard Worker (channel == 2) ? BI_VARYING_NAME_FRAG_Z : BI_VARYING_NAME_FRAG_W,
1629*61046927SAndroid Build Coastguard Worker BI_VECSIZE_NONE);
1630*61046927SAndroid Build Coastguard Worker }
1631*61046927SAndroid Build Coastguard Worker
1632*61046927SAndroid Build Coastguard Worker static void
bi_emit_ld_tile(bi_builder * b,nir_intrinsic_instr * instr)1633*61046927SAndroid Build Coastguard Worker bi_emit_ld_tile(bi_builder *b, nir_intrinsic_instr *instr)
1634*61046927SAndroid Build Coastguard Worker {
1635*61046927SAndroid Build Coastguard Worker bi_index dest = bi_def_index(&instr->def);
1636*61046927SAndroid Build Coastguard Worker nir_alu_type T = nir_intrinsic_dest_type(instr);
1637*61046927SAndroid Build Coastguard Worker enum bi_register_format regfmt = bi_reg_fmt_for_nir(T);
1638*61046927SAndroid Build Coastguard Worker unsigned size = instr->def.bit_size;
1639*61046927SAndroid Build Coastguard Worker unsigned nr = instr->num_components;
1640*61046927SAndroid Build Coastguard Worker
1641*61046927SAndroid Build Coastguard Worker /* Get the render target */
1642*61046927SAndroid Build Coastguard Worker nir_io_semantics sem = nir_intrinsic_io_semantics(instr);
1643*61046927SAndroid Build Coastguard Worker unsigned loc = sem.location;
1644*61046927SAndroid Build Coastguard Worker assert(loc >= FRAG_RESULT_DATA0);
1645*61046927SAndroid Build Coastguard Worker unsigned rt = (loc - FRAG_RESULT_DATA0);
1646*61046927SAndroid Build Coastguard Worker
1647*61046927SAndroid Build Coastguard Worker bi_ld_tile_to(b, dest, bi_pixel_indices(b, rt), bi_coverage(b),
1648*61046927SAndroid Build Coastguard Worker bi_src_index(&instr->src[0]), regfmt, nr - 1);
1649*61046927SAndroid Build Coastguard Worker bi_emit_cached_split(b, dest, size * nr);
1650*61046927SAndroid Build Coastguard Worker }
1651*61046927SAndroid Build Coastguard Worker
1652*61046927SAndroid Build Coastguard Worker /*
1653*61046927SAndroid Build Coastguard Worker * Older Bifrost hardware has a limited CLPER instruction. Add a safe helper
1654*61046927SAndroid Build Coastguard Worker * that uses the hardware functionality if available and lowers otherwise.
1655*61046927SAndroid Build Coastguard Worker */
1656*61046927SAndroid Build Coastguard Worker static bi_index
bi_clper(bi_builder * b,bi_index s0,bi_index s1,enum bi_lane_op lop)1657*61046927SAndroid Build Coastguard Worker bi_clper(bi_builder *b, bi_index s0, bi_index s1, enum bi_lane_op lop)
1658*61046927SAndroid Build Coastguard Worker {
1659*61046927SAndroid Build Coastguard Worker if (b->shader->quirks & BIFROST_LIMITED_CLPER) {
1660*61046927SAndroid Build Coastguard Worker if (lop == BI_LANE_OP_XOR) {
1661*61046927SAndroid Build Coastguard Worker bi_index lane_id = bi_fau(BIR_FAU_LANE_ID, false);
1662*61046927SAndroid Build Coastguard Worker s1 = bi_lshift_xor_i32(b, lane_id, s1, bi_imm_u8(0));
1663*61046927SAndroid Build Coastguard Worker } else {
1664*61046927SAndroid Build Coastguard Worker assert(lop == BI_LANE_OP_NONE);
1665*61046927SAndroid Build Coastguard Worker }
1666*61046927SAndroid Build Coastguard Worker
1667*61046927SAndroid Build Coastguard Worker return bi_clper_old_i32(b, s0, s1);
1668*61046927SAndroid Build Coastguard Worker } else {
1669*61046927SAndroid Build Coastguard Worker return bi_clper_i32(b, s0, s1, BI_INACTIVE_RESULT_ZERO, lop,
1670*61046927SAndroid Build Coastguard Worker BI_SUBGROUP_SUBGROUP4);
1671*61046927SAndroid Build Coastguard Worker }
1672*61046927SAndroid Build Coastguard Worker }
1673*61046927SAndroid Build Coastguard Worker
1674*61046927SAndroid Build Coastguard Worker static bool
bi_nir_all_uses_fabs(nir_def * def)1675*61046927SAndroid Build Coastguard Worker bi_nir_all_uses_fabs(nir_def *def)
1676*61046927SAndroid Build Coastguard Worker {
1677*61046927SAndroid Build Coastguard Worker nir_foreach_use(use, def) {
1678*61046927SAndroid Build Coastguard Worker nir_instr *instr = nir_src_parent_instr(use);
1679*61046927SAndroid Build Coastguard Worker
1680*61046927SAndroid Build Coastguard Worker if (instr->type != nir_instr_type_alu ||
1681*61046927SAndroid Build Coastguard Worker nir_instr_as_alu(instr)->op != nir_op_fabs)
1682*61046927SAndroid Build Coastguard Worker return false;
1683*61046927SAndroid Build Coastguard Worker }
1684*61046927SAndroid Build Coastguard Worker
1685*61046927SAndroid Build Coastguard Worker return true;
1686*61046927SAndroid Build Coastguard Worker }
1687*61046927SAndroid Build Coastguard Worker
1688*61046927SAndroid Build Coastguard Worker static void
bi_emit_derivative(bi_builder * b,bi_index dst,nir_intrinsic_instr * instr,unsigned axis,bool coarse)1689*61046927SAndroid Build Coastguard Worker bi_emit_derivative(bi_builder *b, bi_index dst, nir_intrinsic_instr *instr,
1690*61046927SAndroid Build Coastguard Worker unsigned axis, bool coarse)
1691*61046927SAndroid Build Coastguard Worker {
1692*61046927SAndroid Build Coastguard Worker bi_index left, right;
1693*61046927SAndroid Build Coastguard Worker bi_index s0 = bi_src_index(&instr->src[0]);
1694*61046927SAndroid Build Coastguard Worker unsigned sz = instr->def.bit_size;
1695*61046927SAndroid Build Coastguard Worker
1696*61046927SAndroid Build Coastguard Worker /* If all uses are fabs, the sign of the derivative doesn't matter. This is
1697*61046927SAndroid Build Coastguard Worker * inherently based on fine derivatives so we can't do it for coarse.
1698*61046927SAndroid Build Coastguard Worker */
1699*61046927SAndroid Build Coastguard Worker if (bi_nir_all_uses_fabs(&instr->def) && !coarse) {
1700*61046927SAndroid Build Coastguard Worker left = s0;
1701*61046927SAndroid Build Coastguard Worker right = bi_clper(b, s0, bi_imm_u32(axis), BI_LANE_OP_XOR);
1702*61046927SAndroid Build Coastguard Worker } else {
1703*61046927SAndroid Build Coastguard Worker bi_index lane1, lane2;
1704*61046927SAndroid Build Coastguard Worker if (coarse) {
1705*61046927SAndroid Build Coastguard Worker lane1 = bi_imm_u32(0);
1706*61046927SAndroid Build Coastguard Worker lane2 = bi_imm_u32(axis);
1707*61046927SAndroid Build Coastguard Worker } else {
1708*61046927SAndroid Build Coastguard Worker lane1 = bi_lshift_and_i32(b, bi_fau(BIR_FAU_LANE_ID, false),
1709*61046927SAndroid Build Coastguard Worker bi_imm_u32(0x3 & ~axis), bi_imm_u8(0));
1710*61046927SAndroid Build Coastguard Worker
1711*61046927SAndroid Build Coastguard Worker lane2 = bi_iadd_u32(b, lane1, bi_imm_u32(axis), false);
1712*61046927SAndroid Build Coastguard Worker }
1713*61046927SAndroid Build Coastguard Worker
1714*61046927SAndroid Build Coastguard Worker left = bi_clper(b, s0, lane1, BI_LANE_OP_NONE);
1715*61046927SAndroid Build Coastguard Worker right = bi_clper(b, s0, lane2, BI_LANE_OP_NONE);
1716*61046927SAndroid Build Coastguard Worker }
1717*61046927SAndroid Build Coastguard Worker
1718*61046927SAndroid Build Coastguard Worker bi_fadd_to(b, sz, dst, right, bi_neg(left));
1719*61046927SAndroid Build Coastguard Worker }
1720*61046927SAndroid Build Coastguard Worker
1721*61046927SAndroid Build Coastguard Worker static void
bi_emit_intrinsic(bi_builder * b,nir_intrinsic_instr * instr)1722*61046927SAndroid Build Coastguard Worker bi_emit_intrinsic(bi_builder *b, nir_intrinsic_instr *instr)
1723*61046927SAndroid Build Coastguard Worker {
1724*61046927SAndroid Build Coastguard Worker bi_index dst = nir_intrinsic_infos[instr->intrinsic].has_dest
1725*61046927SAndroid Build Coastguard Worker ? bi_def_index(&instr->def)
1726*61046927SAndroid Build Coastguard Worker : bi_null();
1727*61046927SAndroid Build Coastguard Worker gl_shader_stage stage = b->shader->stage;
1728*61046927SAndroid Build Coastguard Worker
1729*61046927SAndroid Build Coastguard Worker switch (instr->intrinsic) {
1730*61046927SAndroid Build Coastguard Worker case nir_intrinsic_load_barycentric_pixel:
1731*61046927SAndroid Build Coastguard Worker case nir_intrinsic_load_barycentric_centroid:
1732*61046927SAndroid Build Coastguard Worker case nir_intrinsic_load_barycentric_sample:
1733*61046927SAndroid Build Coastguard Worker case nir_intrinsic_load_barycentric_at_sample:
1734*61046927SAndroid Build Coastguard Worker case nir_intrinsic_load_barycentric_at_offset:
1735*61046927SAndroid Build Coastguard Worker /* handled later via load_vary */
1736*61046927SAndroid Build Coastguard Worker break;
1737*61046927SAndroid Build Coastguard Worker case nir_intrinsic_load_interpolated_input:
1738*61046927SAndroid Build Coastguard Worker case nir_intrinsic_load_input:
1739*61046927SAndroid Build Coastguard Worker if (b->shader->inputs->is_blend)
1740*61046927SAndroid Build Coastguard Worker bi_emit_load_blend_input(b, instr);
1741*61046927SAndroid Build Coastguard Worker else if (stage == MESA_SHADER_FRAGMENT)
1742*61046927SAndroid Build Coastguard Worker bi_emit_load_vary(b, instr);
1743*61046927SAndroid Build Coastguard Worker else if (stage == MESA_SHADER_VERTEX)
1744*61046927SAndroid Build Coastguard Worker bi_emit_load_attr(b, instr);
1745*61046927SAndroid Build Coastguard Worker else
1746*61046927SAndroid Build Coastguard Worker unreachable("Unsupported shader stage");
1747*61046927SAndroid Build Coastguard Worker break;
1748*61046927SAndroid Build Coastguard Worker
1749*61046927SAndroid Build Coastguard Worker case nir_intrinsic_store_output:
1750*61046927SAndroid Build Coastguard Worker if (stage == MESA_SHADER_FRAGMENT)
1751*61046927SAndroid Build Coastguard Worker bi_emit_fragment_out(b, instr);
1752*61046927SAndroid Build Coastguard Worker else if (stage == MESA_SHADER_VERTEX)
1753*61046927SAndroid Build Coastguard Worker bi_emit_store_vary(b, instr);
1754*61046927SAndroid Build Coastguard Worker else
1755*61046927SAndroid Build Coastguard Worker unreachable("Unsupported shader stage");
1756*61046927SAndroid Build Coastguard Worker break;
1757*61046927SAndroid Build Coastguard Worker
1758*61046927SAndroid Build Coastguard Worker case nir_intrinsic_store_combined_output_pan:
1759*61046927SAndroid Build Coastguard Worker assert(stage == MESA_SHADER_FRAGMENT);
1760*61046927SAndroid Build Coastguard Worker bi_emit_fragment_out(b, instr);
1761*61046927SAndroid Build Coastguard Worker break;
1762*61046927SAndroid Build Coastguard Worker
1763*61046927SAndroid Build Coastguard Worker case nir_intrinsic_load_ubo:
1764*61046927SAndroid Build Coastguard Worker bi_emit_load_ubo(b, instr);
1765*61046927SAndroid Build Coastguard Worker break;
1766*61046927SAndroid Build Coastguard Worker
1767*61046927SAndroid Build Coastguard Worker case nir_intrinsic_load_push_constant:
1768*61046927SAndroid Build Coastguard Worker bi_emit_load_push_constant(b, instr);
1769*61046927SAndroid Build Coastguard Worker break;
1770*61046927SAndroid Build Coastguard Worker
1771*61046927SAndroid Build Coastguard Worker case nir_intrinsic_load_global:
1772*61046927SAndroid Build Coastguard Worker case nir_intrinsic_load_global_constant:
1773*61046927SAndroid Build Coastguard Worker bi_emit_load(b, instr, BI_SEG_NONE);
1774*61046927SAndroid Build Coastguard Worker break;
1775*61046927SAndroid Build Coastguard Worker
1776*61046927SAndroid Build Coastguard Worker case nir_intrinsic_store_global:
1777*61046927SAndroid Build Coastguard Worker bi_emit_store(b, instr, BI_SEG_NONE);
1778*61046927SAndroid Build Coastguard Worker break;
1779*61046927SAndroid Build Coastguard Worker
1780*61046927SAndroid Build Coastguard Worker case nir_intrinsic_load_scratch:
1781*61046927SAndroid Build Coastguard Worker bi_emit_load(b, instr, BI_SEG_TL);
1782*61046927SAndroid Build Coastguard Worker break;
1783*61046927SAndroid Build Coastguard Worker
1784*61046927SAndroid Build Coastguard Worker case nir_intrinsic_store_scratch:
1785*61046927SAndroid Build Coastguard Worker bi_emit_store(b, instr, BI_SEG_TL);
1786*61046927SAndroid Build Coastguard Worker break;
1787*61046927SAndroid Build Coastguard Worker
1788*61046927SAndroid Build Coastguard Worker case nir_intrinsic_load_shared:
1789*61046927SAndroid Build Coastguard Worker bi_emit_load(b, instr, BI_SEG_WLS);
1790*61046927SAndroid Build Coastguard Worker break;
1791*61046927SAndroid Build Coastguard Worker
1792*61046927SAndroid Build Coastguard Worker case nir_intrinsic_store_shared:
1793*61046927SAndroid Build Coastguard Worker bi_emit_store(b, instr, BI_SEG_WLS);
1794*61046927SAndroid Build Coastguard Worker break;
1795*61046927SAndroid Build Coastguard Worker
1796*61046927SAndroid Build Coastguard Worker case nir_intrinsic_barrier:
1797*61046927SAndroid Build Coastguard Worker if (nir_intrinsic_execution_scope(instr) != SCOPE_NONE) {
1798*61046927SAndroid Build Coastguard Worker assert(b->shader->stage != MESA_SHADER_FRAGMENT);
1799*61046927SAndroid Build Coastguard Worker assert(nir_intrinsic_execution_scope(instr) > SCOPE_SUBGROUP &&
1800*61046927SAndroid Build Coastguard Worker "todo: subgroup barriers (different divergence rules)");
1801*61046927SAndroid Build Coastguard Worker bi_barrier(b);
1802*61046927SAndroid Build Coastguard Worker }
1803*61046927SAndroid Build Coastguard Worker /* Blob doesn't seem to do anything for memory barriers, so no need to
1804*61046927SAndroid Build Coastguard Worker * check nir_intrinsic_memory_scope().
1805*61046927SAndroid Build Coastguard Worker */
1806*61046927SAndroid Build Coastguard Worker break;
1807*61046927SAndroid Build Coastguard Worker
1808*61046927SAndroid Build Coastguard Worker case nir_intrinsic_shared_atomic: {
1809*61046927SAndroid Build Coastguard Worker nir_atomic_op op = nir_intrinsic_atomic_op(instr);
1810*61046927SAndroid Build Coastguard Worker
1811*61046927SAndroid Build Coastguard Worker if (op == nir_atomic_op_xchg) {
1812*61046927SAndroid Build Coastguard Worker bi_emit_axchg_to(b, dst, bi_src_index(&instr->src[0]), &instr->src[1],
1813*61046927SAndroid Build Coastguard Worker BI_SEG_WLS);
1814*61046927SAndroid Build Coastguard Worker } else {
1815*61046927SAndroid Build Coastguard Worker assert(nir_src_bit_size(instr->src[1]) == 32);
1816*61046927SAndroid Build Coastguard Worker
1817*61046927SAndroid Build Coastguard Worker bi_index addr = bi_src_index(&instr->src[0]);
1818*61046927SAndroid Build Coastguard Worker bi_index addr_hi;
1819*61046927SAndroid Build Coastguard Worker
1820*61046927SAndroid Build Coastguard Worker if (b->shader->arch >= 9) {
1821*61046927SAndroid Build Coastguard Worker bi_handle_segment(b, &addr, &addr_hi, BI_SEG_WLS, NULL);
1822*61046927SAndroid Build Coastguard Worker addr = bi_collect_v2i32(b, addr, addr_hi);
1823*61046927SAndroid Build Coastguard Worker } else {
1824*61046927SAndroid Build Coastguard Worker addr = bi_seg_add_i64(b, addr, bi_zero(), false, BI_SEG_WLS);
1825*61046927SAndroid Build Coastguard Worker bi_emit_cached_split(b, addr, 64);
1826*61046927SAndroid Build Coastguard Worker }
1827*61046927SAndroid Build Coastguard Worker
1828*61046927SAndroid Build Coastguard Worker bi_emit_atomic_i32_to(b, dst, addr, bi_src_index(&instr->src[1]), op);
1829*61046927SAndroid Build Coastguard Worker }
1830*61046927SAndroid Build Coastguard Worker
1831*61046927SAndroid Build Coastguard Worker bi_split_def(b, &instr->def);
1832*61046927SAndroid Build Coastguard Worker break;
1833*61046927SAndroid Build Coastguard Worker }
1834*61046927SAndroid Build Coastguard Worker
1835*61046927SAndroid Build Coastguard Worker case nir_intrinsic_global_atomic: {
1836*61046927SAndroid Build Coastguard Worker nir_atomic_op op = nir_intrinsic_atomic_op(instr);
1837*61046927SAndroid Build Coastguard Worker
1838*61046927SAndroid Build Coastguard Worker if (op == nir_atomic_op_xchg) {
1839*61046927SAndroid Build Coastguard Worker bi_emit_axchg_to(b, dst, bi_src_index(&instr->src[0]), &instr->src[1],
1840*61046927SAndroid Build Coastguard Worker BI_SEG_NONE);
1841*61046927SAndroid Build Coastguard Worker } else {
1842*61046927SAndroid Build Coastguard Worker assert(nir_src_bit_size(instr->src[1]) == 32);
1843*61046927SAndroid Build Coastguard Worker
1844*61046927SAndroid Build Coastguard Worker bi_emit_atomic_i32_to(b, dst, bi_src_index(&instr->src[0]),
1845*61046927SAndroid Build Coastguard Worker bi_src_index(&instr->src[1]), op);
1846*61046927SAndroid Build Coastguard Worker }
1847*61046927SAndroid Build Coastguard Worker
1848*61046927SAndroid Build Coastguard Worker bi_split_def(b, &instr->def);
1849*61046927SAndroid Build Coastguard Worker break;
1850*61046927SAndroid Build Coastguard Worker }
1851*61046927SAndroid Build Coastguard Worker
1852*61046927SAndroid Build Coastguard Worker case nir_intrinsic_image_texel_address:
1853*61046927SAndroid Build Coastguard Worker bi_emit_lea_image_to(b, dst, instr);
1854*61046927SAndroid Build Coastguard Worker break;
1855*61046927SAndroid Build Coastguard Worker
1856*61046927SAndroid Build Coastguard Worker case nir_intrinsic_image_load:
1857*61046927SAndroid Build Coastguard Worker bi_emit_image_load(b, instr);
1858*61046927SAndroid Build Coastguard Worker break;
1859*61046927SAndroid Build Coastguard Worker
1860*61046927SAndroid Build Coastguard Worker case nir_intrinsic_image_store:
1861*61046927SAndroid Build Coastguard Worker bi_emit_image_store(b, instr);
1862*61046927SAndroid Build Coastguard Worker break;
1863*61046927SAndroid Build Coastguard Worker
1864*61046927SAndroid Build Coastguard Worker case nir_intrinsic_global_atomic_swap:
1865*61046927SAndroid Build Coastguard Worker bi_emit_acmpxchg_to(b, dst, bi_src_index(&instr->src[0]), &instr->src[1],
1866*61046927SAndroid Build Coastguard Worker &instr->src[2], BI_SEG_NONE);
1867*61046927SAndroid Build Coastguard Worker bi_split_def(b, &instr->def);
1868*61046927SAndroid Build Coastguard Worker break;
1869*61046927SAndroid Build Coastguard Worker
1870*61046927SAndroid Build Coastguard Worker case nir_intrinsic_shared_atomic_swap:
1871*61046927SAndroid Build Coastguard Worker bi_emit_acmpxchg_to(b, dst, bi_src_index(&instr->src[0]), &instr->src[1],
1872*61046927SAndroid Build Coastguard Worker &instr->src[2], BI_SEG_WLS);
1873*61046927SAndroid Build Coastguard Worker bi_split_def(b, &instr->def);
1874*61046927SAndroid Build Coastguard Worker break;
1875*61046927SAndroid Build Coastguard Worker
1876*61046927SAndroid Build Coastguard Worker case nir_intrinsic_load_pixel_coord:
1877*61046927SAndroid Build Coastguard Worker /* Vectorized load of the preloaded i16vec2 */
1878*61046927SAndroid Build Coastguard Worker bi_mov_i32_to(b, dst, bi_preload(b, 59));
1879*61046927SAndroid Build Coastguard Worker break;
1880*61046927SAndroid Build Coastguard Worker
1881*61046927SAndroid Build Coastguard Worker case nir_intrinsic_load_frag_coord_zw:
1882*61046927SAndroid Build Coastguard Worker bi_emit_load_frag_coord_zw(b, dst, nir_intrinsic_component(instr));
1883*61046927SAndroid Build Coastguard Worker break;
1884*61046927SAndroid Build Coastguard Worker
1885*61046927SAndroid Build Coastguard Worker case nir_intrinsic_load_converted_output_pan:
1886*61046927SAndroid Build Coastguard Worker bi_emit_ld_tile(b, instr);
1887*61046927SAndroid Build Coastguard Worker break;
1888*61046927SAndroid Build Coastguard Worker
1889*61046927SAndroid Build Coastguard Worker case nir_intrinsic_terminate_if:
1890*61046927SAndroid Build Coastguard Worker bi_discard_b32(b, bi_src_index(&instr->src[0]));
1891*61046927SAndroid Build Coastguard Worker break;
1892*61046927SAndroid Build Coastguard Worker
1893*61046927SAndroid Build Coastguard Worker case nir_intrinsic_terminate:
1894*61046927SAndroid Build Coastguard Worker bi_discard_f32(b, bi_zero(), bi_zero(), BI_CMPF_EQ);
1895*61046927SAndroid Build Coastguard Worker break;
1896*61046927SAndroid Build Coastguard Worker
1897*61046927SAndroid Build Coastguard Worker case nir_intrinsic_load_sample_positions_pan:
1898*61046927SAndroid Build Coastguard Worker bi_collect_v2i32_to(b, dst, bi_fau(BIR_FAU_SAMPLE_POS_ARRAY, false),
1899*61046927SAndroid Build Coastguard Worker bi_fau(BIR_FAU_SAMPLE_POS_ARRAY, true));
1900*61046927SAndroid Build Coastguard Worker break;
1901*61046927SAndroid Build Coastguard Worker
1902*61046927SAndroid Build Coastguard Worker case nir_intrinsic_load_sample_mask_in:
1903*61046927SAndroid Build Coastguard Worker /* r61[0:15] contains the coverage bitmap */
1904*61046927SAndroid Build Coastguard Worker bi_u16_to_u32_to(b, dst, bi_half(bi_preload(b, 61), false));
1905*61046927SAndroid Build Coastguard Worker break;
1906*61046927SAndroid Build Coastguard Worker
1907*61046927SAndroid Build Coastguard Worker case nir_intrinsic_load_sample_mask:
1908*61046927SAndroid Build Coastguard Worker bi_mov_i32_to(b, dst, bi_coverage(b));
1909*61046927SAndroid Build Coastguard Worker break;
1910*61046927SAndroid Build Coastguard Worker
1911*61046927SAndroid Build Coastguard Worker case nir_intrinsic_load_sample_id:
1912*61046927SAndroid Build Coastguard Worker bi_load_sample_id_to(b, dst);
1913*61046927SAndroid Build Coastguard Worker break;
1914*61046927SAndroid Build Coastguard Worker
1915*61046927SAndroid Build Coastguard Worker case nir_intrinsic_load_front_face:
1916*61046927SAndroid Build Coastguard Worker /* r58 == 0 means primitive is front facing */
1917*61046927SAndroid Build Coastguard Worker bi_icmp_i32_to(b, dst, bi_preload(b, 58), bi_zero(), BI_CMPF_EQ,
1918*61046927SAndroid Build Coastguard Worker BI_RESULT_TYPE_M1);
1919*61046927SAndroid Build Coastguard Worker break;
1920*61046927SAndroid Build Coastguard Worker
1921*61046927SAndroid Build Coastguard Worker case nir_intrinsic_load_point_coord:
1922*61046927SAndroid Build Coastguard Worker bi_ld_var_special_to(b, dst, bi_zero(), BI_REGISTER_FORMAT_F32,
1923*61046927SAndroid Build Coastguard Worker BI_SAMPLE_CENTER, BI_UPDATE_CLOBBER,
1924*61046927SAndroid Build Coastguard Worker BI_VARYING_NAME_POINT, BI_VECSIZE_V2);
1925*61046927SAndroid Build Coastguard Worker bi_emit_cached_split_i32(b, dst, 2);
1926*61046927SAndroid Build Coastguard Worker break;
1927*61046927SAndroid Build Coastguard Worker
1928*61046927SAndroid Build Coastguard Worker /* It appears vertex_id is zero-based with Bifrost geometry flows, but
1929*61046927SAndroid Build Coastguard Worker * not with Valhall's memory-allocation IDVS geometry flow. We only support
1930*61046927SAndroid Build Coastguard Worker * the new flow on Valhall so this is lowered in NIR.
1931*61046927SAndroid Build Coastguard Worker */
1932*61046927SAndroid Build Coastguard Worker case nir_intrinsic_load_vertex_id:
1933*61046927SAndroid Build Coastguard Worker case nir_intrinsic_load_vertex_id_zero_base:
1934*61046927SAndroid Build Coastguard Worker assert(b->shader->malloc_idvs ==
1935*61046927SAndroid Build Coastguard Worker (instr->intrinsic == nir_intrinsic_load_vertex_id));
1936*61046927SAndroid Build Coastguard Worker
1937*61046927SAndroid Build Coastguard Worker bi_mov_i32_to(b, dst, bi_vertex_id(b));
1938*61046927SAndroid Build Coastguard Worker break;
1939*61046927SAndroid Build Coastguard Worker
1940*61046927SAndroid Build Coastguard Worker case nir_intrinsic_load_instance_id:
1941*61046927SAndroid Build Coastguard Worker bi_mov_i32_to(b, dst, bi_instance_id(b));
1942*61046927SAndroid Build Coastguard Worker break;
1943*61046927SAndroid Build Coastguard Worker
1944*61046927SAndroid Build Coastguard Worker case nir_intrinsic_load_draw_id:
1945*61046927SAndroid Build Coastguard Worker bi_mov_i32_to(b, dst, bi_draw_id(b));
1946*61046927SAndroid Build Coastguard Worker break;
1947*61046927SAndroid Build Coastguard Worker
1948*61046927SAndroid Build Coastguard Worker case nir_intrinsic_load_subgroup_invocation:
1949*61046927SAndroid Build Coastguard Worker bi_mov_i32_to(b, dst, bi_fau(BIR_FAU_LANE_ID, false));
1950*61046927SAndroid Build Coastguard Worker break;
1951*61046927SAndroid Build Coastguard Worker
1952*61046927SAndroid Build Coastguard Worker case nir_intrinsic_load_local_invocation_id:
1953*61046927SAndroid Build Coastguard Worker bi_collect_v3i32_to(b, dst,
1954*61046927SAndroid Build Coastguard Worker bi_u16_to_u32(b, bi_half(bi_preload(b, 55), 0)),
1955*61046927SAndroid Build Coastguard Worker bi_u16_to_u32(b, bi_half(bi_preload(b, 55), 1)),
1956*61046927SAndroid Build Coastguard Worker bi_u16_to_u32(b, bi_half(bi_preload(b, 56), 0)));
1957*61046927SAndroid Build Coastguard Worker break;
1958*61046927SAndroid Build Coastguard Worker
1959*61046927SAndroid Build Coastguard Worker case nir_intrinsic_load_workgroup_id:
1960*61046927SAndroid Build Coastguard Worker bi_collect_v3i32_to(b, dst, bi_preload(b, 57), bi_preload(b, 58),
1961*61046927SAndroid Build Coastguard Worker bi_preload(b, 59));
1962*61046927SAndroid Build Coastguard Worker break;
1963*61046927SAndroid Build Coastguard Worker
1964*61046927SAndroid Build Coastguard Worker case nir_intrinsic_load_global_invocation_id:
1965*61046927SAndroid Build Coastguard Worker bi_collect_v3i32_to(b, dst, bi_preload(b, 60), bi_preload(b, 61),
1966*61046927SAndroid Build Coastguard Worker bi_preload(b, 62));
1967*61046927SAndroid Build Coastguard Worker break;
1968*61046927SAndroid Build Coastguard Worker
1969*61046927SAndroid Build Coastguard Worker case nir_intrinsic_shader_clock:
1970*61046927SAndroid Build Coastguard Worker bi_ld_gclk_u64_to(b, dst, BI_SOURCE_CYCLE_COUNTER);
1971*61046927SAndroid Build Coastguard Worker bi_split_def(b, &instr->def);
1972*61046927SAndroid Build Coastguard Worker break;
1973*61046927SAndroid Build Coastguard Worker
1974*61046927SAndroid Build Coastguard Worker case nir_intrinsic_ddx:
1975*61046927SAndroid Build Coastguard Worker case nir_intrinsic_ddx_fine:
1976*61046927SAndroid Build Coastguard Worker bi_emit_derivative(b, dst, instr, 1, false);
1977*61046927SAndroid Build Coastguard Worker break;
1978*61046927SAndroid Build Coastguard Worker case nir_intrinsic_ddx_coarse:
1979*61046927SAndroid Build Coastguard Worker bi_emit_derivative(b, dst, instr, 1, true);
1980*61046927SAndroid Build Coastguard Worker break;
1981*61046927SAndroid Build Coastguard Worker case nir_intrinsic_ddy:
1982*61046927SAndroid Build Coastguard Worker case nir_intrinsic_ddy_fine:
1983*61046927SAndroid Build Coastguard Worker bi_emit_derivative(b, dst, instr, 2, false);
1984*61046927SAndroid Build Coastguard Worker break;
1985*61046927SAndroid Build Coastguard Worker case nir_intrinsic_ddy_coarse:
1986*61046927SAndroid Build Coastguard Worker bi_emit_derivative(b, dst, instr, 2, true);
1987*61046927SAndroid Build Coastguard Worker break;
1988*61046927SAndroid Build Coastguard Worker
1989*61046927SAndroid Build Coastguard Worker case nir_intrinsic_load_layer_id:
1990*61046927SAndroid Build Coastguard Worker assert(b->shader->arch >= 9);
1991*61046927SAndroid Build Coastguard Worker bi_mov_i32_to(b, dst, bi_u8_to_u32(b, bi_byte(bi_preload(b, 62), 0)));
1992*61046927SAndroid Build Coastguard Worker break;
1993*61046927SAndroid Build Coastguard Worker
1994*61046927SAndroid Build Coastguard Worker case nir_intrinsic_load_ssbo_address:
1995*61046927SAndroid Build Coastguard Worker assert(b->shader->arch >= 9);
1996*61046927SAndroid Build Coastguard Worker bi_lea_buffer_to(b, dst, bi_src_index(&instr->src[1]),
1997*61046927SAndroid Build Coastguard Worker bi_src_index(&instr->src[0]));
1998*61046927SAndroid Build Coastguard Worker bi_emit_cached_split(b, dst, 64);
1999*61046927SAndroid Build Coastguard Worker break;
2000*61046927SAndroid Build Coastguard Worker
2001*61046927SAndroid Build Coastguard Worker case nir_intrinsic_load_ssbo: {
2002*61046927SAndroid Build Coastguard Worker assert(b->shader->arch >= 9);
2003*61046927SAndroid Build Coastguard Worker unsigned dst_bits = instr->num_components * instr->def.bit_size;
2004*61046927SAndroid Build Coastguard Worker bi_ld_buffer_to(b, dst_bits, dst, bi_src_index(&instr->src[1]),
2005*61046927SAndroid Build Coastguard Worker bi_src_index(&instr->src[0]));
2006*61046927SAndroid Build Coastguard Worker bi_emit_cached_split(b, dst, dst_bits);
2007*61046927SAndroid Build Coastguard Worker break;
2008*61046927SAndroid Build Coastguard Worker }
2009*61046927SAndroid Build Coastguard Worker
2010*61046927SAndroid Build Coastguard Worker default:
2011*61046927SAndroid Build Coastguard Worker fprintf(stderr, "Unhandled intrinsic %s\n",
2012*61046927SAndroid Build Coastguard Worker nir_intrinsic_infos[instr->intrinsic].name);
2013*61046927SAndroid Build Coastguard Worker assert(0);
2014*61046927SAndroid Build Coastguard Worker }
2015*61046927SAndroid Build Coastguard Worker }
2016*61046927SAndroid Build Coastguard Worker
2017*61046927SAndroid Build Coastguard Worker static void
bi_emit_load_const(bi_builder * b,nir_load_const_instr * instr)2018*61046927SAndroid Build Coastguard Worker bi_emit_load_const(bi_builder *b, nir_load_const_instr *instr)
2019*61046927SAndroid Build Coastguard Worker {
2020*61046927SAndroid Build Coastguard Worker /* Make sure we've been lowered */
2021*61046927SAndroid Build Coastguard Worker assert(instr->def.num_components <= (32 / instr->def.bit_size));
2022*61046927SAndroid Build Coastguard Worker
2023*61046927SAndroid Build Coastguard Worker /* Accumulate all the channels of the constant, as if we did an
2024*61046927SAndroid Build Coastguard Worker * implicit SEL over them */
2025*61046927SAndroid Build Coastguard Worker uint32_t acc = 0;
2026*61046927SAndroid Build Coastguard Worker
2027*61046927SAndroid Build Coastguard Worker for (unsigned i = 0; i < instr->def.num_components; ++i) {
2028*61046927SAndroid Build Coastguard Worker unsigned v =
2029*61046927SAndroid Build Coastguard Worker nir_const_value_as_uint(instr->value[i], instr->def.bit_size);
2030*61046927SAndroid Build Coastguard Worker acc |= (v << (i * instr->def.bit_size));
2031*61046927SAndroid Build Coastguard Worker }
2032*61046927SAndroid Build Coastguard Worker
2033*61046927SAndroid Build Coastguard Worker bi_mov_i32_to(b, bi_get_index(instr->def.index), bi_imm_u32(acc));
2034*61046927SAndroid Build Coastguard Worker }
2035*61046927SAndroid Build Coastguard Worker
2036*61046927SAndroid Build Coastguard Worker static bi_index
bi_alu_src_index(bi_builder * b,nir_alu_src src,unsigned comps)2037*61046927SAndroid Build Coastguard Worker bi_alu_src_index(bi_builder *b, nir_alu_src src, unsigned comps)
2038*61046927SAndroid Build Coastguard Worker {
2039*61046927SAndroid Build Coastguard Worker unsigned bitsize = nir_src_bit_size(src.src);
2040*61046927SAndroid Build Coastguard Worker
2041*61046927SAndroid Build Coastguard Worker /* the bi_index carries the 32-bit (word) offset separate from the
2042*61046927SAndroid Build Coastguard Worker * subword swizzle, first handle the offset */
2043*61046927SAndroid Build Coastguard Worker
2044*61046927SAndroid Build Coastguard Worker unsigned offset = 0;
2045*61046927SAndroid Build Coastguard Worker
2046*61046927SAndroid Build Coastguard Worker assert(bitsize == 8 || bitsize == 16 || bitsize == 32);
2047*61046927SAndroid Build Coastguard Worker unsigned subword_shift = (bitsize == 32) ? 0 : (bitsize == 16) ? 1 : 2;
2048*61046927SAndroid Build Coastguard Worker
2049*61046927SAndroid Build Coastguard Worker for (unsigned i = 0; i < comps; ++i) {
2050*61046927SAndroid Build Coastguard Worker unsigned new_offset = (src.swizzle[i] >> subword_shift);
2051*61046927SAndroid Build Coastguard Worker
2052*61046927SAndroid Build Coastguard Worker if (i > 0)
2053*61046927SAndroid Build Coastguard Worker assert(offset == new_offset && "wrong vectorization");
2054*61046927SAndroid Build Coastguard Worker
2055*61046927SAndroid Build Coastguard Worker offset = new_offset;
2056*61046927SAndroid Build Coastguard Worker }
2057*61046927SAndroid Build Coastguard Worker
2058*61046927SAndroid Build Coastguard Worker bi_index idx = bi_extract(b, bi_src_index(&src.src), offset);
2059*61046927SAndroid Build Coastguard Worker
2060*61046927SAndroid Build Coastguard Worker /* Compose the subword swizzle with existing (identity) swizzle */
2061*61046927SAndroid Build Coastguard Worker assert(idx.swizzle == BI_SWIZZLE_H01);
2062*61046927SAndroid Build Coastguard Worker
2063*61046927SAndroid Build Coastguard Worker /* Bigger vectors should have been lowered */
2064*61046927SAndroid Build Coastguard Worker assert(comps <= (1 << subword_shift));
2065*61046927SAndroid Build Coastguard Worker
2066*61046927SAndroid Build Coastguard Worker if (bitsize == 16) {
2067*61046927SAndroid Build Coastguard Worker unsigned c0 = src.swizzle[0] & 1;
2068*61046927SAndroid Build Coastguard Worker unsigned c1 = (comps > 1) ? src.swizzle[1] & 1 : c0;
2069*61046927SAndroid Build Coastguard Worker idx.swizzle = BI_SWIZZLE_H00 + c1 + (c0 << 1);
2070*61046927SAndroid Build Coastguard Worker } else if (bitsize == 8 && comps == 1) {
2071*61046927SAndroid Build Coastguard Worker idx.swizzle = BI_SWIZZLE_B0000 + (src.swizzle[0] & 3);
2072*61046927SAndroid Build Coastguard Worker } else if (bitsize == 8) {
2073*61046927SAndroid Build Coastguard Worker /* XXX: Use optimized swizzle when posisble */
2074*61046927SAndroid Build Coastguard Worker bi_index unoffset_srcs[NIR_MAX_VEC_COMPONENTS] = {bi_null()};
2075*61046927SAndroid Build Coastguard Worker unsigned channels[NIR_MAX_VEC_COMPONENTS] = {0};
2076*61046927SAndroid Build Coastguard Worker
2077*61046927SAndroid Build Coastguard Worker for (unsigned i = 0; i < comps; ++i) {
2078*61046927SAndroid Build Coastguard Worker unoffset_srcs[i] = bi_src_index(&src.src);
2079*61046927SAndroid Build Coastguard Worker channels[i] = src.swizzle[i];
2080*61046927SAndroid Build Coastguard Worker }
2081*61046927SAndroid Build Coastguard Worker
2082*61046927SAndroid Build Coastguard Worker bi_index temp = bi_temp(b->shader);
2083*61046927SAndroid Build Coastguard Worker bi_make_vec_to(b, temp, unoffset_srcs, channels, comps, bitsize);
2084*61046927SAndroid Build Coastguard Worker
2085*61046927SAndroid Build Coastguard Worker static const enum bi_swizzle swizzle_lut[] = {
2086*61046927SAndroid Build Coastguard Worker BI_SWIZZLE_B0000, BI_SWIZZLE_B0011, BI_SWIZZLE_H01, BI_SWIZZLE_H01};
2087*61046927SAndroid Build Coastguard Worker assert(comps - 1 < ARRAY_SIZE(swizzle_lut));
2088*61046927SAndroid Build Coastguard Worker
2089*61046927SAndroid Build Coastguard Worker /* Assign a coherent swizzle for the vector */
2090*61046927SAndroid Build Coastguard Worker temp.swizzle = swizzle_lut[comps - 1];
2091*61046927SAndroid Build Coastguard Worker
2092*61046927SAndroid Build Coastguard Worker return temp;
2093*61046927SAndroid Build Coastguard Worker }
2094*61046927SAndroid Build Coastguard Worker
2095*61046927SAndroid Build Coastguard Worker return idx;
2096*61046927SAndroid Build Coastguard Worker }
2097*61046927SAndroid Build Coastguard Worker
2098*61046927SAndroid Build Coastguard Worker static enum bi_round
bi_nir_round(nir_op op)2099*61046927SAndroid Build Coastguard Worker bi_nir_round(nir_op op)
2100*61046927SAndroid Build Coastguard Worker {
2101*61046927SAndroid Build Coastguard Worker switch (op) {
2102*61046927SAndroid Build Coastguard Worker case nir_op_fround_even:
2103*61046927SAndroid Build Coastguard Worker return BI_ROUND_NONE;
2104*61046927SAndroid Build Coastguard Worker case nir_op_ftrunc:
2105*61046927SAndroid Build Coastguard Worker return BI_ROUND_RTZ;
2106*61046927SAndroid Build Coastguard Worker case nir_op_fceil:
2107*61046927SAndroid Build Coastguard Worker return BI_ROUND_RTP;
2108*61046927SAndroid Build Coastguard Worker case nir_op_ffloor:
2109*61046927SAndroid Build Coastguard Worker return BI_ROUND_RTN;
2110*61046927SAndroid Build Coastguard Worker default:
2111*61046927SAndroid Build Coastguard Worker unreachable("invalid nir round op");
2112*61046927SAndroid Build Coastguard Worker }
2113*61046927SAndroid Build Coastguard Worker }
2114*61046927SAndroid Build Coastguard Worker
2115*61046927SAndroid Build Coastguard Worker /* Convenience for lowered transcendentals */
2116*61046927SAndroid Build Coastguard Worker
2117*61046927SAndroid Build Coastguard Worker static bi_index
bi_fmul_f32(bi_builder * b,bi_index s0,bi_index s1)2118*61046927SAndroid Build Coastguard Worker bi_fmul_f32(bi_builder *b, bi_index s0, bi_index s1)
2119*61046927SAndroid Build Coastguard Worker {
2120*61046927SAndroid Build Coastguard Worker return bi_fma_f32(b, s0, s1, bi_imm_f32(-0.0f));
2121*61046927SAndroid Build Coastguard Worker }
2122*61046927SAndroid Build Coastguard Worker
2123*61046927SAndroid Build Coastguard Worker /* Approximate with FRCP_APPROX.f32 and apply a single iteration of
2124*61046927SAndroid Build Coastguard Worker * Newton-Raphson to improve precision */
2125*61046927SAndroid Build Coastguard Worker
2126*61046927SAndroid Build Coastguard Worker static void
bi_lower_frcp_32(bi_builder * b,bi_index dst,bi_index s0)2127*61046927SAndroid Build Coastguard Worker bi_lower_frcp_32(bi_builder *b, bi_index dst, bi_index s0)
2128*61046927SAndroid Build Coastguard Worker {
2129*61046927SAndroid Build Coastguard Worker bi_index x1 = bi_frcp_approx_f32(b, s0);
2130*61046927SAndroid Build Coastguard Worker bi_index m = bi_frexpm_f32(b, s0, false, false);
2131*61046927SAndroid Build Coastguard Worker bi_index e = bi_frexpe_f32(b, bi_neg(s0), false, false);
2132*61046927SAndroid Build Coastguard Worker bi_index t1 = bi_fma_rscale_f32(b, m, bi_neg(x1), bi_imm_f32(1.0), bi_zero(),
2133*61046927SAndroid Build Coastguard Worker BI_SPECIAL_N);
2134*61046927SAndroid Build Coastguard Worker bi_fma_rscale_f32_to(b, dst, t1, x1, x1, e, BI_SPECIAL_NONE);
2135*61046927SAndroid Build Coastguard Worker }
2136*61046927SAndroid Build Coastguard Worker
2137*61046927SAndroid Build Coastguard Worker static void
bi_lower_frsq_32(bi_builder * b,bi_index dst,bi_index s0)2138*61046927SAndroid Build Coastguard Worker bi_lower_frsq_32(bi_builder *b, bi_index dst, bi_index s0)
2139*61046927SAndroid Build Coastguard Worker {
2140*61046927SAndroid Build Coastguard Worker bi_index x1 = bi_frsq_approx_f32(b, s0);
2141*61046927SAndroid Build Coastguard Worker bi_index m = bi_frexpm_f32(b, s0, false, true);
2142*61046927SAndroid Build Coastguard Worker bi_index e = bi_frexpe_f32(b, bi_neg(s0), false, true);
2143*61046927SAndroid Build Coastguard Worker bi_index t1 = bi_fmul_f32(b, x1, x1);
2144*61046927SAndroid Build Coastguard Worker bi_index t2 = bi_fma_rscale_f32(b, m, bi_neg(t1), bi_imm_f32(1.0),
2145*61046927SAndroid Build Coastguard Worker bi_imm_u32(-1), BI_SPECIAL_N);
2146*61046927SAndroid Build Coastguard Worker bi_fma_rscale_f32_to(b, dst, t2, x1, x1, e, BI_SPECIAL_N);
2147*61046927SAndroid Build Coastguard Worker }
2148*61046927SAndroid Build Coastguard Worker
2149*61046927SAndroid Build Coastguard Worker /* More complex transcendentals, see
2150*61046927SAndroid Build Coastguard Worker * https://gitlab.freedesktop.org/panfrost/mali-isa-docs/-/blob/master/Bifrost.adoc
2151*61046927SAndroid Build Coastguard Worker * for documentation */
2152*61046927SAndroid Build Coastguard Worker
2153*61046927SAndroid Build Coastguard Worker static void
bi_lower_fexp2_32(bi_builder * b,bi_index dst,bi_index s0)2154*61046927SAndroid Build Coastguard Worker bi_lower_fexp2_32(bi_builder *b, bi_index dst, bi_index s0)
2155*61046927SAndroid Build Coastguard Worker {
2156*61046927SAndroid Build Coastguard Worker bi_index t1 = bi_temp(b->shader);
2157*61046927SAndroid Build Coastguard Worker bi_instr *t1_instr = bi_fadd_f32_to(b, t1, s0, bi_imm_u32(0x49400000));
2158*61046927SAndroid Build Coastguard Worker t1_instr->clamp = BI_CLAMP_CLAMP_0_INF;
2159*61046927SAndroid Build Coastguard Worker
2160*61046927SAndroid Build Coastguard Worker bi_index t2 = bi_fadd_f32(b, t1, bi_imm_u32(0xc9400000));
2161*61046927SAndroid Build Coastguard Worker
2162*61046927SAndroid Build Coastguard Worker bi_instr *a2 = bi_fadd_f32_to(b, bi_temp(b->shader), s0, bi_neg(t2));
2163*61046927SAndroid Build Coastguard Worker a2->clamp = BI_CLAMP_CLAMP_M1_1;
2164*61046927SAndroid Build Coastguard Worker
2165*61046927SAndroid Build Coastguard Worker bi_index a1t = bi_fexp_table_u4(b, t1, BI_ADJ_NONE);
2166*61046927SAndroid Build Coastguard Worker bi_index t3 = bi_isub_u32(b, t1, bi_imm_u32(0x49400000), false);
2167*61046927SAndroid Build Coastguard Worker bi_index a1i = bi_arshift_i32(b, t3, bi_null(), bi_imm_u8(4));
2168*61046927SAndroid Build Coastguard Worker bi_index p1 = bi_fma_f32(b, a2->dest[0], bi_imm_u32(0x3d635635),
2169*61046927SAndroid Build Coastguard Worker bi_imm_u32(0x3e75fffa));
2170*61046927SAndroid Build Coastguard Worker bi_index p2 = bi_fma_f32(b, p1, a2->dest[0], bi_imm_u32(0x3f317218));
2171*61046927SAndroid Build Coastguard Worker bi_index p3 = bi_fmul_f32(b, a2->dest[0], p2);
2172*61046927SAndroid Build Coastguard Worker bi_instr *x = bi_fma_rscale_f32_to(b, bi_temp(b->shader), p3, a1t, a1t, a1i,
2173*61046927SAndroid Build Coastguard Worker BI_SPECIAL_NONE);
2174*61046927SAndroid Build Coastguard Worker x->clamp = BI_CLAMP_CLAMP_0_INF;
2175*61046927SAndroid Build Coastguard Worker
2176*61046927SAndroid Build Coastguard Worker bi_instr *max = bi_fmax_f32_to(b, dst, x->dest[0], s0);
2177*61046927SAndroid Build Coastguard Worker max->sem = BI_SEM_NAN_PROPAGATE;
2178*61046927SAndroid Build Coastguard Worker }
2179*61046927SAndroid Build Coastguard Worker
2180*61046927SAndroid Build Coastguard Worker static void
bi_fexp_32(bi_builder * b,bi_index dst,bi_index s0,bi_index log2_base)2181*61046927SAndroid Build Coastguard Worker bi_fexp_32(bi_builder *b, bi_index dst, bi_index s0, bi_index log2_base)
2182*61046927SAndroid Build Coastguard Worker {
2183*61046927SAndroid Build Coastguard Worker /* Scale by base, Multiply by 2*24 and convert to integer to get a 8:24
2184*61046927SAndroid Build Coastguard Worker * fixed-point input */
2185*61046927SAndroid Build Coastguard Worker bi_index scale = bi_fma_rscale_f32(b, s0, log2_base, bi_negzero(),
2186*61046927SAndroid Build Coastguard Worker bi_imm_u32(24), BI_SPECIAL_NONE);
2187*61046927SAndroid Build Coastguard Worker bi_instr *fixed_pt = bi_f32_to_s32_to(b, bi_temp(b->shader), scale);
2188*61046927SAndroid Build Coastguard Worker fixed_pt->round = BI_ROUND_NONE; // XXX
2189*61046927SAndroid Build Coastguard Worker
2190*61046927SAndroid Build Coastguard Worker /* Compute the result for the fixed-point input, but pass along
2191*61046927SAndroid Build Coastguard Worker * the floating-point scale for correct NaN propagation */
2192*61046927SAndroid Build Coastguard Worker bi_fexp_f32_to(b, dst, fixed_pt->dest[0], scale);
2193*61046927SAndroid Build Coastguard Worker }
2194*61046927SAndroid Build Coastguard Worker
2195*61046927SAndroid Build Coastguard Worker static void
bi_lower_flog2_32(bi_builder * b,bi_index dst,bi_index s0)2196*61046927SAndroid Build Coastguard Worker bi_lower_flog2_32(bi_builder *b, bi_index dst, bi_index s0)
2197*61046927SAndroid Build Coastguard Worker {
2198*61046927SAndroid Build Coastguard Worker /* s0 = a1 * 2^e, with a1 in [0.75, 1.5) */
2199*61046927SAndroid Build Coastguard Worker bi_index a1 = bi_frexpm_f32(b, s0, true, false);
2200*61046927SAndroid Build Coastguard Worker bi_index ei = bi_frexpe_f32(b, s0, true, false);
2201*61046927SAndroid Build Coastguard Worker bi_index ef = bi_s32_to_f32(b, ei);
2202*61046927SAndroid Build Coastguard Worker
2203*61046927SAndroid Build Coastguard Worker /* xt estimates -log(r1), a coarse approximation of log(a1) */
2204*61046927SAndroid Build Coastguard Worker bi_index r1 = bi_flog_table_f32(b, s0, BI_MODE_RED, BI_PRECISION_NONE);
2205*61046927SAndroid Build Coastguard Worker bi_index xt = bi_flog_table_f32(b, s0, BI_MODE_BASE2, BI_PRECISION_NONE);
2206*61046927SAndroid Build Coastguard Worker
2207*61046927SAndroid Build Coastguard Worker /* log(s0) = log(a1 * 2^e) = e + log(a1) = e + log(a1 * r1) -
2208*61046927SAndroid Build Coastguard Worker * log(r1), so let x1 = e - log(r1) ~= e + xt and x2 = log(a1 * r1),
2209*61046927SAndroid Build Coastguard Worker * and then log(s0) = x1 + x2 */
2210*61046927SAndroid Build Coastguard Worker bi_index x1 = bi_fadd_f32(b, ef, xt);
2211*61046927SAndroid Build Coastguard Worker
2212*61046927SAndroid Build Coastguard Worker /* Since a1 * r1 is close to 1, x2 = log(a1 * r1) may be computed by
2213*61046927SAndroid Build Coastguard Worker * polynomial approximation around 1. The series is expressed around
2214*61046927SAndroid Build Coastguard Worker * 1, so set y = (a1 * r1) - 1.0 */
2215*61046927SAndroid Build Coastguard Worker bi_index y = bi_fma_f32(b, a1, r1, bi_imm_f32(-1.0));
2216*61046927SAndroid Build Coastguard Worker
2217*61046927SAndroid Build Coastguard Worker /* x2 = log_2(1 + y) = log_e(1 + y) * (1/log_e(2)), so approximate
2218*61046927SAndroid Build Coastguard Worker * log_e(1 + y) by the Taylor series (lower precision than the blob):
2219*61046927SAndroid Build Coastguard Worker * y - y^2/2 + O(y^3) = y(1 - y/2) + O(y^3) */
2220*61046927SAndroid Build Coastguard Worker bi_index loge =
2221*61046927SAndroid Build Coastguard Worker bi_fmul_f32(b, y, bi_fma_f32(b, y, bi_imm_f32(-0.5), bi_imm_f32(1.0)));
2222*61046927SAndroid Build Coastguard Worker
2223*61046927SAndroid Build Coastguard Worker bi_index x2 = bi_fmul_f32(b, loge, bi_imm_f32(1.0 / logf(2.0)));
2224*61046927SAndroid Build Coastguard Worker
2225*61046927SAndroid Build Coastguard Worker /* log(s0) = x1 + x2 */
2226*61046927SAndroid Build Coastguard Worker bi_fadd_f32_to(b, dst, x1, x2);
2227*61046927SAndroid Build Coastguard Worker }
2228*61046927SAndroid Build Coastguard Worker
2229*61046927SAndroid Build Coastguard Worker static void
bi_flog2_32(bi_builder * b,bi_index dst,bi_index s0)2230*61046927SAndroid Build Coastguard Worker bi_flog2_32(bi_builder *b, bi_index dst, bi_index s0)
2231*61046927SAndroid Build Coastguard Worker {
2232*61046927SAndroid Build Coastguard Worker bi_index frexp = bi_frexpe_f32(b, s0, true, false);
2233*61046927SAndroid Build Coastguard Worker bi_index frexpi = bi_s32_to_f32(b, frexp);
2234*61046927SAndroid Build Coastguard Worker bi_index add = bi_fadd_lscale_f32(b, bi_imm_f32(-1.0f), s0);
2235*61046927SAndroid Build Coastguard Worker bi_fma_f32_to(b, dst, bi_flogd_f32(b, s0), add, frexpi);
2236*61046927SAndroid Build Coastguard Worker }
2237*61046927SAndroid Build Coastguard Worker
2238*61046927SAndroid Build Coastguard Worker static void
bi_lower_fpow_32(bi_builder * b,bi_index dst,bi_index base,bi_index exp)2239*61046927SAndroid Build Coastguard Worker bi_lower_fpow_32(bi_builder *b, bi_index dst, bi_index base, bi_index exp)
2240*61046927SAndroid Build Coastguard Worker {
2241*61046927SAndroid Build Coastguard Worker bi_index log2_base = bi_null();
2242*61046927SAndroid Build Coastguard Worker
2243*61046927SAndroid Build Coastguard Worker if (base.type == BI_INDEX_CONSTANT) {
2244*61046927SAndroid Build Coastguard Worker log2_base = bi_imm_f32(log2f(uif(base.value)));
2245*61046927SAndroid Build Coastguard Worker } else {
2246*61046927SAndroid Build Coastguard Worker log2_base = bi_temp(b->shader);
2247*61046927SAndroid Build Coastguard Worker bi_lower_flog2_32(b, log2_base, base);
2248*61046927SAndroid Build Coastguard Worker }
2249*61046927SAndroid Build Coastguard Worker
2250*61046927SAndroid Build Coastguard Worker return bi_lower_fexp2_32(b, dst, bi_fmul_f32(b, exp, log2_base));
2251*61046927SAndroid Build Coastguard Worker }
2252*61046927SAndroid Build Coastguard Worker
2253*61046927SAndroid Build Coastguard Worker static void
bi_fpow_32(bi_builder * b,bi_index dst,bi_index base,bi_index exp)2254*61046927SAndroid Build Coastguard Worker bi_fpow_32(bi_builder *b, bi_index dst, bi_index base, bi_index exp)
2255*61046927SAndroid Build Coastguard Worker {
2256*61046927SAndroid Build Coastguard Worker bi_index log2_base = bi_null();
2257*61046927SAndroid Build Coastguard Worker
2258*61046927SAndroid Build Coastguard Worker if (base.type == BI_INDEX_CONSTANT) {
2259*61046927SAndroid Build Coastguard Worker log2_base = bi_imm_f32(log2f(uif(base.value)));
2260*61046927SAndroid Build Coastguard Worker } else {
2261*61046927SAndroid Build Coastguard Worker log2_base = bi_temp(b->shader);
2262*61046927SAndroid Build Coastguard Worker bi_flog2_32(b, log2_base, base);
2263*61046927SAndroid Build Coastguard Worker }
2264*61046927SAndroid Build Coastguard Worker
2265*61046927SAndroid Build Coastguard Worker return bi_fexp_32(b, dst, exp, log2_base);
2266*61046927SAndroid Build Coastguard Worker }
2267*61046927SAndroid Build Coastguard Worker
/*
 * Bifrost has extremely coarse tables for approximating sin/cos, accessible
 * as FSIN/COS_TABLE.u6. These take the bottom 6 bits of their input, multiply
 * them by pi/32 and return the sine/cosine of that angle. We refine the table
 * value with a second-order Taylor approximation around the table point x:
 *
 *    f(x + e)   = f(x)   + e f'(x)  + (e^2)/2 f''(x)
 *    sin(x + e) = sin(x) + e cos(x) - (e^2)/2 sin(x)
 *    cos(x + e) = cos(x) - e sin(x) - (e^2)/2 cos(x)
 */

/* Immediate operands shared by the sin/cos lowering. */
#define TWO_OVER_PI  bi_imm_f32(2.0f / 3.14159f)
#define MPI_OVER_TWO bi_imm_f32(-3.14159f / 2.0)
#define SINCOS_BIAS  bi_imm_u32(0x49400000)
2281*61046927SAndroid Build Coastguard Worker
2282*61046927SAndroid Build Coastguard Worker static void
bi_lower_fsincos_32(bi_builder * b,bi_index dst,bi_index s0,bool cos)2283*61046927SAndroid Build Coastguard Worker bi_lower_fsincos_32(bi_builder *b, bi_index dst, bi_index s0, bool cos)
2284*61046927SAndroid Build Coastguard Worker {
2285*61046927SAndroid Build Coastguard Worker /* bottom 6-bits of result times pi/32 approximately s0 mod 2pi */
2286*61046927SAndroid Build Coastguard Worker bi_index x_u6 = bi_fma_f32(b, s0, TWO_OVER_PI, SINCOS_BIAS);
2287*61046927SAndroid Build Coastguard Worker
2288*61046927SAndroid Build Coastguard Worker /* Approximate domain error (small) */
2289*61046927SAndroid Build Coastguard Worker bi_index e = bi_fma_f32(b, bi_fadd_f32(b, x_u6, bi_neg(SINCOS_BIAS)),
2290*61046927SAndroid Build Coastguard Worker MPI_OVER_TWO, s0);
2291*61046927SAndroid Build Coastguard Worker
2292*61046927SAndroid Build Coastguard Worker /* Lookup sin(x), cos(x) */
2293*61046927SAndroid Build Coastguard Worker bi_index sinx = bi_fsin_table_u6(b, x_u6, false);
2294*61046927SAndroid Build Coastguard Worker bi_index cosx = bi_fcos_table_u6(b, x_u6, false);
2295*61046927SAndroid Build Coastguard Worker
2296*61046927SAndroid Build Coastguard Worker /* e^2 / 2 */
2297*61046927SAndroid Build Coastguard Worker bi_index e2_over_2 =
2298*61046927SAndroid Build Coastguard Worker bi_fma_rscale_f32(b, e, e, bi_negzero(), bi_imm_u32(-1), BI_SPECIAL_NONE);
2299*61046927SAndroid Build Coastguard Worker
2300*61046927SAndroid Build Coastguard Worker /* (-e^2)/2 f''(x) */
2301*61046927SAndroid Build Coastguard Worker bi_index quadratic =
2302*61046927SAndroid Build Coastguard Worker bi_fma_f32(b, bi_neg(e2_over_2), cos ? cosx : sinx, bi_negzero());
2303*61046927SAndroid Build Coastguard Worker
2304*61046927SAndroid Build Coastguard Worker /* e f'(x) - (e^2/2) f''(x) */
2305*61046927SAndroid Build Coastguard Worker bi_instr *I = bi_fma_f32_to(b, bi_temp(b->shader), e,
2306*61046927SAndroid Build Coastguard Worker cos ? bi_neg(sinx) : cosx, quadratic);
2307*61046927SAndroid Build Coastguard Worker I->clamp = BI_CLAMP_CLAMP_M1_1;
2308*61046927SAndroid Build Coastguard Worker
2309*61046927SAndroid Build Coastguard Worker /* f(x) + e f'(x) - (e^2/2) f''(x) */
2310*61046927SAndroid Build Coastguard Worker bi_fadd_f32_to(b, dst, I->dest[0], cos ? cosx : sinx);
2311*61046927SAndroid Build Coastguard Worker }
2312*61046927SAndroid Build Coastguard Worker
2313*61046927SAndroid Build Coastguard Worker static enum bi_cmpf
bi_translate_cmpf(nir_op op)2314*61046927SAndroid Build Coastguard Worker bi_translate_cmpf(nir_op op)
2315*61046927SAndroid Build Coastguard Worker {
2316*61046927SAndroid Build Coastguard Worker switch (op) {
2317*61046927SAndroid Build Coastguard Worker case nir_op_ieq8:
2318*61046927SAndroid Build Coastguard Worker case nir_op_ieq16:
2319*61046927SAndroid Build Coastguard Worker case nir_op_ieq32:
2320*61046927SAndroid Build Coastguard Worker case nir_op_feq16:
2321*61046927SAndroid Build Coastguard Worker case nir_op_feq32:
2322*61046927SAndroid Build Coastguard Worker return BI_CMPF_EQ;
2323*61046927SAndroid Build Coastguard Worker
2324*61046927SAndroid Build Coastguard Worker case nir_op_ine8:
2325*61046927SAndroid Build Coastguard Worker case nir_op_ine16:
2326*61046927SAndroid Build Coastguard Worker case nir_op_ine32:
2327*61046927SAndroid Build Coastguard Worker case nir_op_fneu16:
2328*61046927SAndroid Build Coastguard Worker case nir_op_fneu32:
2329*61046927SAndroid Build Coastguard Worker return BI_CMPF_NE;
2330*61046927SAndroid Build Coastguard Worker
2331*61046927SAndroid Build Coastguard Worker case nir_op_ilt8:
2332*61046927SAndroid Build Coastguard Worker case nir_op_ilt16:
2333*61046927SAndroid Build Coastguard Worker case nir_op_ilt32:
2334*61046927SAndroid Build Coastguard Worker case nir_op_flt16:
2335*61046927SAndroid Build Coastguard Worker case nir_op_flt32:
2336*61046927SAndroid Build Coastguard Worker case nir_op_ult8:
2337*61046927SAndroid Build Coastguard Worker case nir_op_ult16:
2338*61046927SAndroid Build Coastguard Worker case nir_op_ult32:
2339*61046927SAndroid Build Coastguard Worker return BI_CMPF_LT;
2340*61046927SAndroid Build Coastguard Worker
2341*61046927SAndroid Build Coastguard Worker case nir_op_ige8:
2342*61046927SAndroid Build Coastguard Worker case nir_op_ige16:
2343*61046927SAndroid Build Coastguard Worker case nir_op_ige32:
2344*61046927SAndroid Build Coastguard Worker case nir_op_fge16:
2345*61046927SAndroid Build Coastguard Worker case nir_op_fge32:
2346*61046927SAndroid Build Coastguard Worker case nir_op_uge8:
2347*61046927SAndroid Build Coastguard Worker case nir_op_uge16:
2348*61046927SAndroid Build Coastguard Worker case nir_op_uge32:
2349*61046927SAndroid Build Coastguard Worker return BI_CMPF_GE;
2350*61046927SAndroid Build Coastguard Worker
2351*61046927SAndroid Build Coastguard Worker default:
2352*61046927SAndroid Build Coastguard Worker unreachable("invalid comparison");
2353*61046927SAndroid Build Coastguard Worker }
2354*61046927SAndroid Build Coastguard Worker }
2355*61046927SAndroid Build Coastguard Worker
2356*61046927SAndroid Build Coastguard Worker static bool
bi_nir_is_replicated(nir_alu_src * src)2357*61046927SAndroid Build Coastguard Worker bi_nir_is_replicated(nir_alu_src *src)
2358*61046927SAndroid Build Coastguard Worker {
2359*61046927SAndroid Build Coastguard Worker for (unsigned i = 1; i < nir_src_num_components(src->src); ++i) {
2360*61046927SAndroid Build Coastguard Worker if (src->swizzle[0] == src->swizzle[i])
2361*61046927SAndroid Build Coastguard Worker return false;
2362*61046927SAndroid Build Coastguard Worker }
2363*61046927SAndroid Build Coastguard Worker
2364*61046927SAndroid Build Coastguard Worker return true;
2365*61046927SAndroid Build Coastguard Worker }
2366*61046927SAndroid Build Coastguard Worker
2367*61046927SAndroid Build Coastguard Worker static void
bi_emit_alu(bi_builder * b,nir_alu_instr * instr)2368*61046927SAndroid Build Coastguard Worker bi_emit_alu(bi_builder *b, nir_alu_instr *instr)
2369*61046927SAndroid Build Coastguard Worker {
2370*61046927SAndroid Build Coastguard Worker bi_index dst = bi_def_index(&instr->def);
2371*61046927SAndroid Build Coastguard Worker unsigned srcs = nir_op_infos[instr->op].num_inputs;
2372*61046927SAndroid Build Coastguard Worker unsigned sz = instr->def.bit_size;
2373*61046927SAndroid Build Coastguard Worker unsigned comps = instr->def.num_components;
2374*61046927SAndroid Build Coastguard Worker unsigned src_sz = srcs > 0 ? nir_src_bit_size(instr->src[0].src) : 0;
2375*61046927SAndroid Build Coastguard Worker
2376*61046927SAndroid Build Coastguard Worker /* Indicate scalarness */
2377*61046927SAndroid Build Coastguard Worker if (sz == 16 && comps == 1)
2378*61046927SAndroid Build Coastguard Worker dst.swizzle = BI_SWIZZLE_H00;
2379*61046927SAndroid Build Coastguard Worker
2380*61046927SAndroid Build Coastguard Worker /* First, match against the various moves in NIR. These are
2381*61046927SAndroid Build Coastguard Worker * special-cased because they can operate on vectors even after
2382*61046927SAndroid Build Coastguard Worker * lowering ALU to scalar. For Bifrost, bi_alu_src_index assumes the
2383*61046927SAndroid Build Coastguard Worker * instruction is no "bigger" than SIMD-within-a-register. These moves
2384*61046927SAndroid Build Coastguard Worker * are the exceptions that need to handle swizzles specially. */
2385*61046927SAndroid Build Coastguard Worker
2386*61046927SAndroid Build Coastguard Worker switch (instr->op) {
2387*61046927SAndroid Build Coastguard Worker case nir_op_vec2:
2388*61046927SAndroid Build Coastguard Worker case nir_op_vec3:
2389*61046927SAndroid Build Coastguard Worker case nir_op_vec4:
2390*61046927SAndroid Build Coastguard Worker case nir_op_vec8:
2391*61046927SAndroid Build Coastguard Worker case nir_op_vec16: {
2392*61046927SAndroid Build Coastguard Worker bi_index unoffset_srcs[16] = {bi_null()};
2393*61046927SAndroid Build Coastguard Worker unsigned channels[16] = {0};
2394*61046927SAndroid Build Coastguard Worker
2395*61046927SAndroid Build Coastguard Worker for (unsigned i = 0; i < srcs; ++i) {
2396*61046927SAndroid Build Coastguard Worker unoffset_srcs[i] = bi_src_index(&instr->src[i].src);
2397*61046927SAndroid Build Coastguard Worker channels[i] = instr->src[i].swizzle[0];
2398*61046927SAndroid Build Coastguard Worker }
2399*61046927SAndroid Build Coastguard Worker
2400*61046927SAndroid Build Coastguard Worker bi_make_vec_to(b, dst, unoffset_srcs, channels, srcs, sz);
2401*61046927SAndroid Build Coastguard Worker return;
2402*61046927SAndroid Build Coastguard Worker }
2403*61046927SAndroid Build Coastguard Worker
2404*61046927SAndroid Build Coastguard Worker case nir_op_unpack_32_2x16: {
2405*61046927SAndroid Build Coastguard Worker /* Should have been scalarized */
2406*61046927SAndroid Build Coastguard Worker assert(comps == 2 && sz == 16);
2407*61046927SAndroid Build Coastguard Worker
2408*61046927SAndroid Build Coastguard Worker bi_index vec = bi_src_index(&instr->src[0].src);
2409*61046927SAndroid Build Coastguard Worker unsigned chan = instr->src[0].swizzle[0];
2410*61046927SAndroid Build Coastguard Worker
2411*61046927SAndroid Build Coastguard Worker bi_mov_i32_to(b, dst, bi_extract(b, vec, chan));
2412*61046927SAndroid Build Coastguard Worker return;
2413*61046927SAndroid Build Coastguard Worker }
2414*61046927SAndroid Build Coastguard Worker
2415*61046927SAndroid Build Coastguard Worker case nir_op_unpack_64_2x32_split_x: {
2416*61046927SAndroid Build Coastguard Worker unsigned chan = (instr->src[0].swizzle[0] * 2) + 0;
2417*61046927SAndroid Build Coastguard Worker bi_mov_i32_to(b, dst,
2418*61046927SAndroid Build Coastguard Worker bi_extract(b, bi_src_index(&instr->src[0].src), chan));
2419*61046927SAndroid Build Coastguard Worker return;
2420*61046927SAndroid Build Coastguard Worker }
2421*61046927SAndroid Build Coastguard Worker
2422*61046927SAndroid Build Coastguard Worker case nir_op_unpack_64_2x32_split_y: {
2423*61046927SAndroid Build Coastguard Worker unsigned chan = (instr->src[0].swizzle[0] * 2) + 1;
2424*61046927SAndroid Build Coastguard Worker bi_mov_i32_to(b, dst,
2425*61046927SAndroid Build Coastguard Worker bi_extract(b, bi_src_index(&instr->src[0].src), chan));
2426*61046927SAndroid Build Coastguard Worker return;
2427*61046927SAndroid Build Coastguard Worker }
2428*61046927SAndroid Build Coastguard Worker
2429*61046927SAndroid Build Coastguard Worker case nir_op_pack_64_2x32_split:
2430*61046927SAndroid Build Coastguard Worker bi_collect_v2i32_to(b, dst,
2431*61046927SAndroid Build Coastguard Worker bi_extract(b, bi_src_index(&instr->src[0].src),
2432*61046927SAndroid Build Coastguard Worker instr->src[0].swizzle[0]),
2433*61046927SAndroid Build Coastguard Worker bi_extract(b, bi_src_index(&instr->src[1].src),
2434*61046927SAndroid Build Coastguard Worker instr->src[1].swizzle[0]));
2435*61046927SAndroid Build Coastguard Worker return;
2436*61046927SAndroid Build Coastguard Worker
2437*61046927SAndroid Build Coastguard Worker case nir_op_pack_64_2x32:
2438*61046927SAndroid Build Coastguard Worker bi_collect_v2i32_to(b, dst,
2439*61046927SAndroid Build Coastguard Worker bi_extract(b, bi_src_index(&instr->src[0].src),
2440*61046927SAndroid Build Coastguard Worker instr->src[0].swizzle[0]),
2441*61046927SAndroid Build Coastguard Worker bi_extract(b, bi_src_index(&instr->src[0].src),
2442*61046927SAndroid Build Coastguard Worker instr->src[0].swizzle[1]));
2443*61046927SAndroid Build Coastguard Worker return;
2444*61046927SAndroid Build Coastguard Worker
2445*61046927SAndroid Build Coastguard Worker case nir_op_pack_uvec2_to_uint: {
2446*61046927SAndroid Build Coastguard Worker bi_index src = bi_src_index(&instr->src[0].src);
2447*61046927SAndroid Build Coastguard Worker
2448*61046927SAndroid Build Coastguard Worker assert(sz == 32 && src_sz == 32);
2449*61046927SAndroid Build Coastguard Worker bi_mkvec_v2i16_to(
2450*61046927SAndroid Build Coastguard Worker b, dst, bi_half(bi_extract(b, src, instr->src[0].swizzle[0]), false),
2451*61046927SAndroid Build Coastguard Worker bi_half(bi_extract(b, src, instr->src[0].swizzle[1]), false));
2452*61046927SAndroid Build Coastguard Worker return;
2453*61046927SAndroid Build Coastguard Worker }
2454*61046927SAndroid Build Coastguard Worker
2455*61046927SAndroid Build Coastguard Worker case nir_op_pack_uvec4_to_uint: {
2456*61046927SAndroid Build Coastguard Worker bi_index src = bi_src_index(&instr->src[0].src);
2457*61046927SAndroid Build Coastguard Worker
2458*61046927SAndroid Build Coastguard Worker assert(sz == 32 && src_sz == 32);
2459*61046927SAndroid Build Coastguard Worker bi_mkvec_v4i8_to(
2460*61046927SAndroid Build Coastguard Worker b, dst, bi_byte(bi_extract(b, src, instr->src[0].swizzle[0]), 0),
2461*61046927SAndroid Build Coastguard Worker bi_byte(bi_extract(b, src, instr->src[0].swizzle[1]), 0),
2462*61046927SAndroid Build Coastguard Worker bi_byte(bi_extract(b, src, instr->src[0].swizzle[2]), 0),
2463*61046927SAndroid Build Coastguard Worker bi_byte(bi_extract(b, src, instr->src[0].swizzle[3]), 0));
2464*61046927SAndroid Build Coastguard Worker return;
2465*61046927SAndroid Build Coastguard Worker }
2466*61046927SAndroid Build Coastguard Worker
2467*61046927SAndroid Build Coastguard Worker case nir_op_mov: {
2468*61046927SAndroid Build Coastguard Worker bi_index idx = bi_src_index(&instr->src[0].src);
2469*61046927SAndroid Build Coastguard Worker bi_index unoffset_srcs[4] = {idx, idx, idx, idx};
2470*61046927SAndroid Build Coastguard Worker
2471*61046927SAndroid Build Coastguard Worker unsigned channels[4] = {
2472*61046927SAndroid Build Coastguard Worker comps > 0 ? instr->src[0].swizzle[0] : 0,
2473*61046927SAndroid Build Coastguard Worker comps > 1 ? instr->src[0].swizzle[1] : 0,
2474*61046927SAndroid Build Coastguard Worker comps > 2 ? instr->src[0].swizzle[2] : 0,
2475*61046927SAndroid Build Coastguard Worker comps > 3 ? instr->src[0].swizzle[3] : 0,
2476*61046927SAndroid Build Coastguard Worker };
2477*61046927SAndroid Build Coastguard Worker
2478*61046927SAndroid Build Coastguard Worker bi_make_vec_to(b, dst, unoffset_srcs, channels, comps, src_sz);
2479*61046927SAndroid Build Coastguard Worker return;
2480*61046927SAndroid Build Coastguard Worker }
2481*61046927SAndroid Build Coastguard Worker
2482*61046927SAndroid Build Coastguard Worker case nir_op_pack_32_2x16: {
2483*61046927SAndroid Build Coastguard Worker assert(comps == 1);
2484*61046927SAndroid Build Coastguard Worker
2485*61046927SAndroid Build Coastguard Worker bi_index idx = bi_src_index(&instr->src[0].src);
2486*61046927SAndroid Build Coastguard Worker bi_index unoffset_srcs[4] = {idx, idx, idx, idx};
2487*61046927SAndroid Build Coastguard Worker
2488*61046927SAndroid Build Coastguard Worker unsigned channels[2] = {instr->src[0].swizzle[0],
2489*61046927SAndroid Build Coastguard Worker instr->src[0].swizzle[1]};
2490*61046927SAndroid Build Coastguard Worker
2491*61046927SAndroid Build Coastguard Worker bi_make_vec_to(b, dst, unoffset_srcs, channels, 2, 16);
2492*61046927SAndroid Build Coastguard Worker return;
2493*61046927SAndroid Build Coastguard Worker }
2494*61046927SAndroid Build Coastguard Worker
2495*61046927SAndroid Build Coastguard Worker case nir_op_f2f16:
2496*61046927SAndroid Build Coastguard Worker case nir_op_f2f16_rtz:
2497*61046927SAndroid Build Coastguard Worker case nir_op_f2f16_rtne: {
2498*61046927SAndroid Build Coastguard Worker assert(src_sz == 32);
2499*61046927SAndroid Build Coastguard Worker bi_index idx = bi_src_index(&instr->src[0].src);
2500*61046927SAndroid Build Coastguard Worker bi_index s0 = bi_extract(b, idx, instr->src[0].swizzle[0]);
2501*61046927SAndroid Build Coastguard Worker bi_index s1 =
2502*61046927SAndroid Build Coastguard Worker comps > 1 ? bi_extract(b, idx, instr->src[0].swizzle[1]) : s0;
2503*61046927SAndroid Build Coastguard Worker
2504*61046927SAndroid Build Coastguard Worker bi_instr *I = bi_v2f32_to_v2f16_to(b, dst, s0, s1);
2505*61046927SAndroid Build Coastguard Worker
2506*61046927SAndroid Build Coastguard Worker /* Override rounding if explicitly requested. Otherwise, the
2507*61046927SAndroid Build Coastguard Worker * default rounding mode is selected by the builder. Depending
2508*61046927SAndroid Build Coastguard Worker * on the float controls required by the shader, the default
2509*61046927SAndroid Build Coastguard Worker * mode may not be nearest-even.
2510*61046927SAndroid Build Coastguard Worker */
2511*61046927SAndroid Build Coastguard Worker if (instr->op == nir_op_f2f16_rtz)
2512*61046927SAndroid Build Coastguard Worker I->round = BI_ROUND_RTZ;
2513*61046927SAndroid Build Coastguard Worker else if (instr->op == nir_op_f2f16_rtne)
2514*61046927SAndroid Build Coastguard Worker I->round = BI_ROUND_NONE; /* Nearest even */
2515*61046927SAndroid Build Coastguard Worker
2516*61046927SAndroid Build Coastguard Worker return;
2517*61046927SAndroid Build Coastguard Worker }
2518*61046927SAndroid Build Coastguard Worker
2519*61046927SAndroid Build Coastguard Worker /* Vectorized downcasts */
2520*61046927SAndroid Build Coastguard Worker case nir_op_u2u16:
2521*61046927SAndroid Build Coastguard Worker case nir_op_i2i16: {
2522*61046927SAndroid Build Coastguard Worker if (!(src_sz == 32 && comps == 2))
2523*61046927SAndroid Build Coastguard Worker break;
2524*61046927SAndroid Build Coastguard Worker
2525*61046927SAndroid Build Coastguard Worker bi_index idx = bi_src_index(&instr->src[0].src);
2526*61046927SAndroid Build Coastguard Worker bi_index s0 = bi_extract(b, idx, instr->src[0].swizzle[0]);
2527*61046927SAndroid Build Coastguard Worker bi_index s1 = bi_extract(b, idx, instr->src[0].swizzle[1]);
2528*61046927SAndroid Build Coastguard Worker
2529*61046927SAndroid Build Coastguard Worker bi_mkvec_v2i16_to(b, dst, bi_half(s0, false), bi_half(s1, false));
2530*61046927SAndroid Build Coastguard Worker return;
2531*61046927SAndroid Build Coastguard Worker }
2532*61046927SAndroid Build Coastguard Worker
2533*61046927SAndroid Build Coastguard Worker /* While we do not have a direct V2U32_TO_V2F16 instruction, lowering to
2534*61046927SAndroid Build Coastguard Worker * MKVEC.v2i16 + V2U16_TO_V2F16 is more efficient on Bifrost than
2535*61046927SAndroid Build Coastguard Worker * scalarizing due to scheduling (equal cost on Valhall). Additionally
2536*61046927SAndroid Build Coastguard Worker * if the source is replicated the MKVEC.v2i16 can be optimized out.
2537*61046927SAndroid Build Coastguard Worker */
2538*61046927SAndroid Build Coastguard Worker case nir_op_u2f16:
2539*61046927SAndroid Build Coastguard Worker case nir_op_i2f16: {
2540*61046927SAndroid Build Coastguard Worker if (!(src_sz == 32 && comps == 2))
2541*61046927SAndroid Build Coastguard Worker break;
2542*61046927SAndroid Build Coastguard Worker
2543*61046927SAndroid Build Coastguard Worker nir_alu_src *src = &instr->src[0];
2544*61046927SAndroid Build Coastguard Worker bi_index idx = bi_src_index(&src->src);
2545*61046927SAndroid Build Coastguard Worker bi_index s0 = bi_extract(b, idx, src->swizzle[0]);
2546*61046927SAndroid Build Coastguard Worker bi_index s1 = bi_extract(b, idx, src->swizzle[1]);
2547*61046927SAndroid Build Coastguard Worker
2548*61046927SAndroid Build Coastguard Worker bi_index t =
2549*61046927SAndroid Build Coastguard Worker (src->swizzle[0] == src->swizzle[1])
2550*61046927SAndroid Build Coastguard Worker ? bi_half(s0, false)
2551*61046927SAndroid Build Coastguard Worker : bi_mkvec_v2i16(b, bi_half(s0, false), bi_half(s1, false));
2552*61046927SAndroid Build Coastguard Worker
2553*61046927SAndroid Build Coastguard Worker if (instr->op == nir_op_u2f16)
2554*61046927SAndroid Build Coastguard Worker bi_v2u16_to_v2f16_to(b, dst, t);
2555*61046927SAndroid Build Coastguard Worker else
2556*61046927SAndroid Build Coastguard Worker bi_v2s16_to_v2f16_to(b, dst, t);
2557*61046927SAndroid Build Coastguard Worker
2558*61046927SAndroid Build Coastguard Worker return;
2559*61046927SAndroid Build Coastguard Worker }
2560*61046927SAndroid Build Coastguard Worker
2561*61046927SAndroid Build Coastguard Worker case nir_op_i2i8:
2562*61046927SAndroid Build Coastguard Worker case nir_op_u2u8: {
2563*61046927SAndroid Build Coastguard Worker /* Acts like an 8-bit swizzle */
2564*61046927SAndroid Build Coastguard Worker bi_index idx = bi_src_index(&instr->src[0].src);
2565*61046927SAndroid Build Coastguard Worker unsigned factor = src_sz / 8;
2566*61046927SAndroid Build Coastguard Worker unsigned chan[4] = {0};
2567*61046927SAndroid Build Coastguard Worker
2568*61046927SAndroid Build Coastguard Worker for (unsigned i = 0; i < comps; ++i)
2569*61046927SAndroid Build Coastguard Worker chan[i] = instr->src[0].swizzle[i] * factor;
2570*61046927SAndroid Build Coastguard Worker
2571*61046927SAndroid Build Coastguard Worker bi_make_vec_to(b, dst, &idx, chan, comps, 8);
2572*61046927SAndroid Build Coastguard Worker return;
2573*61046927SAndroid Build Coastguard Worker }
2574*61046927SAndroid Build Coastguard Worker
2575*61046927SAndroid Build Coastguard Worker case nir_op_b32csel: {
2576*61046927SAndroid Build Coastguard Worker if (sz != 16)
2577*61046927SAndroid Build Coastguard Worker break;
2578*61046927SAndroid Build Coastguard Worker
2579*61046927SAndroid Build Coastguard Worker /* We allow vectorizing b32csel(cond, A, B) which can be
2580*61046927SAndroid Build Coastguard Worker * translated as MUX.v2i16, even though cond is a 32-bit vector.
2581*61046927SAndroid Build Coastguard Worker *
2582*61046927SAndroid Build Coastguard Worker * If the source condition vector is replicated, we can use
2583*61046927SAndroid Build Coastguard Worker * MUX.v2i16 directly, letting each component use the
2584*61046927SAndroid Build Coastguard Worker * corresponding half of the 32-bit source. NIR uses 0/~0
2585*61046927SAndroid Build Coastguard Worker * booleans so that's guaranteed to work (that is, 32-bit NIR
2586*61046927SAndroid Build Coastguard Worker * booleans are 16-bit replicated).
2587*61046927SAndroid Build Coastguard Worker *
2588*61046927SAndroid Build Coastguard Worker * If we're not replicated, we use the same trick but must
2589*61046927SAndroid Build Coastguard Worker * insert a MKVEC.v2i16 first to convert down to 16-bit.
2590*61046927SAndroid Build Coastguard Worker */
2591*61046927SAndroid Build Coastguard Worker bi_index idx = bi_src_index(&instr->src[0].src);
2592*61046927SAndroid Build Coastguard Worker bi_index s0 = bi_extract(b, idx, instr->src[0].swizzle[0]);
2593*61046927SAndroid Build Coastguard Worker bi_index s1 = bi_alu_src_index(b, instr->src[1], comps);
2594*61046927SAndroid Build Coastguard Worker bi_index s2 = bi_alu_src_index(b, instr->src[2], comps);
2595*61046927SAndroid Build Coastguard Worker
2596*61046927SAndroid Build Coastguard Worker if (!bi_nir_is_replicated(&instr->src[0])) {
2597*61046927SAndroid Build Coastguard Worker s0 = bi_mkvec_v2i16(
2598*61046927SAndroid Build Coastguard Worker b, bi_half(s0, false),
2599*61046927SAndroid Build Coastguard Worker bi_half(bi_extract(b, idx, instr->src[0].swizzle[1]), false));
2600*61046927SAndroid Build Coastguard Worker }
2601*61046927SAndroid Build Coastguard Worker
2602*61046927SAndroid Build Coastguard Worker bi_mux_v2i16_to(b, dst, s2, s1, s0, BI_MUX_INT_ZERO);
2603*61046927SAndroid Build Coastguard Worker return;
2604*61046927SAndroid Build Coastguard Worker }
2605*61046927SAndroid Build Coastguard Worker
2606*61046927SAndroid Build Coastguard Worker default:
2607*61046927SAndroid Build Coastguard Worker break;
2608*61046927SAndroid Build Coastguard Worker }
2609*61046927SAndroid Build Coastguard Worker
2610*61046927SAndroid Build Coastguard Worker bi_index s0 =
2611*61046927SAndroid Build Coastguard Worker srcs > 0 ? bi_alu_src_index(b, instr->src[0], comps) : bi_null();
2612*61046927SAndroid Build Coastguard Worker bi_index s1 =
2613*61046927SAndroid Build Coastguard Worker srcs > 1 ? bi_alu_src_index(b, instr->src[1], comps) : bi_null();
2614*61046927SAndroid Build Coastguard Worker bi_index s2 =
2615*61046927SAndroid Build Coastguard Worker srcs > 2 ? bi_alu_src_index(b, instr->src[2], comps) : bi_null();
2616*61046927SAndroid Build Coastguard Worker
2617*61046927SAndroid Build Coastguard Worker switch (instr->op) {
2618*61046927SAndroid Build Coastguard Worker case nir_op_ffma:
2619*61046927SAndroid Build Coastguard Worker bi_fma_to(b, sz, dst, s0, s1, s2);
2620*61046927SAndroid Build Coastguard Worker break;
2621*61046927SAndroid Build Coastguard Worker
2622*61046927SAndroid Build Coastguard Worker case nir_op_fmul:
2623*61046927SAndroid Build Coastguard Worker bi_fma_to(b, sz, dst, s0, s1, bi_negzero());
2624*61046927SAndroid Build Coastguard Worker break;
2625*61046927SAndroid Build Coastguard Worker
2626*61046927SAndroid Build Coastguard Worker case nir_op_fadd:
2627*61046927SAndroid Build Coastguard Worker bi_fadd_to(b, sz, dst, s0, s1);
2628*61046927SAndroid Build Coastguard Worker break;
2629*61046927SAndroid Build Coastguard Worker
2630*61046927SAndroid Build Coastguard Worker case nir_op_fsat: {
2631*61046927SAndroid Build Coastguard Worker bi_instr *I = bi_fclamp_to(b, sz, dst, s0);
2632*61046927SAndroid Build Coastguard Worker I->clamp = BI_CLAMP_CLAMP_0_1;
2633*61046927SAndroid Build Coastguard Worker break;
2634*61046927SAndroid Build Coastguard Worker }
2635*61046927SAndroid Build Coastguard Worker
2636*61046927SAndroid Build Coastguard Worker case nir_op_fsat_signed_mali: {
2637*61046927SAndroid Build Coastguard Worker bi_instr *I = bi_fclamp_to(b, sz, dst, s0);
2638*61046927SAndroid Build Coastguard Worker I->clamp = BI_CLAMP_CLAMP_M1_1;
2639*61046927SAndroid Build Coastguard Worker break;
2640*61046927SAndroid Build Coastguard Worker }
2641*61046927SAndroid Build Coastguard Worker
2642*61046927SAndroid Build Coastguard Worker case nir_op_fclamp_pos_mali: {
2643*61046927SAndroid Build Coastguard Worker bi_instr *I = bi_fclamp_to(b, sz, dst, s0);
2644*61046927SAndroid Build Coastguard Worker I->clamp = BI_CLAMP_CLAMP_0_INF;
2645*61046927SAndroid Build Coastguard Worker break;
2646*61046927SAndroid Build Coastguard Worker }
2647*61046927SAndroid Build Coastguard Worker
2648*61046927SAndroid Build Coastguard Worker case nir_op_fneg:
2649*61046927SAndroid Build Coastguard Worker bi_fabsneg_to(b, sz, dst, bi_neg(s0));
2650*61046927SAndroid Build Coastguard Worker break;
2651*61046927SAndroid Build Coastguard Worker
2652*61046927SAndroid Build Coastguard Worker case nir_op_fabs:
2653*61046927SAndroid Build Coastguard Worker bi_fabsneg_to(b, sz, dst, bi_abs(s0));
2654*61046927SAndroid Build Coastguard Worker break;
2655*61046927SAndroid Build Coastguard Worker
2656*61046927SAndroid Build Coastguard Worker case nir_op_fsin:
2657*61046927SAndroid Build Coastguard Worker bi_lower_fsincos_32(b, dst, s0, false);
2658*61046927SAndroid Build Coastguard Worker break;
2659*61046927SAndroid Build Coastguard Worker
2660*61046927SAndroid Build Coastguard Worker case nir_op_fcos:
2661*61046927SAndroid Build Coastguard Worker bi_lower_fsincos_32(b, dst, s0, true);
2662*61046927SAndroid Build Coastguard Worker break;
2663*61046927SAndroid Build Coastguard Worker
2664*61046927SAndroid Build Coastguard Worker case nir_op_fexp2:
2665*61046927SAndroid Build Coastguard Worker assert(sz == 32); /* should've been lowered */
2666*61046927SAndroid Build Coastguard Worker
2667*61046927SAndroid Build Coastguard Worker if (b->shader->quirks & BIFROST_NO_FP32_TRANSCENDENTALS)
2668*61046927SAndroid Build Coastguard Worker bi_lower_fexp2_32(b, dst, s0);
2669*61046927SAndroid Build Coastguard Worker else
2670*61046927SAndroid Build Coastguard Worker bi_fexp_32(b, dst, s0, bi_imm_f32(1.0f));
2671*61046927SAndroid Build Coastguard Worker
2672*61046927SAndroid Build Coastguard Worker break;
2673*61046927SAndroid Build Coastguard Worker
2674*61046927SAndroid Build Coastguard Worker case nir_op_flog2:
2675*61046927SAndroid Build Coastguard Worker assert(sz == 32); /* should've been lowered */
2676*61046927SAndroid Build Coastguard Worker
2677*61046927SAndroid Build Coastguard Worker if (b->shader->quirks & BIFROST_NO_FP32_TRANSCENDENTALS)
2678*61046927SAndroid Build Coastguard Worker bi_lower_flog2_32(b, dst, s0);
2679*61046927SAndroid Build Coastguard Worker else
2680*61046927SAndroid Build Coastguard Worker bi_flog2_32(b, dst, s0);
2681*61046927SAndroid Build Coastguard Worker
2682*61046927SAndroid Build Coastguard Worker break;
2683*61046927SAndroid Build Coastguard Worker
2684*61046927SAndroid Build Coastguard Worker case nir_op_fpow:
2685*61046927SAndroid Build Coastguard Worker assert(sz == 32); /* should've been lowered */
2686*61046927SAndroid Build Coastguard Worker
2687*61046927SAndroid Build Coastguard Worker if (b->shader->quirks & BIFROST_NO_FP32_TRANSCENDENTALS)
2688*61046927SAndroid Build Coastguard Worker bi_lower_fpow_32(b, dst, s0, s1);
2689*61046927SAndroid Build Coastguard Worker else
2690*61046927SAndroid Build Coastguard Worker bi_fpow_32(b, dst, s0, s1);
2691*61046927SAndroid Build Coastguard Worker
2692*61046927SAndroid Build Coastguard Worker break;
2693*61046927SAndroid Build Coastguard Worker
2694*61046927SAndroid Build Coastguard Worker case nir_op_frexp_exp:
2695*61046927SAndroid Build Coastguard Worker bi_frexpe_to(b, sz, dst, s0, false, false);
2696*61046927SAndroid Build Coastguard Worker break;
2697*61046927SAndroid Build Coastguard Worker
2698*61046927SAndroid Build Coastguard Worker case nir_op_frexp_sig:
2699*61046927SAndroid Build Coastguard Worker bi_frexpm_to(b, sz, dst, s0, false, false);
2700*61046927SAndroid Build Coastguard Worker break;
2701*61046927SAndroid Build Coastguard Worker
2702*61046927SAndroid Build Coastguard Worker case nir_op_ldexp:
2703*61046927SAndroid Build Coastguard Worker bi_ldexp_to(b, sz, dst, s0, s1);
2704*61046927SAndroid Build Coastguard Worker break;
2705*61046927SAndroid Build Coastguard Worker
2706*61046927SAndroid Build Coastguard Worker case nir_op_b8csel:
2707*61046927SAndroid Build Coastguard Worker bi_mux_v4i8_to(b, dst, s2, s1, s0, BI_MUX_INT_ZERO);
2708*61046927SAndroid Build Coastguard Worker break;
2709*61046927SAndroid Build Coastguard Worker
2710*61046927SAndroid Build Coastguard Worker case nir_op_b16csel:
2711*61046927SAndroid Build Coastguard Worker bi_mux_v2i16_to(b, dst, s2, s1, s0, BI_MUX_INT_ZERO);
2712*61046927SAndroid Build Coastguard Worker break;
2713*61046927SAndroid Build Coastguard Worker
2714*61046927SAndroid Build Coastguard Worker case nir_op_b32csel:
2715*61046927SAndroid Build Coastguard Worker bi_mux_i32_to(b, dst, s2, s1, s0, BI_MUX_INT_ZERO);
2716*61046927SAndroid Build Coastguard Worker break;
2717*61046927SAndroid Build Coastguard Worker
2718*61046927SAndroid Build Coastguard Worker case nir_op_extract_u8:
2719*61046927SAndroid Build Coastguard Worker case nir_op_extract_i8: {
2720*61046927SAndroid Build Coastguard Worker assert(comps == 1 && "should be scalarized");
2721*61046927SAndroid Build Coastguard Worker assert((src_sz == 16 || src_sz == 32) && "should be lowered");
2722*61046927SAndroid Build Coastguard Worker unsigned byte = nir_alu_src_as_uint(instr->src[1]);
2723*61046927SAndroid Build Coastguard Worker
2724*61046927SAndroid Build Coastguard Worker if (s0.swizzle == BI_SWIZZLE_H11) {
2725*61046927SAndroid Build Coastguard Worker assert(byte < 2);
2726*61046927SAndroid Build Coastguard Worker byte += 2;
2727*61046927SAndroid Build Coastguard Worker } else if (s0.swizzle != BI_SWIZZLE_H01) {
2728*61046927SAndroid Build Coastguard Worker assert(s0.swizzle == BI_SWIZZLE_H00);
2729*61046927SAndroid Build Coastguard Worker }
2730*61046927SAndroid Build Coastguard Worker
2731*61046927SAndroid Build Coastguard Worker assert(byte < 4);
2732*61046927SAndroid Build Coastguard Worker
2733*61046927SAndroid Build Coastguard Worker s0.swizzle = BI_SWIZZLE_H01;
2734*61046927SAndroid Build Coastguard Worker
2735*61046927SAndroid Build Coastguard Worker if (instr->op == nir_op_extract_i8)
2736*61046927SAndroid Build Coastguard Worker bi_s8_to_s32_to(b, dst, bi_byte(s0, byte));
2737*61046927SAndroid Build Coastguard Worker else
2738*61046927SAndroid Build Coastguard Worker bi_u8_to_u32_to(b, dst, bi_byte(s0, byte));
2739*61046927SAndroid Build Coastguard Worker break;
2740*61046927SAndroid Build Coastguard Worker }
2741*61046927SAndroid Build Coastguard Worker
2742*61046927SAndroid Build Coastguard Worker case nir_op_extract_u16:
2743*61046927SAndroid Build Coastguard Worker case nir_op_extract_i16: {
2744*61046927SAndroid Build Coastguard Worker assert(comps == 1 && "should be scalarized");
2745*61046927SAndroid Build Coastguard Worker assert(src_sz == 32 && "should be lowered");
2746*61046927SAndroid Build Coastguard Worker unsigned half = nir_alu_src_as_uint(instr->src[1]);
2747*61046927SAndroid Build Coastguard Worker assert(half == 0 || half == 1);
2748*61046927SAndroid Build Coastguard Worker
2749*61046927SAndroid Build Coastguard Worker if (instr->op == nir_op_extract_i16)
2750*61046927SAndroid Build Coastguard Worker bi_s16_to_s32_to(b, dst, bi_half(s0, half));
2751*61046927SAndroid Build Coastguard Worker else
2752*61046927SAndroid Build Coastguard Worker bi_u16_to_u32_to(b, dst, bi_half(s0, half));
2753*61046927SAndroid Build Coastguard Worker break;
2754*61046927SAndroid Build Coastguard Worker }
2755*61046927SAndroid Build Coastguard Worker
2756*61046927SAndroid Build Coastguard Worker case nir_op_insert_u16: {
2757*61046927SAndroid Build Coastguard Worker assert(comps == 1 && "should be scalarized");
2758*61046927SAndroid Build Coastguard Worker unsigned half = nir_alu_src_as_uint(instr->src[1]);
2759*61046927SAndroid Build Coastguard Worker assert(half == 0 || half == 1);
2760*61046927SAndroid Build Coastguard Worker
2761*61046927SAndroid Build Coastguard Worker if (half == 0)
2762*61046927SAndroid Build Coastguard Worker bi_u16_to_u32_to(b, dst, bi_half(s0, 0));
2763*61046927SAndroid Build Coastguard Worker else
2764*61046927SAndroid Build Coastguard Worker bi_mkvec_v2i16_to(b, dst, bi_imm_u16(0), bi_half(s0, 0));
2765*61046927SAndroid Build Coastguard Worker break;
2766*61046927SAndroid Build Coastguard Worker }
2767*61046927SAndroid Build Coastguard Worker
2768*61046927SAndroid Build Coastguard Worker case nir_op_ishl:
2769*61046927SAndroid Build Coastguard Worker bi_lshift_or_to(b, sz, dst, s0, bi_zero(), bi_byte(s1, 0));
2770*61046927SAndroid Build Coastguard Worker break;
2771*61046927SAndroid Build Coastguard Worker case nir_op_ushr:
2772*61046927SAndroid Build Coastguard Worker bi_rshift_or_to(b, sz, dst, s0, bi_zero(), bi_byte(s1, 0), false);
2773*61046927SAndroid Build Coastguard Worker break;
2774*61046927SAndroid Build Coastguard Worker
2775*61046927SAndroid Build Coastguard Worker case nir_op_ishr:
2776*61046927SAndroid Build Coastguard Worker if (b->shader->arch >= 9)
2777*61046927SAndroid Build Coastguard Worker bi_rshift_or_to(b, sz, dst, s0, bi_zero(), bi_byte(s1, 0), true);
2778*61046927SAndroid Build Coastguard Worker else
2779*61046927SAndroid Build Coastguard Worker bi_arshift_to(b, sz, dst, s0, bi_null(), bi_byte(s1, 0));
2780*61046927SAndroid Build Coastguard Worker break;
2781*61046927SAndroid Build Coastguard Worker
2782*61046927SAndroid Build Coastguard Worker case nir_op_imin:
2783*61046927SAndroid Build Coastguard Worker case nir_op_umin:
2784*61046927SAndroid Build Coastguard Worker bi_csel_to(b, nir_op_infos[instr->op].input_types[0], sz, dst, s0, s1, s0,
2785*61046927SAndroid Build Coastguard Worker s1, BI_CMPF_LT);
2786*61046927SAndroid Build Coastguard Worker break;
2787*61046927SAndroid Build Coastguard Worker
2788*61046927SAndroid Build Coastguard Worker case nir_op_imax:
2789*61046927SAndroid Build Coastguard Worker case nir_op_umax:
2790*61046927SAndroid Build Coastguard Worker bi_csel_to(b, nir_op_infos[instr->op].input_types[0], sz, dst, s0, s1, s0,
2791*61046927SAndroid Build Coastguard Worker s1, BI_CMPF_GT);
2792*61046927SAndroid Build Coastguard Worker break;
2793*61046927SAndroid Build Coastguard Worker
2794*61046927SAndroid Build Coastguard Worker case nir_op_f2f32:
2795*61046927SAndroid Build Coastguard Worker bi_f16_to_f32_to(b, dst, s0);
2796*61046927SAndroid Build Coastguard Worker break;
2797*61046927SAndroid Build Coastguard Worker
2798*61046927SAndroid Build Coastguard Worker case nir_op_fquantize2f16: {
2799*61046927SAndroid Build Coastguard Worker bi_instr *f16 = bi_v2f32_to_v2f16_to(b, bi_temp(b->shader), s0, s0);
2800*61046927SAndroid Build Coastguard Worker bi_instr *f32 = bi_f16_to_f32_to(b, dst, bi_half(f16->dest[0], false));
2801*61046927SAndroid Build Coastguard Worker
2802*61046927SAndroid Build Coastguard Worker f16->ftz = f32->ftz = true;
2803*61046927SAndroid Build Coastguard Worker break;
2804*61046927SAndroid Build Coastguard Worker }
2805*61046927SAndroid Build Coastguard Worker
2806*61046927SAndroid Build Coastguard Worker case nir_op_f2i32:
2807*61046927SAndroid Build Coastguard Worker if (src_sz == 32)
2808*61046927SAndroid Build Coastguard Worker bi_f32_to_s32_to(b, dst, s0);
2809*61046927SAndroid Build Coastguard Worker else
2810*61046927SAndroid Build Coastguard Worker bi_f16_to_s32_to(b, dst, s0);
2811*61046927SAndroid Build Coastguard Worker break;
2812*61046927SAndroid Build Coastguard Worker
2813*61046927SAndroid Build Coastguard Worker /* Note 32-bit sources => no vectorization, so 32-bit works */
2814*61046927SAndroid Build Coastguard Worker case nir_op_f2u16:
2815*61046927SAndroid Build Coastguard Worker if (src_sz == 32)
2816*61046927SAndroid Build Coastguard Worker bi_f32_to_u32_to(b, dst, s0);
2817*61046927SAndroid Build Coastguard Worker else
2818*61046927SAndroid Build Coastguard Worker bi_v2f16_to_v2u16_to(b, dst, s0);
2819*61046927SAndroid Build Coastguard Worker break;
2820*61046927SAndroid Build Coastguard Worker
2821*61046927SAndroid Build Coastguard Worker case nir_op_f2i16:
2822*61046927SAndroid Build Coastguard Worker if (src_sz == 32)
2823*61046927SAndroid Build Coastguard Worker bi_f32_to_s32_to(b, dst, s0);
2824*61046927SAndroid Build Coastguard Worker else
2825*61046927SAndroid Build Coastguard Worker bi_v2f16_to_v2s16_to(b, dst, s0);
2826*61046927SAndroid Build Coastguard Worker break;
2827*61046927SAndroid Build Coastguard Worker
2828*61046927SAndroid Build Coastguard Worker case nir_op_f2u32:
2829*61046927SAndroid Build Coastguard Worker if (src_sz == 32)
2830*61046927SAndroid Build Coastguard Worker bi_f32_to_u32_to(b, dst, s0);
2831*61046927SAndroid Build Coastguard Worker else
2832*61046927SAndroid Build Coastguard Worker bi_f16_to_u32_to(b, dst, s0);
2833*61046927SAndroid Build Coastguard Worker break;
2834*61046927SAndroid Build Coastguard Worker
2835*61046927SAndroid Build Coastguard Worker case nir_op_u2f16:
2836*61046927SAndroid Build Coastguard Worker if (src_sz == 32)
2837*61046927SAndroid Build Coastguard Worker bi_v2u16_to_v2f16_to(b, dst, bi_half(s0, false));
2838*61046927SAndroid Build Coastguard Worker else if (src_sz == 16)
2839*61046927SAndroid Build Coastguard Worker bi_v2u16_to_v2f16_to(b, dst, s0);
2840*61046927SAndroid Build Coastguard Worker else if (src_sz == 8)
2841*61046927SAndroid Build Coastguard Worker bi_v2u8_to_v2f16_to(b, dst, s0);
2842*61046927SAndroid Build Coastguard Worker break;
2843*61046927SAndroid Build Coastguard Worker
2844*61046927SAndroid Build Coastguard Worker case nir_op_u2f32:
2845*61046927SAndroid Build Coastguard Worker if (src_sz == 32)
2846*61046927SAndroid Build Coastguard Worker bi_u32_to_f32_to(b, dst, s0);
2847*61046927SAndroid Build Coastguard Worker else if (src_sz == 16)
2848*61046927SAndroid Build Coastguard Worker bi_u16_to_f32_to(b, dst, s0);
2849*61046927SAndroid Build Coastguard Worker else
2850*61046927SAndroid Build Coastguard Worker bi_u8_to_f32_to(b, dst, s0);
2851*61046927SAndroid Build Coastguard Worker break;
2852*61046927SAndroid Build Coastguard Worker
2853*61046927SAndroid Build Coastguard Worker case nir_op_i2f16:
2854*61046927SAndroid Build Coastguard Worker if (src_sz == 32)
2855*61046927SAndroid Build Coastguard Worker bi_v2s16_to_v2f16_to(b, dst, bi_half(s0, false));
2856*61046927SAndroid Build Coastguard Worker else if (src_sz == 16)
2857*61046927SAndroid Build Coastguard Worker bi_v2s16_to_v2f16_to(b, dst, s0);
2858*61046927SAndroid Build Coastguard Worker else if (src_sz == 8)
2859*61046927SAndroid Build Coastguard Worker bi_v2s8_to_v2f16_to(b, dst, s0);
2860*61046927SAndroid Build Coastguard Worker break;
2861*61046927SAndroid Build Coastguard Worker
2862*61046927SAndroid Build Coastguard Worker case nir_op_i2f32:
2863*61046927SAndroid Build Coastguard Worker assert(src_sz == 32 || src_sz == 16 || src_sz == 8);
2864*61046927SAndroid Build Coastguard Worker
2865*61046927SAndroid Build Coastguard Worker if (src_sz == 32)
2866*61046927SAndroid Build Coastguard Worker bi_s32_to_f32_to(b, dst, s0);
2867*61046927SAndroid Build Coastguard Worker else if (src_sz == 16)
2868*61046927SAndroid Build Coastguard Worker bi_s16_to_f32_to(b, dst, s0);
2869*61046927SAndroid Build Coastguard Worker else if (src_sz == 8)
2870*61046927SAndroid Build Coastguard Worker bi_s8_to_f32_to(b, dst, s0);
2871*61046927SAndroid Build Coastguard Worker break;
2872*61046927SAndroid Build Coastguard Worker
2873*61046927SAndroid Build Coastguard Worker case nir_op_i2i32:
2874*61046927SAndroid Build Coastguard Worker assert(src_sz == 32 || src_sz == 16 || src_sz == 8);
2875*61046927SAndroid Build Coastguard Worker
2876*61046927SAndroid Build Coastguard Worker if (src_sz == 32)
2877*61046927SAndroid Build Coastguard Worker bi_mov_i32_to(b, dst, s0);
2878*61046927SAndroid Build Coastguard Worker else if (src_sz == 16)
2879*61046927SAndroid Build Coastguard Worker bi_s16_to_s32_to(b, dst, s0);
2880*61046927SAndroid Build Coastguard Worker else if (src_sz == 8)
2881*61046927SAndroid Build Coastguard Worker bi_s8_to_s32_to(b, dst, s0);
2882*61046927SAndroid Build Coastguard Worker break;
2883*61046927SAndroid Build Coastguard Worker
2884*61046927SAndroid Build Coastguard Worker case nir_op_u2u32:
2885*61046927SAndroid Build Coastguard Worker assert(src_sz == 32 || src_sz == 16 || src_sz == 8);
2886*61046927SAndroid Build Coastguard Worker
2887*61046927SAndroid Build Coastguard Worker if (src_sz == 32)
2888*61046927SAndroid Build Coastguard Worker bi_mov_i32_to(b, dst, s0);
2889*61046927SAndroid Build Coastguard Worker else if (src_sz == 16)
2890*61046927SAndroid Build Coastguard Worker bi_u16_to_u32_to(b, dst, s0);
2891*61046927SAndroid Build Coastguard Worker else if (src_sz == 8)
2892*61046927SAndroid Build Coastguard Worker bi_u8_to_u32_to(b, dst, s0);
2893*61046927SAndroid Build Coastguard Worker
2894*61046927SAndroid Build Coastguard Worker break;
2895*61046927SAndroid Build Coastguard Worker
2896*61046927SAndroid Build Coastguard Worker case nir_op_i2i16:
2897*61046927SAndroid Build Coastguard Worker assert(src_sz == 8 || src_sz == 32);
2898*61046927SAndroid Build Coastguard Worker
2899*61046927SAndroid Build Coastguard Worker if (src_sz == 8)
2900*61046927SAndroid Build Coastguard Worker bi_v2s8_to_v2s16_to(b, dst, s0);
2901*61046927SAndroid Build Coastguard Worker else
2902*61046927SAndroid Build Coastguard Worker bi_mov_i32_to(b, dst, s0);
2903*61046927SAndroid Build Coastguard Worker break;
2904*61046927SAndroid Build Coastguard Worker
2905*61046927SAndroid Build Coastguard Worker case nir_op_u2u16:
2906*61046927SAndroid Build Coastguard Worker assert(src_sz == 8 || src_sz == 32);
2907*61046927SAndroid Build Coastguard Worker
2908*61046927SAndroid Build Coastguard Worker if (src_sz == 8)
2909*61046927SAndroid Build Coastguard Worker bi_v2u8_to_v2u16_to(b, dst, s0);
2910*61046927SAndroid Build Coastguard Worker else
2911*61046927SAndroid Build Coastguard Worker bi_mov_i32_to(b, dst, s0);
2912*61046927SAndroid Build Coastguard Worker break;
2913*61046927SAndroid Build Coastguard Worker
2914*61046927SAndroid Build Coastguard Worker case nir_op_b2i8:
2915*61046927SAndroid Build Coastguard Worker case nir_op_b2i16:
2916*61046927SAndroid Build Coastguard Worker case nir_op_b2i32:
2917*61046927SAndroid Build Coastguard Worker bi_mux_to(b, sz, dst, bi_imm_u8(0), bi_imm_uintN(1, sz), s0,
2918*61046927SAndroid Build Coastguard Worker BI_MUX_INT_ZERO);
2919*61046927SAndroid Build Coastguard Worker break;
2920*61046927SAndroid Build Coastguard Worker
2921*61046927SAndroid Build Coastguard Worker case nir_op_ieq8:
2922*61046927SAndroid Build Coastguard Worker case nir_op_ine8:
2923*61046927SAndroid Build Coastguard Worker case nir_op_ilt8:
2924*61046927SAndroid Build Coastguard Worker case nir_op_ige8:
2925*61046927SAndroid Build Coastguard Worker case nir_op_ieq16:
2926*61046927SAndroid Build Coastguard Worker case nir_op_ine16:
2927*61046927SAndroid Build Coastguard Worker case nir_op_ilt16:
2928*61046927SAndroid Build Coastguard Worker case nir_op_ige16:
2929*61046927SAndroid Build Coastguard Worker case nir_op_ieq32:
2930*61046927SAndroid Build Coastguard Worker case nir_op_ine32:
2931*61046927SAndroid Build Coastguard Worker case nir_op_ilt32:
2932*61046927SAndroid Build Coastguard Worker case nir_op_ige32:
2933*61046927SAndroid Build Coastguard Worker bi_icmp_to(b, nir_type_int, sz, dst, s0, s1, bi_translate_cmpf(instr->op),
2934*61046927SAndroid Build Coastguard Worker BI_RESULT_TYPE_M1);
2935*61046927SAndroid Build Coastguard Worker break;
2936*61046927SAndroid Build Coastguard Worker
2937*61046927SAndroid Build Coastguard Worker case nir_op_ult8:
2938*61046927SAndroid Build Coastguard Worker case nir_op_uge8:
2939*61046927SAndroid Build Coastguard Worker case nir_op_ult16:
2940*61046927SAndroid Build Coastguard Worker case nir_op_uge16:
2941*61046927SAndroid Build Coastguard Worker case nir_op_ult32:
2942*61046927SAndroid Build Coastguard Worker case nir_op_uge32:
2943*61046927SAndroid Build Coastguard Worker bi_icmp_to(b, nir_type_uint, sz, dst, s0, s1,
2944*61046927SAndroid Build Coastguard Worker bi_translate_cmpf(instr->op), BI_RESULT_TYPE_M1);
2945*61046927SAndroid Build Coastguard Worker break;
2946*61046927SAndroid Build Coastguard Worker
2947*61046927SAndroid Build Coastguard Worker case nir_op_feq32:
2948*61046927SAndroid Build Coastguard Worker case nir_op_feq16:
2949*61046927SAndroid Build Coastguard Worker case nir_op_flt32:
2950*61046927SAndroid Build Coastguard Worker case nir_op_flt16:
2951*61046927SAndroid Build Coastguard Worker case nir_op_fge32:
2952*61046927SAndroid Build Coastguard Worker case nir_op_fge16:
2953*61046927SAndroid Build Coastguard Worker case nir_op_fneu32:
2954*61046927SAndroid Build Coastguard Worker case nir_op_fneu16:
2955*61046927SAndroid Build Coastguard Worker bi_fcmp_to(b, sz, dst, s0, s1, bi_translate_cmpf(instr->op),
2956*61046927SAndroid Build Coastguard Worker BI_RESULT_TYPE_M1);
2957*61046927SAndroid Build Coastguard Worker break;
2958*61046927SAndroid Build Coastguard Worker
2959*61046927SAndroid Build Coastguard Worker case nir_op_fround_even:
2960*61046927SAndroid Build Coastguard Worker case nir_op_fceil:
2961*61046927SAndroid Build Coastguard Worker case nir_op_ffloor:
2962*61046927SAndroid Build Coastguard Worker case nir_op_ftrunc:
2963*61046927SAndroid Build Coastguard Worker bi_fround_to(b, sz, dst, s0, bi_nir_round(instr->op));
2964*61046927SAndroid Build Coastguard Worker break;
2965*61046927SAndroid Build Coastguard Worker
2966*61046927SAndroid Build Coastguard Worker case nir_op_fmin:
2967*61046927SAndroid Build Coastguard Worker bi_fmin_to(b, sz, dst, s0, s1);
2968*61046927SAndroid Build Coastguard Worker break;
2969*61046927SAndroid Build Coastguard Worker
2970*61046927SAndroid Build Coastguard Worker case nir_op_fmax:
2971*61046927SAndroid Build Coastguard Worker bi_fmax_to(b, sz, dst, s0, s1);
2972*61046927SAndroid Build Coastguard Worker break;
2973*61046927SAndroid Build Coastguard Worker
2974*61046927SAndroid Build Coastguard Worker case nir_op_iadd:
2975*61046927SAndroid Build Coastguard Worker bi_iadd_to(b, nir_type_int, sz, dst, s0, s1, false);
2976*61046927SAndroid Build Coastguard Worker break;
2977*61046927SAndroid Build Coastguard Worker
2978*61046927SAndroid Build Coastguard Worker case nir_op_iadd_sat:
2979*61046927SAndroid Build Coastguard Worker bi_iadd_to(b, nir_type_int, sz, dst, s0, s1, true);
2980*61046927SAndroid Build Coastguard Worker break;
2981*61046927SAndroid Build Coastguard Worker
2982*61046927SAndroid Build Coastguard Worker case nir_op_uadd_sat:
2983*61046927SAndroid Build Coastguard Worker bi_iadd_to(b, nir_type_uint, sz, dst, s0, s1, true);
2984*61046927SAndroid Build Coastguard Worker break;
2985*61046927SAndroid Build Coastguard Worker
2986*61046927SAndroid Build Coastguard Worker case nir_op_ihadd:
2987*61046927SAndroid Build Coastguard Worker bi_hadd_to(b, nir_type_int, sz, dst, s0, s1, BI_ROUND_RTN);
2988*61046927SAndroid Build Coastguard Worker break;
2989*61046927SAndroid Build Coastguard Worker
2990*61046927SAndroid Build Coastguard Worker case nir_op_irhadd:
2991*61046927SAndroid Build Coastguard Worker bi_hadd_to(b, nir_type_int, sz, dst, s0, s1, BI_ROUND_RTP);
2992*61046927SAndroid Build Coastguard Worker break;
2993*61046927SAndroid Build Coastguard Worker
2994*61046927SAndroid Build Coastguard Worker case nir_op_uhadd:
2995*61046927SAndroid Build Coastguard Worker bi_hadd_to(b, nir_type_uint, sz, dst, s0, s1, BI_ROUND_RTN);
2996*61046927SAndroid Build Coastguard Worker break;
2997*61046927SAndroid Build Coastguard Worker
2998*61046927SAndroid Build Coastguard Worker case nir_op_urhadd:
2999*61046927SAndroid Build Coastguard Worker bi_hadd_to(b, nir_type_uint, sz, dst, s0, s1, BI_ROUND_RTP);
3000*61046927SAndroid Build Coastguard Worker break;
3001*61046927SAndroid Build Coastguard Worker
3002*61046927SAndroid Build Coastguard Worker case nir_op_ineg:
3003*61046927SAndroid Build Coastguard Worker bi_isub_to(b, nir_type_int, sz, dst, bi_zero(), s0, false);
3004*61046927SAndroid Build Coastguard Worker break;
3005*61046927SAndroid Build Coastguard Worker
3006*61046927SAndroid Build Coastguard Worker case nir_op_isub:
3007*61046927SAndroid Build Coastguard Worker bi_isub_to(b, nir_type_int, sz, dst, s0, s1, false);
3008*61046927SAndroid Build Coastguard Worker break;
3009*61046927SAndroid Build Coastguard Worker
3010*61046927SAndroid Build Coastguard Worker case nir_op_isub_sat:
3011*61046927SAndroid Build Coastguard Worker bi_isub_to(b, nir_type_int, sz, dst, s0, s1, true);
3012*61046927SAndroid Build Coastguard Worker break;
3013*61046927SAndroid Build Coastguard Worker
3014*61046927SAndroid Build Coastguard Worker case nir_op_usub_sat:
3015*61046927SAndroid Build Coastguard Worker bi_isub_to(b, nir_type_uint, sz, dst, s0, s1, true);
3016*61046927SAndroid Build Coastguard Worker break;
3017*61046927SAndroid Build Coastguard Worker
3018*61046927SAndroid Build Coastguard Worker case nir_op_imul:
3019*61046927SAndroid Build Coastguard Worker bi_imul_to(b, sz, dst, s0, s1);
3020*61046927SAndroid Build Coastguard Worker break;
3021*61046927SAndroid Build Coastguard Worker
3022*61046927SAndroid Build Coastguard Worker case nir_op_iabs:
3023*61046927SAndroid Build Coastguard Worker bi_iabs_to(b, sz, dst, s0);
3024*61046927SAndroid Build Coastguard Worker break;
3025*61046927SAndroid Build Coastguard Worker
3026*61046927SAndroid Build Coastguard Worker case nir_op_iand:
3027*61046927SAndroid Build Coastguard Worker bi_lshift_and_to(b, sz, dst, s0, s1, bi_imm_u8(0));
3028*61046927SAndroid Build Coastguard Worker break;
3029*61046927SAndroid Build Coastguard Worker
3030*61046927SAndroid Build Coastguard Worker case nir_op_ior:
3031*61046927SAndroid Build Coastguard Worker bi_lshift_or_to(b, sz, dst, s0, s1, bi_imm_u8(0));
3032*61046927SAndroid Build Coastguard Worker break;
3033*61046927SAndroid Build Coastguard Worker
3034*61046927SAndroid Build Coastguard Worker case nir_op_ixor:
3035*61046927SAndroid Build Coastguard Worker bi_lshift_xor_to(b, sz, dst, s0, s1, bi_imm_u8(0));
3036*61046927SAndroid Build Coastguard Worker break;
3037*61046927SAndroid Build Coastguard Worker
3038*61046927SAndroid Build Coastguard Worker case nir_op_inot:
3039*61046927SAndroid Build Coastguard Worker bi_lshift_or_to(b, sz, dst, bi_zero(), bi_not(s0), bi_imm_u8(0));
3040*61046927SAndroid Build Coastguard Worker break;
3041*61046927SAndroid Build Coastguard Worker
3042*61046927SAndroid Build Coastguard Worker case nir_op_frsq:
3043*61046927SAndroid Build Coastguard Worker if (sz == 32 && b->shader->quirks & BIFROST_NO_FP32_TRANSCENDENTALS)
3044*61046927SAndroid Build Coastguard Worker bi_lower_frsq_32(b, dst, s0);
3045*61046927SAndroid Build Coastguard Worker else
3046*61046927SAndroid Build Coastguard Worker bi_frsq_to(b, sz, dst, s0);
3047*61046927SAndroid Build Coastguard Worker break;
3048*61046927SAndroid Build Coastguard Worker
3049*61046927SAndroid Build Coastguard Worker case nir_op_frcp:
3050*61046927SAndroid Build Coastguard Worker if (sz == 32 && b->shader->quirks & BIFROST_NO_FP32_TRANSCENDENTALS)
3051*61046927SAndroid Build Coastguard Worker bi_lower_frcp_32(b, dst, s0);
3052*61046927SAndroid Build Coastguard Worker else
3053*61046927SAndroid Build Coastguard Worker bi_frcp_to(b, sz, dst, s0);
3054*61046927SAndroid Build Coastguard Worker break;
3055*61046927SAndroid Build Coastguard Worker
3056*61046927SAndroid Build Coastguard Worker case nir_op_uclz:
3057*61046927SAndroid Build Coastguard Worker bi_clz_to(b, sz, dst, s0, false);
3058*61046927SAndroid Build Coastguard Worker break;
3059*61046927SAndroid Build Coastguard Worker
3060*61046927SAndroid Build Coastguard Worker case nir_op_bit_count:
3061*61046927SAndroid Build Coastguard Worker assert(sz == 32 && src_sz == 32 && "should've been lowered");
3062*61046927SAndroid Build Coastguard Worker bi_popcount_i32_to(b, dst, s0);
3063*61046927SAndroid Build Coastguard Worker break;
3064*61046927SAndroid Build Coastguard Worker
3065*61046927SAndroid Build Coastguard Worker case nir_op_bitfield_reverse:
3066*61046927SAndroid Build Coastguard Worker assert(sz == 32 && src_sz == 32 && "should've been lowered");
3067*61046927SAndroid Build Coastguard Worker bi_bitrev_i32_to(b, dst, s0);
3068*61046927SAndroid Build Coastguard Worker break;
3069*61046927SAndroid Build Coastguard Worker
3070*61046927SAndroid Build Coastguard Worker case nir_op_ufind_msb: {
3071*61046927SAndroid Build Coastguard Worker bi_index clz = bi_clz(b, src_sz, s0, false);
3072*61046927SAndroid Build Coastguard Worker
3073*61046927SAndroid Build Coastguard Worker if (sz == 8)
3074*61046927SAndroid Build Coastguard Worker clz = bi_byte(clz, 0);
3075*61046927SAndroid Build Coastguard Worker else if (sz == 16)
3076*61046927SAndroid Build Coastguard Worker clz = bi_half(clz, false);
3077*61046927SAndroid Build Coastguard Worker
3078*61046927SAndroid Build Coastguard Worker bi_isub_u32_to(b, dst, bi_imm_u32(src_sz - 1), clz, false);
3079*61046927SAndroid Build Coastguard Worker break;
3080*61046927SAndroid Build Coastguard Worker }
3081*61046927SAndroid Build Coastguard Worker
3082*61046927SAndroid Build Coastguard Worker default:
3083*61046927SAndroid Build Coastguard Worker fprintf(stderr, "Unhandled ALU op %s\n", nir_op_infos[instr->op].name);
3084*61046927SAndroid Build Coastguard Worker unreachable("Unknown ALU op");
3085*61046927SAndroid Build Coastguard Worker }
3086*61046927SAndroid Build Coastguard Worker }
3087*61046927SAndroid Build Coastguard Worker
3088*61046927SAndroid Build Coastguard Worker /* Returns dimension with 0 special casing cubemaps. Shamelessly copied from
3089*61046927SAndroid Build Coastguard Worker * Midgard */
3090*61046927SAndroid Build Coastguard Worker static unsigned
bifrost_tex_format(enum glsl_sampler_dim dim)3091*61046927SAndroid Build Coastguard Worker bifrost_tex_format(enum glsl_sampler_dim dim)
3092*61046927SAndroid Build Coastguard Worker {
3093*61046927SAndroid Build Coastguard Worker switch (dim) {
3094*61046927SAndroid Build Coastguard Worker case GLSL_SAMPLER_DIM_1D:
3095*61046927SAndroid Build Coastguard Worker case GLSL_SAMPLER_DIM_BUF:
3096*61046927SAndroid Build Coastguard Worker return 1;
3097*61046927SAndroid Build Coastguard Worker
3098*61046927SAndroid Build Coastguard Worker case GLSL_SAMPLER_DIM_2D:
3099*61046927SAndroid Build Coastguard Worker case GLSL_SAMPLER_DIM_MS:
3100*61046927SAndroid Build Coastguard Worker case GLSL_SAMPLER_DIM_EXTERNAL:
3101*61046927SAndroid Build Coastguard Worker case GLSL_SAMPLER_DIM_RECT:
3102*61046927SAndroid Build Coastguard Worker case GLSL_SAMPLER_DIM_SUBPASS:
3103*61046927SAndroid Build Coastguard Worker case GLSL_SAMPLER_DIM_SUBPASS_MS:
3104*61046927SAndroid Build Coastguard Worker return 2;
3105*61046927SAndroid Build Coastguard Worker
3106*61046927SAndroid Build Coastguard Worker case GLSL_SAMPLER_DIM_3D:
3107*61046927SAndroid Build Coastguard Worker return 3;
3108*61046927SAndroid Build Coastguard Worker
3109*61046927SAndroid Build Coastguard Worker case GLSL_SAMPLER_DIM_CUBE:
3110*61046927SAndroid Build Coastguard Worker return 0;
3111*61046927SAndroid Build Coastguard Worker
3112*61046927SAndroid Build Coastguard Worker default:
3113*61046927SAndroid Build Coastguard Worker DBG("Unknown sampler dim type\n");
3114*61046927SAndroid Build Coastguard Worker assert(0);
3115*61046927SAndroid Build Coastguard Worker return 0;
3116*61046927SAndroid Build Coastguard Worker }
3117*61046927SAndroid Build Coastguard Worker }
3118*61046927SAndroid Build Coastguard Worker
3119*61046927SAndroid Build Coastguard Worker static enum bi_dimension
valhall_tex_dimension(enum glsl_sampler_dim dim)3120*61046927SAndroid Build Coastguard Worker valhall_tex_dimension(enum glsl_sampler_dim dim)
3121*61046927SAndroid Build Coastguard Worker {
3122*61046927SAndroid Build Coastguard Worker switch (dim) {
3123*61046927SAndroid Build Coastguard Worker case GLSL_SAMPLER_DIM_1D:
3124*61046927SAndroid Build Coastguard Worker case GLSL_SAMPLER_DIM_BUF:
3125*61046927SAndroid Build Coastguard Worker return BI_DIMENSION_1D;
3126*61046927SAndroid Build Coastguard Worker
3127*61046927SAndroid Build Coastguard Worker case GLSL_SAMPLER_DIM_2D:
3128*61046927SAndroid Build Coastguard Worker case GLSL_SAMPLER_DIM_MS:
3129*61046927SAndroid Build Coastguard Worker case GLSL_SAMPLER_DIM_EXTERNAL:
3130*61046927SAndroid Build Coastguard Worker case GLSL_SAMPLER_DIM_RECT:
3131*61046927SAndroid Build Coastguard Worker case GLSL_SAMPLER_DIM_SUBPASS:
3132*61046927SAndroid Build Coastguard Worker case GLSL_SAMPLER_DIM_SUBPASS_MS:
3133*61046927SAndroid Build Coastguard Worker return BI_DIMENSION_2D;
3134*61046927SAndroid Build Coastguard Worker
3135*61046927SAndroid Build Coastguard Worker case GLSL_SAMPLER_DIM_3D:
3136*61046927SAndroid Build Coastguard Worker return BI_DIMENSION_3D;
3137*61046927SAndroid Build Coastguard Worker
3138*61046927SAndroid Build Coastguard Worker case GLSL_SAMPLER_DIM_CUBE:
3139*61046927SAndroid Build Coastguard Worker return BI_DIMENSION_CUBE;
3140*61046927SAndroid Build Coastguard Worker
3141*61046927SAndroid Build Coastguard Worker default:
3142*61046927SAndroid Build Coastguard Worker unreachable("Unknown sampler dim type");
3143*61046927SAndroid Build Coastguard Worker }
3144*61046927SAndroid Build Coastguard Worker }
3145*61046927SAndroid Build Coastguard Worker
3146*61046927SAndroid Build Coastguard Worker static enum bifrost_texture_format_full
bi_texture_format(nir_alu_type T,enum bi_clamp clamp)3147*61046927SAndroid Build Coastguard Worker bi_texture_format(nir_alu_type T, enum bi_clamp clamp)
3148*61046927SAndroid Build Coastguard Worker {
3149*61046927SAndroid Build Coastguard Worker switch (T) {
3150*61046927SAndroid Build Coastguard Worker case nir_type_float16:
3151*61046927SAndroid Build Coastguard Worker return BIFROST_TEXTURE_FORMAT_F16 + clamp;
3152*61046927SAndroid Build Coastguard Worker case nir_type_float32:
3153*61046927SAndroid Build Coastguard Worker return BIFROST_TEXTURE_FORMAT_F32 + clamp;
3154*61046927SAndroid Build Coastguard Worker case nir_type_uint16:
3155*61046927SAndroid Build Coastguard Worker return BIFROST_TEXTURE_FORMAT_U16;
3156*61046927SAndroid Build Coastguard Worker case nir_type_int16:
3157*61046927SAndroid Build Coastguard Worker return BIFROST_TEXTURE_FORMAT_S16;
3158*61046927SAndroid Build Coastguard Worker case nir_type_uint32:
3159*61046927SAndroid Build Coastguard Worker return BIFROST_TEXTURE_FORMAT_U32;
3160*61046927SAndroid Build Coastguard Worker case nir_type_int32:
3161*61046927SAndroid Build Coastguard Worker return BIFROST_TEXTURE_FORMAT_S32;
3162*61046927SAndroid Build Coastguard Worker default:
3163*61046927SAndroid Build Coastguard Worker unreachable("Invalid type for texturing");
3164*61046927SAndroid Build Coastguard Worker }
3165*61046927SAndroid Build Coastguard Worker }
3166*61046927SAndroid Build Coastguard Worker
3167*61046927SAndroid Build Coastguard Worker /* Array indices are specified as 32-bit uints, need to convert. In .z component
3168*61046927SAndroid Build Coastguard Worker * from NIR */
3169*61046927SAndroid Build Coastguard Worker static bi_index
bi_emit_texc_array_index(bi_builder * b,bi_index idx,nir_alu_type T)3170*61046927SAndroid Build Coastguard Worker bi_emit_texc_array_index(bi_builder *b, bi_index idx, nir_alu_type T)
3171*61046927SAndroid Build Coastguard Worker {
3172*61046927SAndroid Build Coastguard Worker /* For (u)int we can just passthrough */
3173*61046927SAndroid Build Coastguard Worker nir_alu_type base = nir_alu_type_get_base_type(T);
3174*61046927SAndroid Build Coastguard Worker if (base == nir_type_int || base == nir_type_uint)
3175*61046927SAndroid Build Coastguard Worker return idx;
3176*61046927SAndroid Build Coastguard Worker
3177*61046927SAndroid Build Coastguard Worker /* Otherwise we convert */
3178*61046927SAndroid Build Coastguard Worker assert(T == nir_type_float32);
3179*61046927SAndroid Build Coastguard Worker
3180*61046927SAndroid Build Coastguard Worker /* OpenGL ES 3.2 specification section 8.14.2 ("Coordinate Wrapping and
3181*61046927SAndroid Build Coastguard Worker * Texel Selection") defines the layer to be taken from clamp(RNE(r),
3182*61046927SAndroid Build Coastguard Worker * 0, dt - 1). So we use round RTE, clamping is handled at the data
3183*61046927SAndroid Build Coastguard Worker * structure level */
3184*61046927SAndroid Build Coastguard Worker
3185*61046927SAndroid Build Coastguard Worker bi_instr *I = bi_f32_to_u32_to(b, bi_temp(b->shader), idx);
3186*61046927SAndroid Build Coastguard Worker I->round = BI_ROUND_NONE;
3187*61046927SAndroid Build Coastguard Worker return I->dest[0];
3188*61046927SAndroid Build Coastguard Worker }
3189*61046927SAndroid Build Coastguard Worker
3190*61046927SAndroid Build Coastguard Worker /* TEXC's explicit and bias LOD modes requires the LOD to be transformed to a
3191*61046927SAndroid Build Coastguard Worker * 16-bit 8:8 fixed-point format. We lower as:
3192*61046927SAndroid Build Coastguard Worker *
3193*61046927SAndroid Build Coastguard Worker * F32_TO_S32(clamp(x, -16.0, +16.0) * 256.0) & 0xFFFF =
3194*61046927SAndroid Build Coastguard Worker * MKVEC(F32_TO_S32(clamp(x * 1.0/16.0, -1.0, 1.0) * (16.0 * 256.0)), #0)
3195*61046927SAndroid Build Coastguard Worker */
3196*61046927SAndroid Build Coastguard Worker
3197*61046927SAndroid Build Coastguard Worker static bi_index
bi_emit_texc_lod_88(bi_builder * b,bi_index lod,bool fp16)3198*61046927SAndroid Build Coastguard Worker bi_emit_texc_lod_88(bi_builder *b, bi_index lod, bool fp16)
3199*61046927SAndroid Build Coastguard Worker {
3200*61046927SAndroid Build Coastguard Worker /* Precompute for constant LODs to avoid general constant folding */
3201*61046927SAndroid Build Coastguard Worker if (lod.type == BI_INDEX_CONSTANT) {
3202*61046927SAndroid Build Coastguard Worker uint32_t raw = lod.value;
3203*61046927SAndroid Build Coastguard Worker float x = fp16 ? _mesa_half_to_float(raw) : uif(raw);
3204*61046927SAndroid Build Coastguard Worker int32_t s32 = CLAMP(x, -16.0f, 16.0f) * 256.0f;
3205*61046927SAndroid Build Coastguard Worker return bi_imm_u32(s32 & 0xFFFF);
3206*61046927SAndroid Build Coastguard Worker }
3207*61046927SAndroid Build Coastguard Worker
3208*61046927SAndroid Build Coastguard Worker /* Sort of arbitrary. Must be less than 128.0, greater than or equal to
3209*61046927SAndroid Build Coastguard Worker * the max LOD (16 since we cap at 2^16 texture dimensions), and
3210*61046927SAndroid Build Coastguard Worker * preferably small to minimize precision loss */
3211*61046927SAndroid Build Coastguard Worker const float max_lod = 16.0;
3212*61046927SAndroid Build Coastguard Worker
3213*61046927SAndroid Build Coastguard Worker bi_instr *fsat =
3214*61046927SAndroid Build Coastguard Worker bi_fma_f32_to(b, bi_temp(b->shader), fp16 ? bi_half(lod, false) : lod,
3215*61046927SAndroid Build Coastguard Worker bi_imm_f32(1.0f / max_lod), bi_negzero());
3216*61046927SAndroid Build Coastguard Worker
3217*61046927SAndroid Build Coastguard Worker fsat->clamp = BI_CLAMP_CLAMP_M1_1;
3218*61046927SAndroid Build Coastguard Worker
3219*61046927SAndroid Build Coastguard Worker bi_index fmul =
3220*61046927SAndroid Build Coastguard Worker bi_fma_f32(b, fsat->dest[0], bi_imm_f32(max_lod * 256.0f), bi_negzero());
3221*61046927SAndroid Build Coastguard Worker
3222*61046927SAndroid Build Coastguard Worker return bi_mkvec_v2i16(b, bi_half(bi_f32_to_s32(b, fmul), false),
3223*61046927SAndroid Build Coastguard Worker bi_imm_u16(0));
3224*61046927SAndroid Build Coastguard Worker }
3225*61046927SAndroid Build Coastguard Worker
3226*61046927SAndroid Build Coastguard Worker /* FETCH takes a 32-bit staging register containing the LOD as an integer in
3227*61046927SAndroid Build Coastguard Worker * the bottom 16-bits and (if present) the cube face index in the top 16-bits.
3228*61046927SAndroid Build Coastguard Worker * TODO: Cube face.
3229*61046927SAndroid Build Coastguard Worker */
3230*61046927SAndroid Build Coastguard Worker
3231*61046927SAndroid Build Coastguard Worker static bi_index
bi_emit_texc_lod_cube(bi_builder * b,bi_index lod)3232*61046927SAndroid Build Coastguard Worker bi_emit_texc_lod_cube(bi_builder *b, bi_index lod)
3233*61046927SAndroid Build Coastguard Worker {
3234*61046927SAndroid Build Coastguard Worker return bi_lshift_or_i32(b, lod, bi_zero(), bi_imm_u8(8));
3235*61046927SAndroid Build Coastguard Worker }
3236*61046927SAndroid Build Coastguard Worker
3237*61046927SAndroid Build Coastguard Worker /* The hardware specifies texel offsets and multisample indices together as a
3238*61046927SAndroid Build Coastguard Worker * u8vec4 <offset, ms index>. By default all are zero, so if have either a
3239*61046927SAndroid Build Coastguard Worker * nonzero texel offset or a nonzero multisample index, we build a u8vec4 with
3240*61046927SAndroid Build Coastguard Worker * the bits we need and return that to be passed as a staging register. Else we
3241*61046927SAndroid Build Coastguard Worker * return 0 to avoid allocating a data register when everything is zero. */
3242*61046927SAndroid Build Coastguard Worker
3243*61046927SAndroid Build Coastguard Worker static bi_index
bi_emit_texc_offset_ms_index(bi_builder * b,nir_tex_instr * instr)3244*61046927SAndroid Build Coastguard Worker bi_emit_texc_offset_ms_index(bi_builder *b, nir_tex_instr *instr)
3245*61046927SAndroid Build Coastguard Worker {
3246*61046927SAndroid Build Coastguard Worker bi_index dest = bi_zero();
3247*61046927SAndroid Build Coastguard Worker
3248*61046927SAndroid Build Coastguard Worker int offs_idx = nir_tex_instr_src_index(instr, nir_tex_src_offset);
3249*61046927SAndroid Build Coastguard Worker if (offs_idx >= 0 && (!nir_src_is_const(instr->src[offs_idx].src) ||
3250*61046927SAndroid Build Coastguard Worker nir_src_as_uint(instr->src[offs_idx].src) != 0)) {
3251*61046927SAndroid Build Coastguard Worker unsigned nr = nir_src_num_components(instr->src[offs_idx].src);
3252*61046927SAndroid Build Coastguard Worker bi_index idx = bi_src_index(&instr->src[offs_idx].src);
3253*61046927SAndroid Build Coastguard Worker dest = bi_mkvec_v4i8(
3254*61046927SAndroid Build Coastguard Worker b, (nr > 0) ? bi_byte(bi_extract(b, idx, 0), 0) : bi_imm_u8(0),
3255*61046927SAndroid Build Coastguard Worker (nr > 1) ? bi_byte(bi_extract(b, idx, 1), 0) : bi_imm_u8(0),
3256*61046927SAndroid Build Coastguard Worker (nr > 2) ? bi_byte(bi_extract(b, idx, 2), 0) : bi_imm_u8(0),
3257*61046927SAndroid Build Coastguard Worker bi_imm_u8(0));
3258*61046927SAndroid Build Coastguard Worker }
3259*61046927SAndroid Build Coastguard Worker
3260*61046927SAndroid Build Coastguard Worker int ms_idx = nir_tex_instr_src_index(instr, nir_tex_src_ms_index);
3261*61046927SAndroid Build Coastguard Worker if (ms_idx >= 0 && (!nir_src_is_const(instr->src[ms_idx].src) ||
3262*61046927SAndroid Build Coastguard Worker nir_src_as_uint(instr->src[ms_idx].src) != 0)) {
3263*61046927SAndroid Build Coastguard Worker dest = bi_lshift_or_i32(b, bi_src_index(&instr->src[ms_idx].src), dest,
3264*61046927SAndroid Build Coastguard Worker bi_imm_u8(24));
3265*61046927SAndroid Build Coastguard Worker }
3266*61046927SAndroid Build Coastguard Worker
3267*61046927SAndroid Build Coastguard Worker return dest;
3268*61046927SAndroid Build Coastguard Worker }
3269*61046927SAndroid Build Coastguard Worker
3270*61046927SAndroid Build Coastguard Worker /*
3271*61046927SAndroid Build Coastguard Worker * Valhall specifies specifies texel offsets, multisample indices, and (for
3272*61046927SAndroid Build Coastguard Worker * fetches) LOD together as a u8vec4 <offset.xyz, LOD>, where the third
3273*61046927SAndroid Build Coastguard Worker * component is either offset.z or multisample index depending on context. Build
3274*61046927SAndroid Build Coastguard Worker * this register.
3275*61046927SAndroid Build Coastguard Worker */
3276*61046927SAndroid Build Coastguard Worker static bi_index
bi_emit_valhall_offsets(bi_builder * b,nir_tex_instr * instr)3277*61046927SAndroid Build Coastguard Worker bi_emit_valhall_offsets(bi_builder *b, nir_tex_instr *instr)
3278*61046927SAndroid Build Coastguard Worker {
3279*61046927SAndroid Build Coastguard Worker bi_index dest = bi_zero();
3280*61046927SAndroid Build Coastguard Worker
3281*61046927SAndroid Build Coastguard Worker int offs_idx = nir_tex_instr_src_index(instr, nir_tex_src_offset);
3282*61046927SAndroid Build Coastguard Worker int ms_idx = nir_tex_instr_src_index(instr, nir_tex_src_ms_index);
3283*61046927SAndroid Build Coastguard Worker int lod_idx = nir_tex_instr_src_index(instr, nir_tex_src_lod);
3284*61046927SAndroid Build Coastguard Worker
3285*61046927SAndroid Build Coastguard Worker /* Components 0-2: offsets */
3286*61046927SAndroid Build Coastguard Worker if (offs_idx >= 0 && (!nir_src_is_const(instr->src[offs_idx].src) ||
3287*61046927SAndroid Build Coastguard Worker nir_src_as_uint(instr->src[offs_idx].src) != 0)) {
3288*61046927SAndroid Build Coastguard Worker unsigned nr = nir_src_num_components(instr->src[offs_idx].src);
3289*61046927SAndroid Build Coastguard Worker bi_index idx = bi_src_index(&instr->src[offs_idx].src);
3290*61046927SAndroid Build Coastguard Worker
3291*61046927SAndroid Build Coastguard Worker /* No multisample index with 3D */
3292*61046927SAndroid Build Coastguard Worker assert((nr <= 2) || (ms_idx < 0));
3293*61046927SAndroid Build Coastguard Worker
3294*61046927SAndroid Build Coastguard Worker /* Zero extend the Z byte so we can use it with MKVEC.v2i8 */
3295*61046927SAndroid Build Coastguard Worker bi_index z = (nr > 2)
3296*61046927SAndroid Build Coastguard Worker ? bi_mkvec_v2i8(b, bi_byte(bi_extract(b, idx, 2), 0),
3297*61046927SAndroid Build Coastguard Worker bi_imm_u8(0), bi_zero())
3298*61046927SAndroid Build Coastguard Worker : bi_zero();
3299*61046927SAndroid Build Coastguard Worker
3300*61046927SAndroid Build Coastguard Worker dest = bi_mkvec_v2i8(
3301*61046927SAndroid Build Coastguard Worker b, (nr > 0) ? bi_byte(bi_extract(b, idx, 0), 0) : bi_imm_u8(0),
3302*61046927SAndroid Build Coastguard Worker (nr > 1) ? bi_byte(bi_extract(b, idx, 1), 0) : bi_imm_u8(0), z);
3303*61046927SAndroid Build Coastguard Worker }
3304*61046927SAndroid Build Coastguard Worker
3305*61046927SAndroid Build Coastguard Worker /* Component 2: multisample index */
3306*61046927SAndroid Build Coastguard Worker if (ms_idx >= 0 && (!nir_src_is_const(instr->src[ms_idx].src) ||
3307*61046927SAndroid Build Coastguard Worker nir_src_as_uint(instr->src[ms_idx].src) != 0)) {
3308*61046927SAndroid Build Coastguard Worker dest = bi_mkvec_v2i16(b, dest, bi_src_index(&instr->src[ms_idx].src));
3309*61046927SAndroid Build Coastguard Worker }
3310*61046927SAndroid Build Coastguard Worker
3311*61046927SAndroid Build Coastguard Worker /* Component 3: 8-bit LOD */
3312*61046927SAndroid Build Coastguard Worker if (lod_idx >= 0 &&
3313*61046927SAndroid Build Coastguard Worker (!nir_src_is_const(instr->src[lod_idx].src) ||
3314*61046927SAndroid Build Coastguard Worker nir_src_as_uint(instr->src[lod_idx].src) != 0) &&
3315*61046927SAndroid Build Coastguard Worker nir_tex_instr_src_type(instr, lod_idx) != nir_type_float) {
3316*61046927SAndroid Build Coastguard Worker dest = bi_lshift_or_i32(b, bi_src_index(&instr->src[lod_idx].src), dest,
3317*61046927SAndroid Build Coastguard Worker bi_imm_u8(24));
3318*61046927SAndroid Build Coastguard Worker }
3319*61046927SAndroid Build Coastguard Worker
3320*61046927SAndroid Build Coastguard Worker return dest;
3321*61046927SAndroid Build Coastguard Worker }
3322*61046927SAndroid Build Coastguard Worker
3323*61046927SAndroid Build Coastguard Worker static void
bi_emit_cube_coord(bi_builder * b,bi_index coord,bi_index * face,bi_index * s,bi_index * t)3324*61046927SAndroid Build Coastguard Worker bi_emit_cube_coord(bi_builder *b, bi_index coord, bi_index *face, bi_index *s,
3325*61046927SAndroid Build Coastguard Worker bi_index *t)
3326*61046927SAndroid Build Coastguard Worker {
3327*61046927SAndroid Build Coastguard Worker /* Compute max { |x|, |y|, |z| } */
3328*61046927SAndroid Build Coastguard Worker bi_index maxxyz = bi_temp(b->shader);
3329*61046927SAndroid Build Coastguard Worker *face = bi_temp(b->shader);
3330*61046927SAndroid Build Coastguard Worker
3331*61046927SAndroid Build Coastguard Worker bi_index cx = bi_extract(b, coord, 0), cy = bi_extract(b, coord, 1),
3332*61046927SAndroid Build Coastguard Worker cz = bi_extract(b, coord, 2);
3333*61046927SAndroid Build Coastguard Worker
3334*61046927SAndroid Build Coastguard Worker /* Use a pseudo op on Bifrost due to tuple restrictions */
3335*61046927SAndroid Build Coastguard Worker if (b->shader->arch <= 8) {
3336*61046927SAndroid Build Coastguard Worker bi_cubeface_to(b, maxxyz, *face, cx, cy, cz);
3337*61046927SAndroid Build Coastguard Worker } else {
3338*61046927SAndroid Build Coastguard Worker bi_cubeface1_to(b, maxxyz, cx, cy, cz);
3339*61046927SAndroid Build Coastguard Worker bi_cubeface2_v9_to(b, *face, cx, cy, cz);
3340*61046927SAndroid Build Coastguard Worker }
3341*61046927SAndroid Build Coastguard Worker
3342*61046927SAndroid Build Coastguard Worker /* Select coordinates */
3343*61046927SAndroid Build Coastguard Worker bi_index ssel =
3344*61046927SAndroid Build Coastguard Worker bi_cube_ssel(b, bi_extract(b, coord, 2), bi_extract(b, coord, 0), *face);
3345*61046927SAndroid Build Coastguard Worker bi_index tsel =
3346*61046927SAndroid Build Coastguard Worker bi_cube_tsel(b, bi_extract(b, coord, 1), bi_extract(b, coord, 2), *face);
3347*61046927SAndroid Build Coastguard Worker
3348*61046927SAndroid Build Coastguard Worker /* The OpenGL ES specification requires us to transform an input vector
3349*61046927SAndroid Build Coastguard Worker * (x, y, z) to the coordinate, given the selected S/T:
3350*61046927SAndroid Build Coastguard Worker *
3351*61046927SAndroid Build Coastguard Worker * (1/2 ((s / max{x,y,z}) + 1), 1/2 ((t / max{x, y, z}) + 1))
3352*61046927SAndroid Build Coastguard Worker *
3353*61046927SAndroid Build Coastguard Worker * We implement (s shown, t similar) in a form friendlier to FMA
3354*61046927SAndroid Build Coastguard Worker * instructions, and clamp coordinates at the end for correct
3355*61046927SAndroid Build Coastguard Worker * NaN/infinity handling:
3356*61046927SAndroid Build Coastguard Worker *
3357*61046927SAndroid Build Coastguard Worker * fsat(s * (0.5 * (1 / max{x, y, z})) + 0.5)
3358*61046927SAndroid Build Coastguard Worker *
3359*61046927SAndroid Build Coastguard Worker * Take the reciprocal of max{x, y, z}
3360*61046927SAndroid Build Coastguard Worker */
3361*61046927SAndroid Build Coastguard Worker bi_index rcp = bi_frcp_f32(b, maxxyz);
3362*61046927SAndroid Build Coastguard Worker
3363*61046927SAndroid Build Coastguard Worker /* Calculate 0.5 * (1.0 / max{x, y, z}) */
3364*61046927SAndroid Build Coastguard Worker bi_index fma1 = bi_fma_f32(b, rcp, bi_imm_f32(0.5f), bi_negzero());
3365*61046927SAndroid Build Coastguard Worker
3366*61046927SAndroid Build Coastguard Worker /* Transform the coordinates */
3367*61046927SAndroid Build Coastguard Worker *s = bi_temp(b->shader);
3368*61046927SAndroid Build Coastguard Worker *t = bi_temp(b->shader);
3369*61046927SAndroid Build Coastguard Worker
3370*61046927SAndroid Build Coastguard Worker bi_instr *S = bi_fma_f32_to(b, *s, fma1, ssel, bi_imm_f32(0.5f));
3371*61046927SAndroid Build Coastguard Worker bi_instr *T = bi_fma_f32_to(b, *t, fma1, tsel, bi_imm_f32(0.5f));
3372*61046927SAndroid Build Coastguard Worker
3373*61046927SAndroid Build Coastguard Worker S->clamp = BI_CLAMP_CLAMP_0_1;
3374*61046927SAndroid Build Coastguard Worker T->clamp = BI_CLAMP_CLAMP_0_1;
3375*61046927SAndroid Build Coastguard Worker }
3376*61046927SAndroid Build Coastguard Worker
3377*61046927SAndroid Build Coastguard Worker /* Emits a cube map descriptor, returning lower 32-bits and putting upper
3378*61046927SAndroid Build Coastguard Worker * 32-bits in passed pointer t. The packing of the face with the S coordinate
3379*61046927SAndroid Build Coastguard Worker * exploits the redundancy of floating points with the range restriction of
3380*61046927SAndroid Build Coastguard Worker * CUBEFACE output.
3381*61046927SAndroid Build Coastguard Worker *
3382*61046927SAndroid Build Coastguard Worker * struct cube_map_descriptor {
3383*61046927SAndroid Build Coastguard Worker * float s : 29;
3384*61046927SAndroid Build Coastguard Worker * unsigned face : 3;
3385*61046927SAndroid Build Coastguard Worker * float t : 32;
3386*61046927SAndroid Build Coastguard Worker * }
3387*61046927SAndroid Build Coastguard Worker *
3388*61046927SAndroid Build Coastguard Worker * Since the cube face index is preshifted, this is easy to pack with a bitwise
3389*61046927SAndroid Build Coastguard Worker * MUX.i32 and a fixed mask, selecting the lower bits 29 from s and the upper 3
3390*61046927SAndroid Build Coastguard Worker * bits from face.
3391*61046927SAndroid Build Coastguard Worker */
3392*61046927SAndroid Build Coastguard Worker
3393*61046927SAndroid Build Coastguard Worker static bi_index
bi_emit_texc_cube_coord(bi_builder * b,bi_index coord,bi_index * t)3394*61046927SAndroid Build Coastguard Worker bi_emit_texc_cube_coord(bi_builder *b, bi_index coord, bi_index *t)
3395*61046927SAndroid Build Coastguard Worker {
3396*61046927SAndroid Build Coastguard Worker bi_index face, s;
3397*61046927SAndroid Build Coastguard Worker bi_emit_cube_coord(b, coord, &face, &s, t);
3398*61046927SAndroid Build Coastguard Worker bi_index mask = bi_imm_u32(BITFIELD_MASK(29));
3399*61046927SAndroid Build Coastguard Worker return bi_mux_i32(b, s, face, mask, BI_MUX_BIT);
3400*61046927SAndroid Build Coastguard Worker }
3401*61046927SAndroid Build Coastguard Worker
3402*61046927SAndroid Build Coastguard Worker /* Map to the main texture op used. Some of these (txd in particular) will
3403*61046927SAndroid Build Coastguard Worker * lower to multiple texture ops with different opcodes (GRDESC_DER + TEX in
3404*61046927SAndroid Build Coastguard Worker * sequence). We assume that lowering is handled elsewhere.
3405*61046927SAndroid Build Coastguard Worker */
3406*61046927SAndroid Build Coastguard Worker
3407*61046927SAndroid Build Coastguard Worker static enum bifrost_tex_op
bi_tex_op(nir_texop op)3408*61046927SAndroid Build Coastguard Worker bi_tex_op(nir_texop op)
3409*61046927SAndroid Build Coastguard Worker {
3410*61046927SAndroid Build Coastguard Worker switch (op) {
3411*61046927SAndroid Build Coastguard Worker case nir_texop_tex:
3412*61046927SAndroid Build Coastguard Worker case nir_texop_txb:
3413*61046927SAndroid Build Coastguard Worker case nir_texop_txl:
3414*61046927SAndroid Build Coastguard Worker case nir_texop_txd:
3415*61046927SAndroid Build Coastguard Worker return BIFROST_TEX_OP_TEX;
3416*61046927SAndroid Build Coastguard Worker case nir_texop_txf:
3417*61046927SAndroid Build Coastguard Worker case nir_texop_txf_ms:
3418*61046927SAndroid Build Coastguard Worker case nir_texop_tg4:
3419*61046927SAndroid Build Coastguard Worker return BIFROST_TEX_OP_FETCH;
3420*61046927SAndroid Build Coastguard Worker case nir_texop_txs:
3421*61046927SAndroid Build Coastguard Worker case nir_texop_lod:
3422*61046927SAndroid Build Coastguard Worker case nir_texop_query_levels:
3423*61046927SAndroid Build Coastguard Worker case nir_texop_texture_samples:
3424*61046927SAndroid Build Coastguard Worker case nir_texop_samples_identical:
3425*61046927SAndroid Build Coastguard Worker unreachable("should've been lowered");
3426*61046927SAndroid Build Coastguard Worker default:
3427*61046927SAndroid Build Coastguard Worker unreachable("unsupported tex op");
3428*61046927SAndroid Build Coastguard Worker }
3429*61046927SAndroid Build Coastguard Worker }
3430*61046927SAndroid Build Coastguard Worker
/* Data registers consumed by Bifrost texturing, listed in the order in which
 * they appear. Every entry is optional; the texture operation descriptor
 * decides which ones are present. Because 3D arrays are not permitted at the
 * API level, Z_COORD and ARRAY/SHADOW are mutually exclusive, so in practice
 * TEXC reads at most 8 registers.
 */

enum bifrost_tex_dreg {
   BIFROST_TEX_DREG_Z_COORD = 0,   /* third coordinate (non-array 3D) */
   BIFROST_TEX_DREG_Y_DELTAS = 1,
   BIFROST_TEX_DREG_LOD = 2,       /* explicit LOD, bias or fetch LOD */
   BIFROST_TEX_DREG_GRDESC_HI = 3,
   BIFROST_TEX_DREG_SHADOW = 4,    /* shadow comparator */
   BIFROST_TEX_DREG_ARRAY = 5,     /* array layer index */
   BIFROST_TEX_DREG_OFFSETMS = 6,  /* packed texel offset / MS index */
   BIFROST_TEX_DREG_SAMPLER = 7,   /* sampler index held in a register */
   BIFROST_TEX_DREG_TEXTURE = 8,   /* texture index held in a register */
   BIFROST_TEX_DREG_COUNT,         /* number of slots above */
};
3448*61046927SAndroid Build Coastguard Worker
3449*61046927SAndroid Build Coastguard Worker static void
bi_emit_texc(bi_builder * b,nir_tex_instr * instr)3450*61046927SAndroid Build Coastguard Worker bi_emit_texc(bi_builder *b, nir_tex_instr *instr)
3451*61046927SAndroid Build Coastguard Worker {
3452*61046927SAndroid Build Coastguard Worker struct bifrost_texture_operation desc = {
3453*61046927SAndroid Build Coastguard Worker .op = bi_tex_op(instr->op),
3454*61046927SAndroid Build Coastguard Worker .offset_or_bias_disable = false, /* TODO */
3455*61046927SAndroid Build Coastguard Worker .shadow_or_clamp_disable = instr->is_shadow,
3456*61046927SAndroid Build Coastguard Worker .array = instr->is_array,
3457*61046927SAndroid Build Coastguard Worker .dimension = bifrost_tex_format(instr->sampler_dim),
3458*61046927SAndroid Build Coastguard Worker .format = bi_texture_format(instr->dest_type | instr->def.bit_size,
3459*61046927SAndroid Build Coastguard Worker BI_CLAMP_NONE), /* TODO */
3460*61046927SAndroid Build Coastguard Worker .mask = 0xF,
3461*61046927SAndroid Build Coastguard Worker };
3462*61046927SAndroid Build Coastguard Worker
3463*61046927SAndroid Build Coastguard Worker switch (desc.op) {
3464*61046927SAndroid Build Coastguard Worker case BIFROST_TEX_OP_TEX:
3465*61046927SAndroid Build Coastguard Worker desc.lod_or_fetch = BIFROST_LOD_MODE_COMPUTE;
3466*61046927SAndroid Build Coastguard Worker break;
3467*61046927SAndroid Build Coastguard Worker case BIFROST_TEX_OP_FETCH:
3468*61046927SAndroid Build Coastguard Worker desc.lod_or_fetch = (enum bifrost_lod_mode)(
3469*61046927SAndroid Build Coastguard Worker instr->op == nir_texop_tg4
3470*61046927SAndroid Build Coastguard Worker ? BIFROST_TEXTURE_FETCH_GATHER4_R + instr->component
3471*61046927SAndroid Build Coastguard Worker : BIFROST_TEXTURE_FETCH_TEXEL);
3472*61046927SAndroid Build Coastguard Worker break;
3473*61046927SAndroid Build Coastguard Worker default:
3474*61046927SAndroid Build Coastguard Worker unreachable("texture op unsupported");
3475*61046927SAndroid Build Coastguard Worker }
3476*61046927SAndroid Build Coastguard Worker
3477*61046927SAndroid Build Coastguard Worker /* 32-bit indices to be allocated as consecutive staging registers */
3478*61046927SAndroid Build Coastguard Worker bi_index dregs[BIFROST_TEX_DREG_COUNT] = {};
3479*61046927SAndroid Build Coastguard Worker bi_index cx = bi_null(), cy = bi_null();
3480*61046927SAndroid Build Coastguard Worker
3481*61046927SAndroid Build Coastguard Worker for (unsigned i = 0; i < instr->num_srcs; ++i) {
3482*61046927SAndroid Build Coastguard Worker bi_index index = bi_src_index(&instr->src[i].src);
3483*61046927SAndroid Build Coastguard Worker unsigned sz = nir_src_bit_size(instr->src[i].src);
3484*61046927SAndroid Build Coastguard Worker unsigned components = nir_src_num_components(instr->src[i].src);
3485*61046927SAndroid Build Coastguard Worker ASSERTED nir_alu_type base = nir_tex_instr_src_type(instr, i);
3486*61046927SAndroid Build Coastguard Worker nir_alu_type T = base | sz;
3487*61046927SAndroid Build Coastguard Worker
3488*61046927SAndroid Build Coastguard Worker switch (instr->src[i].src_type) {
3489*61046927SAndroid Build Coastguard Worker case nir_tex_src_coord:
3490*61046927SAndroid Build Coastguard Worker if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
3491*61046927SAndroid Build Coastguard Worker cx = bi_emit_texc_cube_coord(b, index, &cy);
3492*61046927SAndroid Build Coastguard Worker } else {
3493*61046927SAndroid Build Coastguard Worker /* Copy XY (for 2D+) or XX (for 1D) */
3494*61046927SAndroid Build Coastguard Worker cx = bi_extract(b, index, 0);
3495*61046927SAndroid Build Coastguard Worker cy = bi_extract(b, index, MIN2(1, components - 1));
3496*61046927SAndroid Build Coastguard Worker
3497*61046927SAndroid Build Coastguard Worker assert(components >= 1 && components <= 3);
3498*61046927SAndroid Build Coastguard Worker
3499*61046927SAndroid Build Coastguard Worker if (components == 3 && !desc.array) {
3500*61046927SAndroid Build Coastguard Worker /* 3D */
3501*61046927SAndroid Build Coastguard Worker dregs[BIFROST_TEX_DREG_Z_COORD] = bi_extract(b, index, 2);
3502*61046927SAndroid Build Coastguard Worker }
3503*61046927SAndroid Build Coastguard Worker }
3504*61046927SAndroid Build Coastguard Worker
3505*61046927SAndroid Build Coastguard Worker if (desc.array) {
3506*61046927SAndroid Build Coastguard Worker dregs[BIFROST_TEX_DREG_ARRAY] = bi_emit_texc_array_index(
3507*61046927SAndroid Build Coastguard Worker b, bi_extract(b, index, components - 1), T);
3508*61046927SAndroid Build Coastguard Worker }
3509*61046927SAndroid Build Coastguard Worker
3510*61046927SAndroid Build Coastguard Worker break;
3511*61046927SAndroid Build Coastguard Worker
3512*61046927SAndroid Build Coastguard Worker case nir_tex_src_lod:
3513*61046927SAndroid Build Coastguard Worker if (desc.op == BIFROST_TEX_OP_TEX &&
3514*61046927SAndroid Build Coastguard Worker nir_src_is_const(instr->src[i].src) &&
3515*61046927SAndroid Build Coastguard Worker nir_src_as_uint(instr->src[i].src) == 0) {
3516*61046927SAndroid Build Coastguard Worker desc.lod_or_fetch = BIFROST_LOD_MODE_ZERO;
3517*61046927SAndroid Build Coastguard Worker } else if (desc.op == BIFROST_TEX_OP_TEX) {
3518*61046927SAndroid Build Coastguard Worker assert(base == nir_type_float);
3519*61046927SAndroid Build Coastguard Worker
3520*61046927SAndroid Build Coastguard Worker assert(sz == 16 || sz == 32);
3521*61046927SAndroid Build Coastguard Worker dregs[BIFROST_TEX_DREG_LOD] =
3522*61046927SAndroid Build Coastguard Worker bi_emit_texc_lod_88(b, index, sz == 16);
3523*61046927SAndroid Build Coastguard Worker desc.lod_or_fetch = BIFROST_LOD_MODE_EXPLICIT;
3524*61046927SAndroid Build Coastguard Worker } else {
3525*61046927SAndroid Build Coastguard Worker assert(desc.op == BIFROST_TEX_OP_FETCH);
3526*61046927SAndroid Build Coastguard Worker assert(base == nir_type_uint || base == nir_type_int);
3527*61046927SAndroid Build Coastguard Worker assert(sz == 16 || sz == 32);
3528*61046927SAndroid Build Coastguard Worker
3529*61046927SAndroid Build Coastguard Worker dregs[BIFROST_TEX_DREG_LOD] = bi_emit_texc_lod_cube(b, index);
3530*61046927SAndroid Build Coastguard Worker }
3531*61046927SAndroid Build Coastguard Worker
3532*61046927SAndroid Build Coastguard Worker break;
3533*61046927SAndroid Build Coastguard Worker
3534*61046927SAndroid Build Coastguard Worker case nir_tex_src_bias:
3535*61046927SAndroid Build Coastguard Worker /* Upper 16-bits interpreted as a clamp, leave zero */
3536*61046927SAndroid Build Coastguard Worker assert(desc.op == BIFROST_TEX_OP_TEX);
3537*61046927SAndroid Build Coastguard Worker assert(base == nir_type_float);
3538*61046927SAndroid Build Coastguard Worker assert(sz == 16 || sz == 32);
3539*61046927SAndroid Build Coastguard Worker dregs[BIFROST_TEX_DREG_LOD] = bi_emit_texc_lod_88(b, index, sz == 16);
3540*61046927SAndroid Build Coastguard Worker desc.lod_or_fetch = BIFROST_LOD_MODE_BIAS;
3541*61046927SAndroid Build Coastguard Worker break;
3542*61046927SAndroid Build Coastguard Worker
3543*61046927SAndroid Build Coastguard Worker case nir_tex_src_ms_index:
3544*61046927SAndroid Build Coastguard Worker case nir_tex_src_offset:
3545*61046927SAndroid Build Coastguard Worker if (desc.offset_or_bias_disable)
3546*61046927SAndroid Build Coastguard Worker break;
3547*61046927SAndroid Build Coastguard Worker
3548*61046927SAndroid Build Coastguard Worker dregs[BIFROST_TEX_DREG_OFFSETMS] =
3549*61046927SAndroid Build Coastguard Worker bi_emit_texc_offset_ms_index(b, instr);
3550*61046927SAndroid Build Coastguard Worker if (!bi_is_equiv(dregs[BIFROST_TEX_DREG_OFFSETMS], bi_zero()))
3551*61046927SAndroid Build Coastguard Worker desc.offset_or_bias_disable = true;
3552*61046927SAndroid Build Coastguard Worker break;
3553*61046927SAndroid Build Coastguard Worker
3554*61046927SAndroid Build Coastguard Worker case nir_tex_src_comparator:
3555*61046927SAndroid Build Coastguard Worker dregs[BIFROST_TEX_DREG_SHADOW] = index;
3556*61046927SAndroid Build Coastguard Worker break;
3557*61046927SAndroid Build Coastguard Worker
3558*61046927SAndroid Build Coastguard Worker case nir_tex_src_texture_offset:
3559*61046927SAndroid Build Coastguard Worker dregs[BIFROST_TEX_DREG_TEXTURE] = index;
3560*61046927SAndroid Build Coastguard Worker break;
3561*61046927SAndroid Build Coastguard Worker
3562*61046927SAndroid Build Coastguard Worker case nir_tex_src_sampler_offset:
3563*61046927SAndroid Build Coastguard Worker dregs[BIFROST_TEX_DREG_SAMPLER] = index;
3564*61046927SAndroid Build Coastguard Worker break;
3565*61046927SAndroid Build Coastguard Worker
3566*61046927SAndroid Build Coastguard Worker default:
3567*61046927SAndroid Build Coastguard Worker unreachable("Unhandled src type in texc emit");
3568*61046927SAndroid Build Coastguard Worker }
3569*61046927SAndroid Build Coastguard Worker }
3570*61046927SAndroid Build Coastguard Worker
3571*61046927SAndroid Build Coastguard Worker if (desc.op == BIFROST_TEX_OP_FETCH &&
3572*61046927SAndroid Build Coastguard Worker bi_is_null(dregs[BIFROST_TEX_DREG_LOD])) {
3573*61046927SAndroid Build Coastguard Worker dregs[BIFROST_TEX_DREG_LOD] = bi_emit_texc_lod_cube(b, bi_zero());
3574*61046927SAndroid Build Coastguard Worker }
3575*61046927SAndroid Build Coastguard Worker
3576*61046927SAndroid Build Coastguard Worker /* Choose an index mode */
3577*61046927SAndroid Build Coastguard Worker
3578*61046927SAndroid Build Coastguard Worker bool direct_tex = bi_is_null(dregs[BIFROST_TEX_DREG_TEXTURE]);
3579*61046927SAndroid Build Coastguard Worker bool direct_samp = bi_is_null(dregs[BIFROST_TEX_DREG_SAMPLER]);
3580*61046927SAndroid Build Coastguard Worker bool direct = direct_tex && direct_samp;
3581*61046927SAndroid Build Coastguard Worker
3582*61046927SAndroid Build Coastguard Worker desc.immediate_indices =
3583*61046927SAndroid Build Coastguard Worker direct && (instr->sampler_index < 16 && instr->texture_index < 128);
3584*61046927SAndroid Build Coastguard Worker
3585*61046927SAndroid Build Coastguard Worker if (desc.immediate_indices) {
3586*61046927SAndroid Build Coastguard Worker desc.sampler_index_or_mode = instr->sampler_index;
3587*61046927SAndroid Build Coastguard Worker desc.index = instr->texture_index;
3588*61046927SAndroid Build Coastguard Worker } else {
3589*61046927SAndroid Build Coastguard Worker unsigned mode = 0;
3590*61046927SAndroid Build Coastguard Worker
3591*61046927SAndroid Build Coastguard Worker if (direct && instr->sampler_index == instr->texture_index &&
3592*61046927SAndroid Build Coastguard Worker instr->sampler_index < 128) {
3593*61046927SAndroid Build Coastguard Worker mode = BIFROST_INDEX_IMMEDIATE_SHARED;
3594*61046927SAndroid Build Coastguard Worker desc.index = instr->texture_index;
3595*61046927SAndroid Build Coastguard Worker } else if (direct && instr->sampler_index < 128) {
3596*61046927SAndroid Build Coastguard Worker mode = BIFROST_INDEX_IMMEDIATE_SAMPLER;
3597*61046927SAndroid Build Coastguard Worker desc.index = instr->sampler_index;
3598*61046927SAndroid Build Coastguard Worker dregs[BIFROST_TEX_DREG_TEXTURE] =
3599*61046927SAndroid Build Coastguard Worker bi_mov_i32(b, bi_imm_u32(instr->texture_index));
3600*61046927SAndroid Build Coastguard Worker } else if (direct_tex && instr->texture_index < 128) {
3601*61046927SAndroid Build Coastguard Worker mode = BIFROST_INDEX_IMMEDIATE_TEXTURE;
3602*61046927SAndroid Build Coastguard Worker desc.index = instr->texture_index;
3603*61046927SAndroid Build Coastguard Worker
3604*61046927SAndroid Build Coastguard Worker if (direct_samp) {
3605*61046927SAndroid Build Coastguard Worker dregs[BIFROST_TEX_DREG_SAMPLER] =
3606*61046927SAndroid Build Coastguard Worker bi_mov_i32(b, bi_imm_u32(instr->sampler_index));
3607*61046927SAndroid Build Coastguard Worker }
3608*61046927SAndroid Build Coastguard Worker } else if (direct_samp && instr->sampler_index < 128) {
3609*61046927SAndroid Build Coastguard Worker mode = BIFROST_INDEX_IMMEDIATE_SAMPLER;
3610*61046927SAndroid Build Coastguard Worker desc.index = instr->sampler_index;
3611*61046927SAndroid Build Coastguard Worker
3612*61046927SAndroid Build Coastguard Worker if (direct_tex) {
3613*61046927SAndroid Build Coastguard Worker dregs[BIFROST_TEX_DREG_TEXTURE] =
3614*61046927SAndroid Build Coastguard Worker bi_mov_i32(b, bi_imm_u32(instr->texture_index));
3615*61046927SAndroid Build Coastguard Worker }
3616*61046927SAndroid Build Coastguard Worker } else {
3617*61046927SAndroid Build Coastguard Worker mode = BIFROST_INDEX_REGISTER;
3618*61046927SAndroid Build Coastguard Worker
3619*61046927SAndroid Build Coastguard Worker if (direct_tex) {
3620*61046927SAndroid Build Coastguard Worker dregs[BIFROST_TEX_DREG_TEXTURE] =
3621*61046927SAndroid Build Coastguard Worker bi_mov_i32(b, bi_imm_u32(instr->texture_index));
3622*61046927SAndroid Build Coastguard Worker }
3623*61046927SAndroid Build Coastguard Worker
3624*61046927SAndroid Build Coastguard Worker if (direct_samp) {
3625*61046927SAndroid Build Coastguard Worker dregs[BIFROST_TEX_DREG_SAMPLER] =
3626*61046927SAndroid Build Coastguard Worker bi_mov_i32(b, bi_imm_u32(instr->sampler_index));
3627*61046927SAndroid Build Coastguard Worker }
3628*61046927SAndroid Build Coastguard Worker }
3629*61046927SAndroid Build Coastguard Worker
3630*61046927SAndroid Build Coastguard Worker mode |= (BIFROST_TEXTURE_OPERATION_SINGLE << 2);
3631*61046927SAndroid Build Coastguard Worker desc.sampler_index_or_mode = mode;
3632*61046927SAndroid Build Coastguard Worker }
3633*61046927SAndroid Build Coastguard Worker
3634*61046927SAndroid Build Coastguard Worker /* Allocate staging registers contiguously by compacting the array. */
3635*61046927SAndroid Build Coastguard Worker unsigned sr_count = 0;
3636*61046927SAndroid Build Coastguard Worker
3637*61046927SAndroid Build Coastguard Worker for (unsigned i = 0; i < ARRAY_SIZE(dregs); ++i) {
3638*61046927SAndroid Build Coastguard Worker if (!bi_is_null(dregs[i]))
3639*61046927SAndroid Build Coastguard Worker dregs[sr_count++] = dregs[i];
3640*61046927SAndroid Build Coastguard Worker }
3641*61046927SAndroid Build Coastguard Worker
3642*61046927SAndroid Build Coastguard Worker unsigned res_size = instr->def.bit_size == 16 ? 2 : 4;
3643*61046927SAndroid Build Coastguard Worker
3644*61046927SAndroid Build Coastguard Worker bi_index sr = sr_count ? bi_temp(b->shader) : bi_null();
3645*61046927SAndroid Build Coastguard Worker bi_index dst = bi_temp(b->shader);
3646*61046927SAndroid Build Coastguard Worker
3647*61046927SAndroid Build Coastguard Worker if (sr_count)
3648*61046927SAndroid Build Coastguard Worker bi_emit_collect_to(b, sr, dregs, sr_count);
3649*61046927SAndroid Build Coastguard Worker
3650*61046927SAndroid Build Coastguard Worker uint32_t desc_u = 0;
3651*61046927SAndroid Build Coastguard Worker memcpy(&desc_u, &desc, sizeof(desc_u));
3652*61046927SAndroid Build Coastguard Worker bi_instr *I =
3653*61046927SAndroid Build Coastguard Worker bi_texc_to(b, dst, sr, cx, cy, bi_imm_u32(desc_u),
3654*61046927SAndroid Build Coastguard Worker !nir_tex_instr_has_implicit_derivative(instr), sr_count, 0);
3655*61046927SAndroid Build Coastguard Worker I->register_format = bi_reg_fmt_for_nir(instr->dest_type);
3656*61046927SAndroid Build Coastguard Worker
3657*61046927SAndroid Build Coastguard Worker bi_index w[4] = {bi_null(), bi_null(), bi_null(), bi_null()};
3658*61046927SAndroid Build Coastguard Worker bi_emit_split_i32(b, w, dst, res_size);
3659*61046927SAndroid Build Coastguard Worker bi_emit_collect_to(b, bi_def_index(&instr->def), w,
3660*61046927SAndroid Build Coastguard Worker DIV_ROUND_UP(instr->def.num_components * res_size, 4));
3661*61046927SAndroid Build Coastguard Worker }
3662*61046927SAndroid Build Coastguard Worker
/* Staging registers consumed by Valhall texturing, listed in the order in
 * which they appear. Slots left empty are skipped when the registers are
 * packed contiguously.
 */

enum valhall_tex_sreg {
   VALHALL_TEX_SREG_X_COORD = 0,
   VALHALL_TEX_SREG_Y_COORD = 1,
   VALHALL_TEX_SREG_Z_COORD = 2,
   VALHALL_TEX_SREG_Y_DELTAS = 3,
   VALHALL_TEX_SREG_ARRAY = 4,    /* array layer index */
   VALHALL_TEX_SREG_SHADOW = 5,   /* shadow comparator */
   VALHALL_TEX_SREG_OFFSETMS = 6, /* packed offset / MS index */
   VALHALL_TEX_SREG_LOD = 7,      /* explicit LOD or bias */
   VALHALL_TEX_SREG_GRDESC = 8,
   VALHALL_TEX_SREG_COUNT,        /* number of slots above */
};
3677*61046927SAndroid Build Coastguard Worker
3678*61046927SAndroid Build Coastguard Worker static void
bi_emit_tex_valhall(bi_builder * b,nir_tex_instr * instr)3679*61046927SAndroid Build Coastguard Worker bi_emit_tex_valhall(bi_builder *b, nir_tex_instr *instr)
3680*61046927SAndroid Build Coastguard Worker {
3681*61046927SAndroid Build Coastguard Worker bool explicit_offset = false;
3682*61046927SAndroid Build Coastguard Worker enum bi_va_lod_mode lod_mode = BI_VA_LOD_MODE_COMPUTED_LOD;
3683*61046927SAndroid Build Coastguard Worker
3684*61046927SAndroid Build Coastguard Worker bool has_lod_mode = (instr->op == nir_texop_tex) ||
3685*61046927SAndroid Build Coastguard Worker (instr->op == nir_texop_txl) ||
3686*61046927SAndroid Build Coastguard Worker (instr->op == nir_texop_txb);
3687*61046927SAndroid Build Coastguard Worker
3688*61046927SAndroid Build Coastguard Worker /* 32-bit indices to be allocated as consecutive staging registers */
3689*61046927SAndroid Build Coastguard Worker bi_index sregs[VALHALL_TEX_SREG_COUNT] = {};
3690*61046927SAndroid Build Coastguard Worker bi_index sampler = bi_imm_u32(instr->sampler_index);
3691*61046927SAndroid Build Coastguard Worker bi_index texture = bi_imm_u32(instr->texture_index);
3692*61046927SAndroid Build Coastguard Worker
3693*61046927SAndroid Build Coastguard Worker for (unsigned i = 0; i < instr->num_srcs; ++i) {
3694*61046927SAndroid Build Coastguard Worker bi_index index = bi_src_index(&instr->src[i].src);
3695*61046927SAndroid Build Coastguard Worker unsigned sz = nir_src_bit_size(instr->src[i].src);
3696*61046927SAndroid Build Coastguard Worker
3697*61046927SAndroid Build Coastguard Worker switch (instr->src[i].src_type) {
3698*61046927SAndroid Build Coastguard Worker case nir_tex_src_coord: {
3699*61046927SAndroid Build Coastguard Worker unsigned components =
3700*61046927SAndroid Build Coastguard Worker nir_src_num_components(instr->src[i].src) - instr->is_array;
3701*61046927SAndroid Build Coastguard Worker
3702*61046927SAndroid Build Coastguard Worker if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
3703*61046927SAndroid Build Coastguard Worker sregs[VALHALL_TEX_SREG_X_COORD] = bi_emit_texc_cube_coord(
3704*61046927SAndroid Build Coastguard Worker b, index, &sregs[VALHALL_TEX_SREG_Y_COORD]);
3705*61046927SAndroid Build Coastguard Worker } else {
3706*61046927SAndroid Build Coastguard Worker assert(components >= 1 && components <= 3);
3707*61046927SAndroid Build Coastguard Worker
3708*61046927SAndroid Build Coastguard Worker /* Copy XY (for 2D+) or XX (for 1D) */
3709*61046927SAndroid Build Coastguard Worker sregs[VALHALL_TEX_SREG_X_COORD] = index;
3710*61046927SAndroid Build Coastguard Worker
3711*61046927SAndroid Build Coastguard Worker if (components >= 2)
3712*61046927SAndroid Build Coastguard Worker sregs[VALHALL_TEX_SREG_Y_COORD] = bi_extract(b, index, 1);
3713*61046927SAndroid Build Coastguard Worker
3714*61046927SAndroid Build Coastguard Worker if (components == 3)
3715*61046927SAndroid Build Coastguard Worker sregs[VALHALL_TEX_SREG_Z_COORD] = bi_extract(b, index, 2);
3716*61046927SAndroid Build Coastguard Worker }
3717*61046927SAndroid Build Coastguard Worker
3718*61046927SAndroid Build Coastguard Worker if (instr->is_array) {
3719*61046927SAndroid Build Coastguard Worker sregs[VALHALL_TEX_SREG_ARRAY] = bi_extract(b, index, components);
3720*61046927SAndroid Build Coastguard Worker }
3721*61046927SAndroid Build Coastguard Worker
3722*61046927SAndroid Build Coastguard Worker break;
3723*61046927SAndroid Build Coastguard Worker }
3724*61046927SAndroid Build Coastguard Worker
3725*61046927SAndroid Build Coastguard Worker case nir_tex_src_lod:
3726*61046927SAndroid Build Coastguard Worker if (nir_src_is_const(instr->src[i].src) &&
3727*61046927SAndroid Build Coastguard Worker nir_src_as_uint(instr->src[i].src) == 0) {
3728*61046927SAndroid Build Coastguard Worker lod_mode = BI_VA_LOD_MODE_ZERO_LOD;
3729*61046927SAndroid Build Coastguard Worker } else if (has_lod_mode) {
3730*61046927SAndroid Build Coastguard Worker lod_mode = BI_VA_LOD_MODE_EXPLICIT;
3731*61046927SAndroid Build Coastguard Worker
3732*61046927SAndroid Build Coastguard Worker assert(sz == 16 || sz == 32);
3733*61046927SAndroid Build Coastguard Worker sregs[VALHALL_TEX_SREG_LOD] =
3734*61046927SAndroid Build Coastguard Worker bi_emit_texc_lod_88(b, index, sz == 16);
3735*61046927SAndroid Build Coastguard Worker }
3736*61046927SAndroid Build Coastguard Worker break;
3737*61046927SAndroid Build Coastguard Worker
3738*61046927SAndroid Build Coastguard Worker case nir_tex_src_bias:
3739*61046927SAndroid Build Coastguard Worker /* Upper 16-bits interpreted as a clamp, leave zero */
3740*61046927SAndroid Build Coastguard Worker assert(sz == 16 || sz == 32);
3741*61046927SAndroid Build Coastguard Worker sregs[VALHALL_TEX_SREG_LOD] = bi_emit_texc_lod_88(b, index, sz == 16);
3742*61046927SAndroid Build Coastguard Worker
3743*61046927SAndroid Build Coastguard Worker lod_mode = BI_VA_LOD_MODE_COMPUTED_BIAS;
3744*61046927SAndroid Build Coastguard Worker break;
3745*61046927SAndroid Build Coastguard Worker case nir_tex_src_ms_index:
3746*61046927SAndroid Build Coastguard Worker case nir_tex_src_offset:
3747*61046927SAndroid Build Coastguard Worker /* Handled below */
3748*61046927SAndroid Build Coastguard Worker break;
3749*61046927SAndroid Build Coastguard Worker
3750*61046927SAndroid Build Coastguard Worker case nir_tex_src_comparator:
3751*61046927SAndroid Build Coastguard Worker sregs[VALHALL_TEX_SREG_SHADOW] = index;
3752*61046927SAndroid Build Coastguard Worker break;
3753*61046927SAndroid Build Coastguard Worker
3754*61046927SAndroid Build Coastguard Worker case nir_tex_src_texture_offset:
3755*61046927SAndroid Build Coastguard Worker /* This should always be 0 as lower_index_to_offset is expected to be
3756*61046927SAndroid Build Coastguard Worker * set */
3757*61046927SAndroid Build Coastguard Worker assert(instr->texture_index == 0);
3758*61046927SAndroid Build Coastguard Worker texture = index;
3759*61046927SAndroid Build Coastguard Worker break;
3760*61046927SAndroid Build Coastguard Worker
3761*61046927SAndroid Build Coastguard Worker case nir_tex_src_sampler_offset:
3762*61046927SAndroid Build Coastguard Worker /* This should always be 0 as lower_index_to_offset is expected to be
3763*61046927SAndroid Build Coastguard Worker * set */
3764*61046927SAndroid Build Coastguard Worker assert(instr->sampler_index == 0);
3765*61046927SAndroid Build Coastguard Worker sampler = index;
3766*61046927SAndroid Build Coastguard Worker break;
3767*61046927SAndroid Build Coastguard Worker
3768*61046927SAndroid Build Coastguard Worker default:
3769*61046927SAndroid Build Coastguard Worker unreachable("Unhandled src type in tex emit");
3770*61046927SAndroid Build Coastguard Worker }
3771*61046927SAndroid Build Coastguard Worker }
3772*61046927SAndroid Build Coastguard Worker
3773*61046927SAndroid Build Coastguard Worker /* Generate packed offset + ms index + LOD register. These default to
3774*61046927SAndroid Build Coastguard Worker * zero so we only need to encode if these features are actually in use.
3775*61046927SAndroid Build Coastguard Worker */
3776*61046927SAndroid Build Coastguard Worker bi_index offsets = bi_emit_valhall_offsets(b, instr);
3777*61046927SAndroid Build Coastguard Worker
3778*61046927SAndroid Build Coastguard Worker if (!bi_is_equiv(offsets, bi_zero())) {
3779*61046927SAndroid Build Coastguard Worker sregs[VALHALL_TEX_SREG_OFFSETMS] = offsets;
3780*61046927SAndroid Build Coastguard Worker explicit_offset = true;
3781*61046927SAndroid Build Coastguard Worker }
3782*61046927SAndroid Build Coastguard Worker
3783*61046927SAndroid Build Coastguard Worker /* Allocate staging registers contiguously by compacting the array. */
3784*61046927SAndroid Build Coastguard Worker unsigned sr_count = 0;
3785*61046927SAndroid Build Coastguard Worker
3786*61046927SAndroid Build Coastguard Worker for (unsigned i = 0; i < ARRAY_SIZE(sregs); ++i) {
3787*61046927SAndroid Build Coastguard Worker if (!bi_is_null(sregs[i]))
3788*61046927SAndroid Build Coastguard Worker sregs[sr_count++] = sregs[i];
3789*61046927SAndroid Build Coastguard Worker }
3790*61046927SAndroid Build Coastguard Worker
3791*61046927SAndroid Build Coastguard Worker bi_index idx = sr_count ? bi_temp(b->shader) : bi_null();
3792*61046927SAndroid Build Coastguard Worker
3793*61046927SAndroid Build Coastguard Worker if (sr_count)
3794*61046927SAndroid Build Coastguard Worker bi_make_vec_to(b, idx, sregs, NULL, sr_count, 32);
3795*61046927SAndroid Build Coastguard Worker
3796*61046927SAndroid Build Coastguard Worker bool narrow_indices = va_is_valid_const_narrow_index(texture) &&
3797*61046927SAndroid Build Coastguard Worker va_is_valid_const_narrow_index(sampler);
3798*61046927SAndroid Build Coastguard Worker
3799*61046927SAndroid Build Coastguard Worker bi_index src0;
3800*61046927SAndroid Build Coastguard Worker bi_index src1;
3801*61046927SAndroid Build Coastguard Worker
3802*61046927SAndroid Build Coastguard Worker if (narrow_indices) {
3803*61046927SAndroid Build Coastguard Worker unsigned tex_set =
3804*61046927SAndroid Build Coastguard Worker va_res_fold_table_idx(pan_res_handle_get_table(texture.value));
3805*61046927SAndroid Build Coastguard Worker unsigned sampler_set =
3806*61046927SAndroid Build Coastguard Worker va_res_fold_table_idx(pan_res_handle_get_table(sampler.value));
3807*61046927SAndroid Build Coastguard Worker unsigned texture_index = pan_res_handle_get_index(texture.value);
3808*61046927SAndroid Build Coastguard Worker unsigned sampler_index = pan_res_handle_get_index(sampler.value);
3809*61046927SAndroid Build Coastguard Worker
3810*61046927SAndroid Build Coastguard Worker unsigned packed_handle = (tex_set << 27) | (texture_index << 16) |
3811*61046927SAndroid Build Coastguard Worker (sampler_set << 11) | sampler_index;
3812*61046927SAndroid Build Coastguard Worker
3813*61046927SAndroid Build Coastguard Worker src0 = bi_imm_u32(packed_handle);
3814*61046927SAndroid Build Coastguard Worker
3815*61046927SAndroid Build Coastguard Worker /* TODO: narrow offsetms */
3816*61046927SAndroid Build Coastguard Worker src1 = bi_zero();
3817*61046927SAndroid Build Coastguard Worker } else {
3818*61046927SAndroid Build Coastguard Worker src0 = sampler;
3819*61046927SAndroid Build Coastguard Worker src1 = texture;
3820*61046927SAndroid Build Coastguard Worker }
3821*61046927SAndroid Build Coastguard Worker
3822*61046927SAndroid Build Coastguard Worker /* Only write the components that we actually read */
3823*61046927SAndroid Build Coastguard Worker unsigned mask = nir_def_components_read(&instr->def);
3824*61046927SAndroid Build Coastguard Worker unsigned comps_per_reg = instr->def.bit_size == 16 ? 2 : 1;
3825*61046927SAndroid Build Coastguard Worker unsigned res_size = DIV_ROUND_UP(util_bitcount(mask), comps_per_reg);
3826*61046927SAndroid Build Coastguard Worker
3827*61046927SAndroid Build Coastguard Worker enum bi_register_format regfmt = bi_reg_fmt_for_nir(instr->dest_type);
3828*61046927SAndroid Build Coastguard Worker enum bi_dimension dim = valhall_tex_dimension(instr->sampler_dim);
3829*61046927SAndroid Build Coastguard Worker bi_index dest = bi_temp(b->shader);
3830*61046927SAndroid Build Coastguard Worker
3831*61046927SAndroid Build Coastguard Worker switch (instr->op) {
3832*61046927SAndroid Build Coastguard Worker case nir_texop_tex:
3833*61046927SAndroid Build Coastguard Worker case nir_texop_txl:
3834*61046927SAndroid Build Coastguard Worker case nir_texop_txb:
3835*61046927SAndroid Build Coastguard Worker bi_tex_single_to(b, dest, idx, src0, src1, instr->is_array, dim, regfmt,
3836*61046927SAndroid Build Coastguard Worker instr->is_shadow, explicit_offset, lod_mode,
3837*61046927SAndroid Build Coastguard Worker !narrow_indices, mask, sr_count);
3838*61046927SAndroid Build Coastguard Worker break;
3839*61046927SAndroid Build Coastguard Worker case nir_texop_txf:
3840*61046927SAndroid Build Coastguard Worker case nir_texop_txf_ms:
3841*61046927SAndroid Build Coastguard Worker bi_tex_fetch_to(b, dest, idx, src0, src1, instr->is_array, dim, regfmt,
3842*61046927SAndroid Build Coastguard Worker explicit_offset, !narrow_indices, mask, sr_count);
3843*61046927SAndroid Build Coastguard Worker break;
3844*61046927SAndroid Build Coastguard Worker case nir_texop_tg4:
3845*61046927SAndroid Build Coastguard Worker bi_tex_gather_to(b, dest, idx, src0, src1, instr->is_array, dim,
3846*61046927SAndroid Build Coastguard Worker instr->component, false, regfmt, instr->is_shadow,
3847*61046927SAndroid Build Coastguard Worker explicit_offset, !narrow_indices, mask, sr_count);
3848*61046927SAndroid Build Coastguard Worker break;
3849*61046927SAndroid Build Coastguard Worker default:
3850*61046927SAndroid Build Coastguard Worker unreachable("Unhandled Valhall texture op");
3851*61046927SAndroid Build Coastguard Worker }
3852*61046927SAndroid Build Coastguard Worker
   /* The hardware will write only what we read, and it will write into
    * contiguous registers without gaps (different from Bifrost). NIR
    * expects the gaps, so fill in the holes (they'll be copypropped and
    * DCE'd away later).
    */
3858*61046927SAndroid Build Coastguard Worker bi_index unpacked[4] = {bi_null(), bi_null(), bi_null(), bi_null()};
3859*61046927SAndroid Build Coastguard Worker
3860*61046927SAndroid Build Coastguard Worker bi_emit_cached_split_i32(b, dest, res_size);
3861*61046927SAndroid Build Coastguard Worker
3862*61046927SAndroid Build Coastguard Worker /* Index into the packed component array */
3863*61046927SAndroid Build Coastguard Worker unsigned j = 0;
3864*61046927SAndroid Build Coastguard Worker unsigned comps[4] = {0};
3865*61046927SAndroid Build Coastguard Worker unsigned nr_components = instr->def.num_components;
3866*61046927SAndroid Build Coastguard Worker
3867*61046927SAndroid Build Coastguard Worker for (unsigned i = 0; i < nr_components; ++i) {
3868*61046927SAndroid Build Coastguard Worker if (mask & BITFIELD_BIT(i)) {
3869*61046927SAndroid Build Coastguard Worker unpacked[i] = dest;
3870*61046927SAndroid Build Coastguard Worker comps[i] = j++;
3871*61046927SAndroid Build Coastguard Worker } else {
3872*61046927SAndroid Build Coastguard Worker unpacked[i] = bi_zero();
3873*61046927SAndroid Build Coastguard Worker }
3874*61046927SAndroid Build Coastguard Worker }
3875*61046927SAndroid Build Coastguard Worker
3876*61046927SAndroid Build Coastguard Worker bi_make_vec_to(b, bi_def_index(&instr->def), unpacked, comps,
3877*61046927SAndroid Build Coastguard Worker instr->def.num_components, instr->def.bit_size);
3878*61046927SAndroid Build Coastguard Worker }
3879*61046927SAndroid Build Coastguard Worker
3880*61046927SAndroid Build Coastguard Worker /* Simple textures ops correspond to NIR tex or txl with LOD = 0 on 2D/cube
3881*61046927SAndroid Build Coastguard Worker * textures with sufficiently small immediate indices. Anything else
3882*61046927SAndroid Build Coastguard Worker * needs a complete texture op. */
3883*61046927SAndroid Build Coastguard Worker
3884*61046927SAndroid Build Coastguard Worker static void
bi_emit_texs(bi_builder * b,nir_tex_instr * instr)3885*61046927SAndroid Build Coastguard Worker bi_emit_texs(bi_builder *b, nir_tex_instr *instr)
3886*61046927SAndroid Build Coastguard Worker {
3887*61046927SAndroid Build Coastguard Worker int coord_idx = nir_tex_instr_src_index(instr, nir_tex_src_coord);
3888*61046927SAndroid Build Coastguard Worker assert(coord_idx >= 0);
3889*61046927SAndroid Build Coastguard Worker bi_index coords = bi_src_index(&instr->src[coord_idx].src);
3890*61046927SAndroid Build Coastguard Worker
3891*61046927SAndroid Build Coastguard Worker if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
3892*61046927SAndroid Build Coastguard Worker bi_index face, s, t;
3893*61046927SAndroid Build Coastguard Worker bi_emit_cube_coord(b, coords, &face, &s, &t);
3894*61046927SAndroid Build Coastguard Worker
3895*61046927SAndroid Build Coastguard Worker bi_texs_cube_to(b, instr->def.bit_size, bi_def_index(&instr->def), s, t,
3896*61046927SAndroid Build Coastguard Worker face, instr->sampler_index, instr->texture_index);
3897*61046927SAndroid Build Coastguard Worker } else {
3898*61046927SAndroid Build Coastguard Worker bi_texs_2d_to(b, instr->def.bit_size, bi_def_index(&instr->def),
3899*61046927SAndroid Build Coastguard Worker bi_extract(b, coords, 0), bi_extract(b, coords, 1),
3900*61046927SAndroid Build Coastguard Worker instr->op != nir_texop_tex, /* zero LOD */
3901*61046927SAndroid Build Coastguard Worker instr->sampler_index, instr->texture_index);
3902*61046927SAndroid Build Coastguard Worker }
3903*61046927SAndroid Build Coastguard Worker
3904*61046927SAndroid Build Coastguard Worker bi_split_def(b, &instr->def);
3905*61046927SAndroid Build Coastguard Worker }
3906*61046927SAndroid Build Coastguard Worker
3907*61046927SAndroid Build Coastguard Worker static bool
bi_is_simple_tex(nir_tex_instr * instr)3908*61046927SAndroid Build Coastguard Worker bi_is_simple_tex(nir_tex_instr *instr)
3909*61046927SAndroid Build Coastguard Worker {
3910*61046927SAndroid Build Coastguard Worker if (instr->op != nir_texop_tex && instr->op != nir_texop_txl)
3911*61046927SAndroid Build Coastguard Worker return false;
3912*61046927SAndroid Build Coastguard Worker
3913*61046927SAndroid Build Coastguard Worker if (instr->dest_type != nir_type_float32 &&
3914*61046927SAndroid Build Coastguard Worker instr->dest_type != nir_type_float16)
3915*61046927SAndroid Build Coastguard Worker return false;
3916*61046927SAndroid Build Coastguard Worker
3917*61046927SAndroid Build Coastguard Worker if (instr->is_shadow || instr->is_array)
3918*61046927SAndroid Build Coastguard Worker return false;
3919*61046927SAndroid Build Coastguard Worker
3920*61046927SAndroid Build Coastguard Worker switch (instr->sampler_dim) {
3921*61046927SAndroid Build Coastguard Worker case GLSL_SAMPLER_DIM_2D:
3922*61046927SAndroid Build Coastguard Worker case GLSL_SAMPLER_DIM_EXTERNAL:
3923*61046927SAndroid Build Coastguard Worker case GLSL_SAMPLER_DIM_RECT:
3924*61046927SAndroid Build Coastguard Worker break;
3925*61046927SAndroid Build Coastguard Worker
3926*61046927SAndroid Build Coastguard Worker case GLSL_SAMPLER_DIM_CUBE:
3927*61046927SAndroid Build Coastguard Worker /* LOD can't be specified with TEXS_CUBE */
3928*61046927SAndroid Build Coastguard Worker if (instr->op == nir_texop_txl)
3929*61046927SAndroid Build Coastguard Worker return false;
3930*61046927SAndroid Build Coastguard Worker break;
3931*61046927SAndroid Build Coastguard Worker
3932*61046927SAndroid Build Coastguard Worker default:
3933*61046927SAndroid Build Coastguard Worker return false;
3934*61046927SAndroid Build Coastguard Worker }
3935*61046927SAndroid Build Coastguard Worker
3936*61046927SAndroid Build Coastguard Worker for (unsigned i = 0; i < instr->num_srcs; ++i) {
3937*61046927SAndroid Build Coastguard Worker if (instr->src[i].src_type != nir_tex_src_lod &&
3938*61046927SAndroid Build Coastguard Worker instr->src[i].src_type != nir_tex_src_coord)
3939*61046927SAndroid Build Coastguard Worker return false;
3940*61046927SAndroid Build Coastguard Worker }
3941*61046927SAndroid Build Coastguard Worker
3942*61046927SAndroid Build Coastguard Worker /* Indices need to fit in provided bits */
3943*61046927SAndroid Build Coastguard Worker unsigned idx_bits = instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE ? 2 : 3;
3944*61046927SAndroid Build Coastguard Worker if (MAX2(instr->sampler_index, instr->texture_index) >= (1 << idx_bits))
3945*61046927SAndroid Build Coastguard Worker return false;
3946*61046927SAndroid Build Coastguard Worker
3947*61046927SAndroid Build Coastguard Worker int lod_idx = nir_tex_instr_src_index(instr, nir_tex_src_lod);
3948*61046927SAndroid Build Coastguard Worker if (lod_idx < 0)
3949*61046927SAndroid Build Coastguard Worker return true;
3950*61046927SAndroid Build Coastguard Worker
3951*61046927SAndroid Build Coastguard Worker nir_src lod = instr->src[lod_idx].src;
3952*61046927SAndroid Build Coastguard Worker return nir_src_is_const(lod) && nir_src_as_uint(lod) == 0;
3953*61046927SAndroid Build Coastguard Worker }
3954*61046927SAndroid Build Coastguard Worker
3955*61046927SAndroid Build Coastguard Worker static void
bi_emit_tex(bi_builder * b,nir_tex_instr * instr)3956*61046927SAndroid Build Coastguard Worker bi_emit_tex(bi_builder *b, nir_tex_instr *instr)
3957*61046927SAndroid Build Coastguard Worker {
3958*61046927SAndroid Build Coastguard Worker /* If txf is used, we assume there is a valid sampler bound at index 0. Use
3959*61046927SAndroid Build Coastguard Worker * it for txf operations, since there may be no other valid samplers. This is
3960*61046927SAndroid Build Coastguard Worker * a workaround: txf does not require a sampler in NIR (so sampler_index is
3961*61046927SAndroid Build Coastguard Worker * undefined) but we need one in the hardware. This is ABI with the driver.
3962*61046927SAndroid Build Coastguard Worker *
3963*61046927SAndroid Build Coastguard Worker * On Valhall, as the descriptor table is encoded in the index, this should
3964*61046927SAndroid Build Coastguard Worker * be handled by the driver.
3965*61046927SAndroid Build Coastguard Worker */
3966*61046927SAndroid Build Coastguard Worker if (!nir_tex_instr_need_sampler(instr) && b->shader->arch < 9)
3967*61046927SAndroid Build Coastguard Worker instr->sampler_index = 0;
3968*61046927SAndroid Build Coastguard Worker
3969*61046927SAndroid Build Coastguard Worker if (b->shader->arch >= 9)
3970*61046927SAndroid Build Coastguard Worker bi_emit_tex_valhall(b, instr);
3971*61046927SAndroid Build Coastguard Worker else if (bi_is_simple_tex(instr))
3972*61046927SAndroid Build Coastguard Worker bi_emit_texs(b, instr);
3973*61046927SAndroid Build Coastguard Worker else
3974*61046927SAndroid Build Coastguard Worker bi_emit_texc(b, instr);
3975*61046927SAndroid Build Coastguard Worker }
3976*61046927SAndroid Build Coastguard Worker
3977*61046927SAndroid Build Coastguard Worker static void
bi_emit_phi(bi_builder * b,nir_phi_instr * instr)3978*61046927SAndroid Build Coastguard Worker bi_emit_phi(bi_builder *b, nir_phi_instr *instr)
3979*61046927SAndroid Build Coastguard Worker {
3980*61046927SAndroid Build Coastguard Worker unsigned nr_srcs = exec_list_length(&instr->srcs);
3981*61046927SAndroid Build Coastguard Worker bi_instr *I = bi_phi_to(b, bi_def_index(&instr->def), nr_srcs);
3982*61046927SAndroid Build Coastguard Worker
3983*61046927SAndroid Build Coastguard Worker /* Deferred */
3984*61046927SAndroid Build Coastguard Worker I->phi = instr;
3985*61046927SAndroid Build Coastguard Worker }
3986*61046927SAndroid Build Coastguard Worker
3987*61046927SAndroid Build Coastguard Worker /* Look up the AGX block corresponding to a given NIR block. Used when
3988*61046927SAndroid Build Coastguard Worker * translating phi nodes after emitting all blocks.
3989*61046927SAndroid Build Coastguard Worker */
3990*61046927SAndroid Build Coastguard Worker static bi_block *
bi_from_nir_block(bi_context * ctx,nir_block * block)3991*61046927SAndroid Build Coastguard Worker bi_from_nir_block(bi_context *ctx, nir_block *block)
3992*61046927SAndroid Build Coastguard Worker {
3993*61046927SAndroid Build Coastguard Worker return ctx->indexed_nir_blocks[block->index];
3994*61046927SAndroid Build Coastguard Worker }
3995*61046927SAndroid Build Coastguard Worker
3996*61046927SAndroid Build Coastguard Worker static void
bi_emit_phi_deferred(bi_context * ctx,bi_block * block,bi_instr * I)3997*61046927SAndroid Build Coastguard Worker bi_emit_phi_deferred(bi_context *ctx, bi_block *block, bi_instr *I)
3998*61046927SAndroid Build Coastguard Worker {
3999*61046927SAndroid Build Coastguard Worker nir_phi_instr *phi = I->phi;
4000*61046927SAndroid Build Coastguard Worker
4001*61046927SAndroid Build Coastguard Worker /* Guaranteed by lower_phis_to_scalar */
4002*61046927SAndroid Build Coastguard Worker assert(phi->def.num_components == 1);
4003*61046927SAndroid Build Coastguard Worker
4004*61046927SAndroid Build Coastguard Worker nir_foreach_phi_src(src, phi) {
4005*61046927SAndroid Build Coastguard Worker bi_block *pred = bi_from_nir_block(ctx, src->pred);
4006*61046927SAndroid Build Coastguard Worker unsigned i = bi_predecessor_index(block, pred);
4007*61046927SAndroid Build Coastguard Worker assert(i < I->nr_srcs);
4008*61046927SAndroid Build Coastguard Worker
4009*61046927SAndroid Build Coastguard Worker I->src[i] = bi_src_index(&src->src);
4010*61046927SAndroid Build Coastguard Worker }
4011*61046927SAndroid Build Coastguard Worker
4012*61046927SAndroid Build Coastguard Worker I->phi = NULL;
4013*61046927SAndroid Build Coastguard Worker }
4014*61046927SAndroid Build Coastguard Worker
4015*61046927SAndroid Build Coastguard Worker static void
bi_emit_phis_deferred(bi_context * ctx)4016*61046927SAndroid Build Coastguard Worker bi_emit_phis_deferred(bi_context *ctx)
4017*61046927SAndroid Build Coastguard Worker {
4018*61046927SAndroid Build Coastguard Worker bi_foreach_block(ctx, block) {
4019*61046927SAndroid Build Coastguard Worker bi_foreach_instr_in_block(block, I) {
4020*61046927SAndroid Build Coastguard Worker if (I->op == BI_OPCODE_PHI)
4021*61046927SAndroid Build Coastguard Worker bi_emit_phi_deferred(ctx, block, I);
4022*61046927SAndroid Build Coastguard Worker }
4023*61046927SAndroid Build Coastguard Worker }
4024*61046927SAndroid Build Coastguard Worker }
4025*61046927SAndroid Build Coastguard Worker
4026*61046927SAndroid Build Coastguard Worker static void
bi_emit_instr(bi_builder * b,struct nir_instr * instr)4027*61046927SAndroid Build Coastguard Worker bi_emit_instr(bi_builder *b, struct nir_instr *instr)
4028*61046927SAndroid Build Coastguard Worker {
4029*61046927SAndroid Build Coastguard Worker switch (instr->type) {
4030*61046927SAndroid Build Coastguard Worker case nir_instr_type_load_const:
4031*61046927SAndroid Build Coastguard Worker bi_emit_load_const(b, nir_instr_as_load_const(instr));
4032*61046927SAndroid Build Coastguard Worker break;
4033*61046927SAndroid Build Coastguard Worker
4034*61046927SAndroid Build Coastguard Worker case nir_instr_type_intrinsic:
4035*61046927SAndroid Build Coastguard Worker bi_emit_intrinsic(b, nir_instr_as_intrinsic(instr));
4036*61046927SAndroid Build Coastguard Worker break;
4037*61046927SAndroid Build Coastguard Worker
4038*61046927SAndroid Build Coastguard Worker case nir_instr_type_alu:
4039*61046927SAndroid Build Coastguard Worker bi_emit_alu(b, nir_instr_as_alu(instr));
4040*61046927SAndroid Build Coastguard Worker break;
4041*61046927SAndroid Build Coastguard Worker
4042*61046927SAndroid Build Coastguard Worker case nir_instr_type_tex:
4043*61046927SAndroid Build Coastguard Worker bi_emit_tex(b, nir_instr_as_tex(instr));
4044*61046927SAndroid Build Coastguard Worker break;
4045*61046927SAndroid Build Coastguard Worker
4046*61046927SAndroid Build Coastguard Worker case nir_instr_type_jump:
4047*61046927SAndroid Build Coastguard Worker bi_emit_jump(b, nir_instr_as_jump(instr));
4048*61046927SAndroid Build Coastguard Worker break;
4049*61046927SAndroid Build Coastguard Worker
4050*61046927SAndroid Build Coastguard Worker case nir_instr_type_phi:
4051*61046927SAndroid Build Coastguard Worker bi_emit_phi(b, nir_instr_as_phi(instr));
4052*61046927SAndroid Build Coastguard Worker break;
4053*61046927SAndroid Build Coastguard Worker
4054*61046927SAndroid Build Coastguard Worker default:
4055*61046927SAndroid Build Coastguard Worker unreachable("should've been lowered");
4056*61046927SAndroid Build Coastguard Worker }
4057*61046927SAndroid Build Coastguard Worker }
4058*61046927SAndroid Build Coastguard Worker
4059*61046927SAndroid Build Coastguard Worker static bi_block *
create_empty_block(bi_context * ctx)4060*61046927SAndroid Build Coastguard Worker create_empty_block(bi_context *ctx)
4061*61046927SAndroid Build Coastguard Worker {
4062*61046927SAndroid Build Coastguard Worker bi_block *blk = rzalloc(ctx, bi_block);
4063*61046927SAndroid Build Coastguard Worker
4064*61046927SAndroid Build Coastguard Worker util_dynarray_init(&blk->predecessors, blk);
4065*61046927SAndroid Build Coastguard Worker
4066*61046927SAndroid Build Coastguard Worker return blk;
4067*61046927SAndroid Build Coastguard Worker }
4068*61046927SAndroid Build Coastguard Worker
4069*61046927SAndroid Build Coastguard Worker static bi_block *
emit_block(bi_context * ctx,nir_block * block)4070*61046927SAndroid Build Coastguard Worker emit_block(bi_context *ctx, nir_block *block)
4071*61046927SAndroid Build Coastguard Worker {
4072*61046927SAndroid Build Coastguard Worker if (ctx->after_block) {
4073*61046927SAndroid Build Coastguard Worker ctx->current_block = ctx->after_block;
4074*61046927SAndroid Build Coastguard Worker ctx->after_block = NULL;
4075*61046927SAndroid Build Coastguard Worker } else {
4076*61046927SAndroid Build Coastguard Worker ctx->current_block = create_empty_block(ctx);
4077*61046927SAndroid Build Coastguard Worker }
4078*61046927SAndroid Build Coastguard Worker
4079*61046927SAndroid Build Coastguard Worker list_addtail(&ctx->current_block->link, &ctx->blocks);
4080*61046927SAndroid Build Coastguard Worker list_inithead(&ctx->current_block->instructions);
4081*61046927SAndroid Build Coastguard Worker
4082*61046927SAndroid Build Coastguard Worker bi_builder _b = bi_init_builder(ctx, bi_after_block(ctx->current_block));
4083*61046927SAndroid Build Coastguard Worker
4084*61046927SAndroid Build Coastguard Worker ctx->indexed_nir_blocks[block->index] = ctx->current_block;
4085*61046927SAndroid Build Coastguard Worker
4086*61046927SAndroid Build Coastguard Worker nir_foreach_instr(instr, block) {
4087*61046927SAndroid Build Coastguard Worker bi_emit_instr(&_b, instr);
4088*61046927SAndroid Build Coastguard Worker }
4089*61046927SAndroid Build Coastguard Worker
4090*61046927SAndroid Build Coastguard Worker return ctx->current_block;
4091*61046927SAndroid Build Coastguard Worker }
4092*61046927SAndroid Build Coastguard Worker
4093*61046927SAndroid Build Coastguard Worker static void
emit_if(bi_context * ctx,nir_if * nif)4094*61046927SAndroid Build Coastguard Worker emit_if(bi_context *ctx, nir_if *nif)
4095*61046927SAndroid Build Coastguard Worker {
4096*61046927SAndroid Build Coastguard Worker bi_block *before_block = ctx->current_block;
4097*61046927SAndroid Build Coastguard Worker
4098*61046927SAndroid Build Coastguard Worker /* Speculatively emit the branch, but we can't fill it in until later */
4099*61046927SAndroid Build Coastguard Worker bi_builder _b = bi_init_builder(ctx, bi_after_block(ctx->current_block));
4100*61046927SAndroid Build Coastguard Worker bi_instr *then_branch =
4101*61046927SAndroid Build Coastguard Worker bi_branchz_i16(&_b, bi_half(bi_src_index(&nif->condition), false),
4102*61046927SAndroid Build Coastguard Worker bi_zero(), BI_CMPF_EQ);
4103*61046927SAndroid Build Coastguard Worker
4104*61046927SAndroid Build Coastguard Worker /* Emit the two subblocks. */
4105*61046927SAndroid Build Coastguard Worker bi_block *then_block = emit_cf_list(ctx, &nif->then_list);
4106*61046927SAndroid Build Coastguard Worker bi_block *end_then_block = ctx->current_block;
4107*61046927SAndroid Build Coastguard Worker
4108*61046927SAndroid Build Coastguard Worker /* Emit second block */
4109*61046927SAndroid Build Coastguard Worker
4110*61046927SAndroid Build Coastguard Worker bi_block *else_block = emit_cf_list(ctx, &nif->else_list);
4111*61046927SAndroid Build Coastguard Worker bi_block *end_else_block = ctx->current_block;
4112*61046927SAndroid Build Coastguard Worker ctx->after_block = create_empty_block(ctx);
4113*61046927SAndroid Build Coastguard Worker
4114*61046927SAndroid Build Coastguard Worker /* Now that we have the subblocks emitted, fix up the branches */
4115*61046927SAndroid Build Coastguard Worker
4116*61046927SAndroid Build Coastguard Worker assert(then_block);
4117*61046927SAndroid Build Coastguard Worker assert(else_block);
4118*61046927SAndroid Build Coastguard Worker
4119*61046927SAndroid Build Coastguard Worker then_branch->branch_target = else_block;
4120*61046927SAndroid Build Coastguard Worker
4121*61046927SAndroid Build Coastguard Worker /* Emit a jump from the end of the then block to the end of the else */
4122*61046927SAndroid Build Coastguard Worker _b.cursor = bi_after_block(end_then_block);
4123*61046927SAndroid Build Coastguard Worker bi_instr *then_exit = bi_jump(&_b, bi_zero());
4124*61046927SAndroid Build Coastguard Worker then_exit->branch_target = ctx->after_block;
4125*61046927SAndroid Build Coastguard Worker
4126*61046927SAndroid Build Coastguard Worker bi_block_add_successor(end_then_block, then_exit->branch_target);
4127*61046927SAndroid Build Coastguard Worker bi_block_add_successor(end_else_block, ctx->after_block); /* fallthrough */
4128*61046927SAndroid Build Coastguard Worker
4129*61046927SAndroid Build Coastguard Worker bi_block_add_successor(before_block,
4130*61046927SAndroid Build Coastguard Worker then_branch->branch_target); /* then_branch */
4131*61046927SAndroid Build Coastguard Worker bi_block_add_successor(before_block, then_block); /* fallthrough */
4132*61046927SAndroid Build Coastguard Worker }
4133*61046927SAndroid Build Coastguard Worker
4134*61046927SAndroid Build Coastguard Worker static void
emit_loop(bi_context * ctx,nir_loop * nloop)4135*61046927SAndroid Build Coastguard Worker emit_loop(bi_context *ctx, nir_loop *nloop)
4136*61046927SAndroid Build Coastguard Worker {
4137*61046927SAndroid Build Coastguard Worker assert(!nir_loop_has_continue_construct(nloop));
4138*61046927SAndroid Build Coastguard Worker
4139*61046927SAndroid Build Coastguard Worker /* Remember where we are */
4140*61046927SAndroid Build Coastguard Worker bi_block *start_block = ctx->current_block;
4141*61046927SAndroid Build Coastguard Worker
4142*61046927SAndroid Build Coastguard Worker bi_block *saved_break = ctx->break_block;
4143*61046927SAndroid Build Coastguard Worker bi_block *saved_continue = ctx->continue_block;
4144*61046927SAndroid Build Coastguard Worker
4145*61046927SAndroid Build Coastguard Worker ctx->continue_block = create_empty_block(ctx);
4146*61046927SAndroid Build Coastguard Worker ctx->break_block = create_empty_block(ctx);
4147*61046927SAndroid Build Coastguard Worker ctx->after_block = ctx->continue_block;
4148*61046927SAndroid Build Coastguard Worker ctx->after_block->loop_header = true;
4149*61046927SAndroid Build Coastguard Worker
4150*61046927SAndroid Build Coastguard Worker /* Emit the body itself */
4151*61046927SAndroid Build Coastguard Worker emit_cf_list(ctx, &nloop->body);
4152*61046927SAndroid Build Coastguard Worker
4153*61046927SAndroid Build Coastguard Worker /* Branch back to loop back */
4154*61046927SAndroid Build Coastguard Worker bi_builder _b = bi_init_builder(ctx, bi_after_block(ctx->current_block));
4155*61046927SAndroid Build Coastguard Worker bi_instr *I = bi_jump(&_b, bi_zero());
4156*61046927SAndroid Build Coastguard Worker I->branch_target = ctx->continue_block;
4157*61046927SAndroid Build Coastguard Worker bi_block_add_successor(start_block, ctx->continue_block);
4158*61046927SAndroid Build Coastguard Worker bi_block_add_successor(ctx->current_block, ctx->continue_block);
4159*61046927SAndroid Build Coastguard Worker
4160*61046927SAndroid Build Coastguard Worker ctx->after_block = ctx->break_block;
4161*61046927SAndroid Build Coastguard Worker
4162*61046927SAndroid Build Coastguard Worker /* Pop off */
4163*61046927SAndroid Build Coastguard Worker ctx->break_block = saved_break;
4164*61046927SAndroid Build Coastguard Worker ctx->continue_block = saved_continue;
4165*61046927SAndroid Build Coastguard Worker ++ctx->loop_count;
4166*61046927SAndroid Build Coastguard Worker }
4167*61046927SAndroid Build Coastguard Worker
4168*61046927SAndroid Build Coastguard Worker static bi_block *
emit_cf_list(bi_context * ctx,struct exec_list * list)4169*61046927SAndroid Build Coastguard Worker emit_cf_list(bi_context *ctx, struct exec_list *list)
4170*61046927SAndroid Build Coastguard Worker {
4171*61046927SAndroid Build Coastguard Worker bi_block *start_block = NULL;
4172*61046927SAndroid Build Coastguard Worker
4173*61046927SAndroid Build Coastguard Worker foreach_list_typed(nir_cf_node, node, node, list) {
4174*61046927SAndroid Build Coastguard Worker switch (node->type) {
4175*61046927SAndroid Build Coastguard Worker case nir_cf_node_block: {
4176*61046927SAndroid Build Coastguard Worker bi_block *block = emit_block(ctx, nir_cf_node_as_block(node));
4177*61046927SAndroid Build Coastguard Worker
4178*61046927SAndroid Build Coastguard Worker if (!start_block)
4179*61046927SAndroid Build Coastguard Worker start_block = block;
4180*61046927SAndroid Build Coastguard Worker
4181*61046927SAndroid Build Coastguard Worker break;
4182*61046927SAndroid Build Coastguard Worker }
4183*61046927SAndroid Build Coastguard Worker
4184*61046927SAndroid Build Coastguard Worker case nir_cf_node_if:
4185*61046927SAndroid Build Coastguard Worker emit_if(ctx, nir_cf_node_as_if(node));
4186*61046927SAndroid Build Coastguard Worker break;
4187*61046927SAndroid Build Coastguard Worker
4188*61046927SAndroid Build Coastguard Worker case nir_cf_node_loop:
4189*61046927SAndroid Build Coastguard Worker emit_loop(ctx, nir_cf_node_as_loop(node));
4190*61046927SAndroid Build Coastguard Worker break;
4191*61046927SAndroid Build Coastguard Worker
4192*61046927SAndroid Build Coastguard Worker default:
4193*61046927SAndroid Build Coastguard Worker unreachable("Unknown control flow");
4194*61046927SAndroid Build Coastguard Worker }
4195*61046927SAndroid Build Coastguard Worker }
4196*61046927SAndroid Build Coastguard Worker
4197*61046927SAndroid Build Coastguard Worker return start_block;
4198*61046927SAndroid Build Coastguard Worker }
4199*61046927SAndroid Build Coastguard Worker
4200*61046927SAndroid Build Coastguard Worker /* shader-db stuff */
4201*61046927SAndroid Build Coastguard Worker
/* Counters gathered for shader-db reporting */
struct bi_stats {
   /* Structural counts */
   unsigned nr_clauses;
   unsigned nr_tuples;
   unsigned nr_ins;

   /* Per-unit work, as accumulated by bi_count_tuple_stats */
   unsigned nr_arith;
   unsigned nr_texture;
   unsigned nr_varying;
   unsigned nr_ldst;
};
4206*61046927SAndroid Build Coastguard Worker
4207*61046927SAndroid Build Coastguard Worker static void
bi_count_tuple_stats(bi_clause * clause,bi_tuple * tuple,struct bi_stats * stats)4208*61046927SAndroid Build Coastguard Worker bi_count_tuple_stats(bi_clause *clause, bi_tuple *tuple, struct bi_stats *stats)
4209*61046927SAndroid Build Coastguard Worker {
4210*61046927SAndroid Build Coastguard Worker /* Count instructions */
4211*61046927SAndroid Build Coastguard Worker stats->nr_ins += (tuple->fma ? 1 : 0) + (tuple->add ? 1 : 0);
4212*61046927SAndroid Build Coastguard Worker
4213*61046927SAndroid Build Coastguard Worker /* Non-message passing tuples are always arithmetic */
4214*61046927SAndroid Build Coastguard Worker if (tuple->add != clause->message) {
4215*61046927SAndroid Build Coastguard Worker stats->nr_arith++;
4216*61046927SAndroid Build Coastguard Worker return;
4217*61046927SAndroid Build Coastguard Worker }
4218*61046927SAndroid Build Coastguard Worker
4219*61046927SAndroid Build Coastguard Worker /* Message + FMA we'll count as arithmetic _and_ message */
4220*61046927SAndroid Build Coastguard Worker if (tuple->fma)
4221*61046927SAndroid Build Coastguard Worker stats->nr_arith++;
4222*61046927SAndroid Build Coastguard Worker
4223*61046927SAndroid Build Coastguard Worker switch (clause->message_type) {
4224*61046927SAndroid Build Coastguard Worker case BIFROST_MESSAGE_VARYING:
4225*61046927SAndroid Build Coastguard Worker /* Check components interpolated */
4226*61046927SAndroid Build Coastguard Worker stats->nr_varying +=
4227*61046927SAndroid Build Coastguard Worker (clause->message->vecsize + 1) *
4228*61046927SAndroid Build Coastguard Worker (bi_is_regfmt_16(clause->message->register_format) ? 1 : 2);
4229*61046927SAndroid Build Coastguard Worker break;
4230*61046927SAndroid Build Coastguard Worker
4231*61046927SAndroid Build Coastguard Worker case BIFROST_MESSAGE_VARTEX:
4232*61046927SAndroid Build Coastguard Worker /* 2 coordinates, fp32 each */
4233*61046927SAndroid Build Coastguard Worker stats->nr_varying += (2 * 2);
4234*61046927SAndroid Build Coastguard Worker FALLTHROUGH;
4235*61046927SAndroid Build Coastguard Worker case BIFROST_MESSAGE_TEX:
4236*61046927SAndroid Build Coastguard Worker stats->nr_texture++;
4237*61046927SAndroid Build Coastguard Worker break;
4238*61046927SAndroid Build Coastguard Worker
4239*61046927SAndroid Build Coastguard Worker case BIFROST_MESSAGE_ATTRIBUTE:
4240*61046927SAndroid Build Coastguard Worker case BIFROST_MESSAGE_LOAD:
4241*61046927SAndroid Build Coastguard Worker case BIFROST_MESSAGE_STORE:
4242*61046927SAndroid Build Coastguard Worker case BIFROST_MESSAGE_ATOMIC:
4243*61046927SAndroid Build Coastguard Worker stats->nr_ldst++;
4244*61046927SAndroid Build Coastguard Worker break;
4245*61046927SAndroid Build Coastguard Worker
4246*61046927SAndroid Build Coastguard Worker case BIFROST_MESSAGE_NONE:
4247*61046927SAndroid Build Coastguard Worker case BIFROST_MESSAGE_BARRIER:
4248*61046927SAndroid Build Coastguard Worker case BIFROST_MESSAGE_BLEND:
4249*61046927SAndroid Build Coastguard Worker case BIFROST_MESSAGE_TILE:
4250*61046927SAndroid Build Coastguard Worker case BIFROST_MESSAGE_Z_STENCIL:
4251*61046927SAndroid Build Coastguard Worker case BIFROST_MESSAGE_ATEST:
4252*61046927SAndroid Build Coastguard Worker case BIFROST_MESSAGE_JOB:
4253*61046927SAndroid Build Coastguard Worker case BIFROST_MESSAGE_64BIT:
4254*61046927SAndroid Build Coastguard Worker /* Nothing to do */
4255*61046927SAndroid Build Coastguard Worker break;
4256*61046927SAndroid Build Coastguard Worker };
4257*61046927SAndroid Build Coastguard Worker }
4258*61046927SAndroid Build Coastguard Worker
4259*61046927SAndroid Build Coastguard Worker /*
4260*61046927SAndroid Build Coastguard Worker * v7 allows preloading LD_VAR or VAR_TEX messages that must complete before the
4261*61046927SAndroid Build Coastguard Worker * shader completes. These costs are not accounted for in the general cycle
4262*61046927SAndroid Build Coastguard Worker * counts, so this function calculates the effective cost of these messages, as
4263*61046927SAndroid Build Coastguard Worker * if they were executed by shader code.
4264*61046927SAndroid Build Coastguard Worker */
4265*61046927SAndroid Build Coastguard Worker static unsigned
bi_count_preload_cost(bi_context * ctx)4266*61046927SAndroid Build Coastguard Worker bi_count_preload_cost(bi_context *ctx)
4267*61046927SAndroid Build Coastguard Worker {
4268*61046927SAndroid Build Coastguard Worker /* Units: 1/16 of a normalized cycle, assuming that we may interpolate
4269*61046927SAndroid Build Coastguard Worker * 16 fp16 varying components per cycle or fetch two texels per cycle.
4270*61046927SAndroid Build Coastguard Worker */
4271*61046927SAndroid Build Coastguard Worker unsigned cost = 0;
4272*61046927SAndroid Build Coastguard Worker
4273*61046927SAndroid Build Coastguard Worker for (unsigned i = 0; i < ARRAY_SIZE(ctx->info.bifrost->messages); ++i) {
4274*61046927SAndroid Build Coastguard Worker struct bifrost_message_preload msg = ctx->info.bifrost->messages[i];
4275*61046927SAndroid Build Coastguard Worker
4276*61046927SAndroid Build Coastguard Worker if (msg.enabled && msg.texture) {
4277*61046927SAndroid Build Coastguard Worker /* 2 coordinate, 2 half-words each, plus texture */
4278*61046927SAndroid Build Coastguard Worker cost += 12;
4279*61046927SAndroid Build Coastguard Worker } else if (msg.enabled) {
4280*61046927SAndroid Build Coastguard Worker cost += (msg.num_components * (msg.fp16 ? 1 : 2));
4281*61046927SAndroid Build Coastguard Worker }
4282*61046927SAndroid Build Coastguard Worker }
4283*61046927SAndroid Build Coastguard Worker
4284*61046927SAndroid Build Coastguard Worker return cost;
4285*61046927SAndroid Build Coastguard Worker }
4286*61046927SAndroid Build Coastguard Worker
4287*61046927SAndroid Build Coastguard Worker static const char *
bi_shader_stage_name(bi_context * ctx)4288*61046927SAndroid Build Coastguard Worker bi_shader_stage_name(bi_context *ctx)
4289*61046927SAndroid Build Coastguard Worker {
4290*61046927SAndroid Build Coastguard Worker if (ctx->idvs == BI_IDVS_VARYING)
4291*61046927SAndroid Build Coastguard Worker return "MESA_SHADER_VARYING";
4292*61046927SAndroid Build Coastguard Worker else if (ctx->idvs == BI_IDVS_POSITION)
4293*61046927SAndroid Build Coastguard Worker return "MESA_SHADER_POSITION";
4294*61046927SAndroid Build Coastguard Worker else if (ctx->inputs->is_blend)
4295*61046927SAndroid Build Coastguard Worker return "MESA_SHADER_BLEND";
4296*61046927SAndroid Build Coastguard Worker else
4297*61046927SAndroid Build Coastguard Worker return gl_shader_stage_name(ctx->stage);
4298*61046927SAndroid Build Coastguard Worker }
4299*61046927SAndroid Build Coastguard Worker
4300*61046927SAndroid Build Coastguard Worker static char *
bi_print_stats(bi_context * ctx,unsigned size)4301*61046927SAndroid Build Coastguard Worker bi_print_stats(bi_context *ctx, unsigned size)
4302*61046927SAndroid Build Coastguard Worker {
4303*61046927SAndroid Build Coastguard Worker struct bi_stats stats = {0};
4304*61046927SAndroid Build Coastguard Worker
4305*61046927SAndroid Build Coastguard Worker /* Count instructions, clauses, and tuples. Also attempt to construct
4306*61046927SAndroid Build Coastguard Worker * normalized execution engine cycle counts, using the following ratio:
4307*61046927SAndroid Build Coastguard Worker *
4308*61046927SAndroid Build Coastguard Worker * 24 arith tuples/cycle
4309*61046927SAndroid Build Coastguard Worker * 2 texture messages/cycle
4310*61046927SAndroid Build Coastguard Worker * 16 x 16-bit varying channels interpolated/cycle
4311*61046927SAndroid Build Coastguard Worker * 1 load store message/cycle
4312*61046927SAndroid Build Coastguard Worker *
4313*61046927SAndroid Build Coastguard Worker * These numbers seem to match Arm Mobile Studio's heuristic. The real
4314*61046927SAndroid Build Coastguard Worker * cycle counts are surely more complicated.
4315*61046927SAndroid Build Coastguard Worker */
4316*61046927SAndroid Build Coastguard Worker
4317*61046927SAndroid Build Coastguard Worker bi_foreach_block(ctx, block) {
4318*61046927SAndroid Build Coastguard Worker bi_foreach_clause_in_block(block, clause) {
4319*61046927SAndroid Build Coastguard Worker stats.nr_clauses++;
4320*61046927SAndroid Build Coastguard Worker stats.nr_tuples += clause->tuple_count;
4321*61046927SAndroid Build Coastguard Worker
4322*61046927SAndroid Build Coastguard Worker for (unsigned i = 0; i < clause->tuple_count; ++i)
4323*61046927SAndroid Build Coastguard Worker bi_count_tuple_stats(clause, &clause->tuples[i], &stats);
4324*61046927SAndroid Build Coastguard Worker }
4325*61046927SAndroid Build Coastguard Worker }
4326*61046927SAndroid Build Coastguard Worker
4327*61046927SAndroid Build Coastguard Worker float cycles_arith = ((float)stats.nr_arith) / 24.0;
4328*61046927SAndroid Build Coastguard Worker float cycles_texture = ((float)stats.nr_texture) / 2.0;
4329*61046927SAndroid Build Coastguard Worker float cycles_varying = ((float)stats.nr_varying) / 16.0;
4330*61046927SAndroid Build Coastguard Worker float cycles_ldst = ((float)stats.nr_ldst) / 1.0;
4331*61046927SAndroid Build Coastguard Worker
4332*61046927SAndroid Build Coastguard Worker float cycles_message = MAX3(cycles_texture, cycles_varying, cycles_ldst);
4333*61046927SAndroid Build Coastguard Worker float cycles_bound = MAX2(cycles_arith, cycles_message);
4334*61046927SAndroid Build Coastguard Worker
4335*61046927SAndroid Build Coastguard Worker /* Thread count and register pressure are traded off only on v7 */
4336*61046927SAndroid Build Coastguard Worker bool full_threads = (ctx->arch == 7 && ctx->info.work_reg_count <= 32);
4337*61046927SAndroid Build Coastguard Worker unsigned nr_threads = full_threads ? 2 : 1;
4338*61046927SAndroid Build Coastguard Worker
4339*61046927SAndroid Build Coastguard Worker /* Dump stats */
4340*61046927SAndroid Build Coastguard Worker char *str = ralloc_asprintf(
4341*61046927SAndroid Build Coastguard Worker NULL,
4342*61046927SAndroid Build Coastguard Worker "%s shader: "
4343*61046927SAndroid Build Coastguard Worker "%u inst, %u tuples, %u clauses, "
4344*61046927SAndroid Build Coastguard Worker "%f cycles, %f arith, %f texture, %f vary, %f ldst, "
4345*61046927SAndroid Build Coastguard Worker "%u quadwords, %u threads",
4346*61046927SAndroid Build Coastguard Worker bi_shader_stage_name(ctx), stats.nr_ins, stats.nr_tuples,
4347*61046927SAndroid Build Coastguard Worker stats.nr_clauses, cycles_bound, cycles_arith, cycles_texture,
4348*61046927SAndroid Build Coastguard Worker cycles_varying, cycles_ldst, size / 16, nr_threads);
4349*61046927SAndroid Build Coastguard Worker
4350*61046927SAndroid Build Coastguard Worker if (ctx->arch == 7) {
4351*61046927SAndroid Build Coastguard Worker ralloc_asprintf_append(&str, ", %u preloads", bi_count_preload_cost(ctx));
4352*61046927SAndroid Build Coastguard Worker }
4353*61046927SAndroid Build Coastguard Worker
4354*61046927SAndroid Build Coastguard Worker ralloc_asprintf_append(&str, ", %u loops, %u:%u spills:fills",
4355*61046927SAndroid Build Coastguard Worker ctx->loop_count, ctx->spills, ctx->fills);
4356*61046927SAndroid Build Coastguard Worker
4357*61046927SAndroid Build Coastguard Worker return str;
4358*61046927SAndroid Build Coastguard Worker }
4359*61046927SAndroid Build Coastguard Worker
4360*61046927SAndroid Build Coastguard Worker static char *
va_print_stats(bi_context * ctx,unsigned size)4361*61046927SAndroid Build Coastguard Worker va_print_stats(bi_context *ctx, unsigned size)
4362*61046927SAndroid Build Coastguard Worker {
4363*61046927SAndroid Build Coastguard Worker unsigned nr_ins = 0;
4364*61046927SAndroid Build Coastguard Worker struct va_stats stats = {0};
4365*61046927SAndroid Build Coastguard Worker
4366*61046927SAndroid Build Coastguard Worker /* Count instructions */
4367*61046927SAndroid Build Coastguard Worker bi_foreach_instr_global(ctx, I) {
4368*61046927SAndroid Build Coastguard Worker nr_ins++;
4369*61046927SAndroid Build Coastguard Worker va_count_instr_stats(I, &stats);
4370*61046927SAndroid Build Coastguard Worker }
4371*61046927SAndroid Build Coastguard Worker
4372*61046927SAndroid Build Coastguard Worker /* Mali G78 peak performance:
4373*61046927SAndroid Build Coastguard Worker *
4374*61046927SAndroid Build Coastguard Worker * 64 FMA instructions per cycle
4375*61046927SAndroid Build Coastguard Worker * 64 CVT instructions per cycle
4376*61046927SAndroid Build Coastguard Worker * 16 SFU instructions per cycle
4377*61046927SAndroid Build Coastguard Worker * 8 x 32-bit varying channels interpolated per cycle
4378*61046927SAndroid Build Coastguard Worker * 4 texture instructions per cycle
4379*61046927SAndroid Build Coastguard Worker * 1 load/store operation per cycle
4380*61046927SAndroid Build Coastguard Worker */
4381*61046927SAndroid Build Coastguard Worker
4382*61046927SAndroid Build Coastguard Worker float cycles_fma = ((float)stats.fma) / 64.0;
4383*61046927SAndroid Build Coastguard Worker float cycles_cvt = ((float)stats.cvt) / 64.0;
4384*61046927SAndroid Build Coastguard Worker float cycles_sfu = ((float)stats.sfu) / 16.0;
4385*61046927SAndroid Build Coastguard Worker float cycles_v = ((float)stats.v) / 16.0;
4386*61046927SAndroid Build Coastguard Worker float cycles_t = ((float)stats.t) / 4.0;
4387*61046927SAndroid Build Coastguard Worker float cycles_ls = ((float)stats.ls) / 1.0;
4388*61046927SAndroid Build Coastguard Worker
4389*61046927SAndroid Build Coastguard Worker /* Calculate the bound */
4390*61046927SAndroid Build Coastguard Worker float cycles = MAX2(MAX3(cycles_fma, cycles_cvt, cycles_sfu),
4391*61046927SAndroid Build Coastguard Worker MAX3(cycles_v, cycles_t, cycles_ls));
4392*61046927SAndroid Build Coastguard Worker
4393*61046927SAndroid Build Coastguard Worker /* Thread count and register pressure are traded off */
4394*61046927SAndroid Build Coastguard Worker unsigned nr_threads = (ctx->info.work_reg_count <= 32) ? 2 : 1;
4395*61046927SAndroid Build Coastguard Worker
4396*61046927SAndroid Build Coastguard Worker /* Dump stats */
4397*61046927SAndroid Build Coastguard Worker return ralloc_asprintf(NULL,
4398*61046927SAndroid Build Coastguard Worker "%s shader: "
4399*61046927SAndroid Build Coastguard Worker "%u inst, %f cycles, %f fma, %f cvt, %f sfu, %f v, "
4400*61046927SAndroid Build Coastguard Worker "%f t, %f ls, %u quadwords, %u threads, %u loops, "
4401*61046927SAndroid Build Coastguard Worker "%u:%u spills:fills",
4402*61046927SAndroid Build Coastguard Worker bi_shader_stage_name(ctx), nr_ins, cycles, cycles_fma,
4403*61046927SAndroid Build Coastguard Worker cycles_cvt, cycles_sfu, cycles_v, cycles_t, cycles_ls,
4404*61046927SAndroid Build Coastguard Worker size / 16, nr_threads, ctx->loop_count, ctx->spills,
4405*61046927SAndroid Build Coastguard Worker ctx->fills);
4406*61046927SAndroid Build Coastguard Worker }
4407*61046927SAndroid Build Coastguard Worker
/* Size callback: reports the attribute slot count of `type`. The `bindless`
 * argument is deliberately not forwarded; glsl_count_attribute_slots() is
 * always called with `false` as its second argument.
 */
static int
glsl_type_size(const struct glsl_type *type, bool bindless)
{
   const int slots = glsl_count_attribute_slots(type, false);

   return slots;
}
4413*61046927SAndroid Build Coastguard Worker
4414*61046927SAndroid Build Coastguard Worker /* Split stores to memory. We don't split stores to vertex outputs, since
4415*61046927SAndroid Build Coastguard Worker * nir_lower_io_to_temporaries will ensure there's only a single write.
4416*61046927SAndroid Build Coastguard Worker */
4417*61046927SAndroid Build Coastguard Worker
4418*61046927SAndroid Build Coastguard Worker static bool
should_split_wrmask(const nir_instr * instr,UNUSED const void * data)4419*61046927SAndroid Build Coastguard Worker should_split_wrmask(const nir_instr *instr, UNUSED const void *data)
4420*61046927SAndroid Build Coastguard Worker {
4421*61046927SAndroid Build Coastguard Worker nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
4422*61046927SAndroid Build Coastguard Worker
4423*61046927SAndroid Build Coastguard Worker switch (intr->intrinsic) {
4424*61046927SAndroid Build Coastguard Worker case nir_intrinsic_store_ssbo:
4425*61046927SAndroid Build Coastguard Worker case nir_intrinsic_store_shared:
4426*61046927SAndroid Build Coastguard Worker case nir_intrinsic_store_global:
4427*61046927SAndroid Build Coastguard Worker case nir_intrinsic_store_scratch:
4428*61046927SAndroid Build Coastguard Worker return true;
4429*61046927SAndroid Build Coastguard Worker default:
4430*61046927SAndroid Build Coastguard Worker return false;
4431*61046927SAndroid Build Coastguard Worker }
4432*61046927SAndroid Build Coastguard Worker }
4433*61046927SAndroid Build Coastguard Worker
4434*61046927SAndroid Build Coastguard Worker /*
4435*61046927SAndroid Build Coastguard Worker * Some operations are only available as 32-bit instructions. 64-bit floats are
4436*61046927SAndroid Build Coastguard Worker * unsupported and ints are lowered with nir_lower_int64. Certain 8-bit and
4437*61046927SAndroid Build Coastguard Worker * 16-bit instructions, however, are lowered here.
4438*61046927SAndroid Build Coastguard Worker */
4439*61046927SAndroid Build Coastguard Worker static unsigned
bi_lower_bit_size(const nir_instr * instr,UNUSED void * data)4440*61046927SAndroid Build Coastguard Worker bi_lower_bit_size(const nir_instr *instr, UNUSED void *data)
4441*61046927SAndroid Build Coastguard Worker {
4442*61046927SAndroid Build Coastguard Worker if (instr->type != nir_instr_type_alu)
4443*61046927SAndroid Build Coastguard Worker return 0;
4444*61046927SAndroid Build Coastguard Worker
4445*61046927SAndroid Build Coastguard Worker nir_alu_instr *alu = nir_instr_as_alu(instr);
4446*61046927SAndroid Build Coastguard Worker
4447*61046927SAndroid Build Coastguard Worker switch (alu->op) {
4448*61046927SAndroid Build Coastguard Worker case nir_op_fexp2:
4449*61046927SAndroid Build Coastguard Worker case nir_op_flog2:
4450*61046927SAndroid Build Coastguard Worker case nir_op_fpow:
4451*61046927SAndroid Build Coastguard Worker case nir_op_fsin:
4452*61046927SAndroid Build Coastguard Worker case nir_op_fcos:
4453*61046927SAndroid Build Coastguard Worker case nir_op_bit_count:
4454*61046927SAndroid Build Coastguard Worker case nir_op_bitfield_reverse:
4455*61046927SAndroid Build Coastguard Worker return (nir_src_bit_size(alu->src[0].src) == 32) ? 0 : 32;
4456*61046927SAndroid Build Coastguard Worker default:
4457*61046927SAndroid Build Coastguard Worker return 0;
4458*61046927SAndroid Build Coastguard Worker }
4459*61046927SAndroid Build Coastguard Worker }
4460*61046927SAndroid Build Coastguard Worker
4461*61046927SAndroid Build Coastguard Worker /* Although Bifrost generally supports packed 16-bit vec2 and 8-bit vec4,
4462*61046927SAndroid Build Coastguard Worker * transcendentals are an exception. Also shifts because of lane size mismatch
4463*61046927SAndroid Build Coastguard Worker * (8-bit in Bifrost, 32-bit in NIR TODO - workaround!). Some conversions need
4464*61046927SAndroid Build Coastguard Worker * to be scalarized due to type size. */
4465*61046927SAndroid Build Coastguard Worker
4466*61046927SAndroid Build Coastguard Worker static uint8_t
bi_vectorize_filter(const nir_instr * instr,const void * data)4467*61046927SAndroid Build Coastguard Worker bi_vectorize_filter(const nir_instr *instr, const void *data)
4468*61046927SAndroid Build Coastguard Worker {
4469*61046927SAndroid Build Coastguard Worker /* Defaults work for everything else */
4470*61046927SAndroid Build Coastguard Worker if (instr->type != nir_instr_type_alu)
4471*61046927SAndroid Build Coastguard Worker return 0;
4472*61046927SAndroid Build Coastguard Worker
4473*61046927SAndroid Build Coastguard Worker const nir_alu_instr *alu = nir_instr_as_alu(instr);
4474*61046927SAndroid Build Coastguard Worker
4475*61046927SAndroid Build Coastguard Worker switch (alu->op) {
4476*61046927SAndroid Build Coastguard Worker case nir_op_frcp:
4477*61046927SAndroid Build Coastguard Worker case nir_op_frsq:
4478*61046927SAndroid Build Coastguard Worker case nir_op_ishl:
4479*61046927SAndroid Build Coastguard Worker case nir_op_ishr:
4480*61046927SAndroid Build Coastguard Worker case nir_op_ushr:
4481*61046927SAndroid Build Coastguard Worker case nir_op_f2i16:
4482*61046927SAndroid Build Coastguard Worker case nir_op_f2u16:
4483*61046927SAndroid Build Coastguard Worker case nir_op_extract_u8:
4484*61046927SAndroid Build Coastguard Worker case nir_op_extract_i8:
4485*61046927SAndroid Build Coastguard Worker case nir_op_extract_u16:
4486*61046927SAndroid Build Coastguard Worker case nir_op_extract_i16:
4487*61046927SAndroid Build Coastguard Worker case nir_op_insert_u16:
4488*61046927SAndroid Build Coastguard Worker return 1;
4489*61046927SAndroid Build Coastguard Worker default:
4490*61046927SAndroid Build Coastguard Worker break;
4491*61046927SAndroid Build Coastguard Worker }
4492*61046927SAndroid Build Coastguard Worker
4493*61046927SAndroid Build Coastguard Worker /* Vectorized instructions cannot write more than 32-bit */
4494*61046927SAndroid Build Coastguard Worker int dst_bit_size = alu->def.bit_size;
4495*61046927SAndroid Build Coastguard Worker if (dst_bit_size == 16)
4496*61046927SAndroid Build Coastguard Worker return 2;
4497*61046927SAndroid Build Coastguard Worker else
4498*61046927SAndroid Build Coastguard Worker return 1;
4499*61046927SAndroid Build Coastguard Worker }
4500*61046927SAndroid Build Coastguard Worker
4501*61046927SAndroid Build Coastguard Worker static bool
bi_scalarize_filter(const nir_instr * instr,const void * data)4502*61046927SAndroid Build Coastguard Worker bi_scalarize_filter(const nir_instr *instr, const void *data)
4503*61046927SAndroid Build Coastguard Worker {
4504*61046927SAndroid Build Coastguard Worker if (instr->type != nir_instr_type_alu)
4505*61046927SAndroid Build Coastguard Worker return false;
4506*61046927SAndroid Build Coastguard Worker
4507*61046927SAndroid Build Coastguard Worker const nir_alu_instr *alu = nir_instr_as_alu(instr);
4508*61046927SAndroid Build Coastguard Worker
4509*61046927SAndroid Build Coastguard Worker switch (alu->op) {
4510*61046927SAndroid Build Coastguard Worker case nir_op_pack_uvec2_to_uint:
4511*61046927SAndroid Build Coastguard Worker case nir_op_pack_uvec4_to_uint:
4512*61046927SAndroid Build Coastguard Worker return false;
4513*61046927SAndroid Build Coastguard Worker default:
4514*61046927SAndroid Build Coastguard Worker return true;
4515*61046927SAndroid Build Coastguard Worker }
4516*61046927SAndroid Build Coastguard Worker }
4517*61046927SAndroid Build Coastguard Worker
4518*61046927SAndroid Build Coastguard Worker /* Ensure we write exactly 4 components */
4519*61046927SAndroid Build Coastguard Worker static nir_def *
bifrost_nir_valid_channel(nir_builder * b,nir_def * in,unsigned channel,unsigned first,unsigned mask)4520*61046927SAndroid Build Coastguard Worker bifrost_nir_valid_channel(nir_builder *b, nir_def *in, unsigned channel,
4521*61046927SAndroid Build Coastguard Worker unsigned first, unsigned mask)
4522*61046927SAndroid Build Coastguard Worker {
4523*61046927SAndroid Build Coastguard Worker if (!(mask & BITFIELD_BIT(channel)))
4524*61046927SAndroid Build Coastguard Worker channel = first;
4525*61046927SAndroid Build Coastguard Worker
4526*61046927SAndroid Build Coastguard Worker return nir_channel(b, in, channel);
4527*61046927SAndroid Build Coastguard Worker }
4528*61046927SAndroid Build Coastguard Worker
4529*61046927SAndroid Build Coastguard Worker /* Lower fragment store_output instructions to always write 4 components,
4530*61046927SAndroid Build Coastguard Worker * matching the hardware semantic. This may require additional moves. Skipping
4531*61046927SAndroid Build Coastguard Worker * these moves is possible in theory, but invokes undefined behaviour in the
4532*61046927SAndroid Build Coastguard Worker * compiler. The DDK inserts these moves, so we will as well. */
4533*61046927SAndroid Build Coastguard Worker
4534*61046927SAndroid Build Coastguard Worker static bool
bifrost_nir_lower_blend_components(struct nir_builder * b,nir_intrinsic_instr * intr,void * data)4535*61046927SAndroid Build Coastguard Worker bifrost_nir_lower_blend_components(struct nir_builder *b,
4536*61046927SAndroid Build Coastguard Worker nir_intrinsic_instr *intr, void *data)
4537*61046927SAndroid Build Coastguard Worker {
4538*61046927SAndroid Build Coastguard Worker if (intr->intrinsic != nir_intrinsic_store_output)
4539*61046927SAndroid Build Coastguard Worker return false;
4540*61046927SAndroid Build Coastguard Worker
4541*61046927SAndroid Build Coastguard Worker nir_def *in = intr->src[0].ssa;
4542*61046927SAndroid Build Coastguard Worker unsigned first = nir_intrinsic_component(intr);
4543*61046927SAndroid Build Coastguard Worker unsigned mask = nir_intrinsic_write_mask(intr);
4544*61046927SAndroid Build Coastguard Worker
4545*61046927SAndroid Build Coastguard Worker assert(first == 0 && "shouldn't get nonzero components");
4546*61046927SAndroid Build Coastguard Worker
4547*61046927SAndroid Build Coastguard Worker /* Nothing to do */
4548*61046927SAndroid Build Coastguard Worker if (mask == BITFIELD_MASK(4))
4549*61046927SAndroid Build Coastguard Worker return false;
4550*61046927SAndroid Build Coastguard Worker
4551*61046927SAndroid Build Coastguard Worker b->cursor = nir_before_instr(&intr->instr);
4552*61046927SAndroid Build Coastguard Worker
4553*61046927SAndroid Build Coastguard Worker /* Replicate the first valid component instead */
4554*61046927SAndroid Build Coastguard Worker nir_def *replicated =
4555*61046927SAndroid Build Coastguard Worker nir_vec4(b, bifrost_nir_valid_channel(b, in, 0, first, mask),
4556*61046927SAndroid Build Coastguard Worker bifrost_nir_valid_channel(b, in, 1, first, mask),
4557*61046927SAndroid Build Coastguard Worker bifrost_nir_valid_channel(b, in, 2, first, mask),
4558*61046927SAndroid Build Coastguard Worker bifrost_nir_valid_channel(b, in, 3, first, mask));
4559*61046927SAndroid Build Coastguard Worker
4560*61046927SAndroid Build Coastguard Worker /* Rewrite to use our replicated version */
4561*61046927SAndroid Build Coastguard Worker nir_src_rewrite(&intr->src[0], replicated);
4562*61046927SAndroid Build Coastguard Worker nir_intrinsic_set_component(intr, 0);
4563*61046927SAndroid Build Coastguard Worker nir_intrinsic_set_write_mask(intr, 0xF);
4564*61046927SAndroid Build Coastguard Worker intr->num_components = 4;
4565*61046927SAndroid Build Coastguard Worker
4566*61046927SAndroid Build Coastguard Worker return true;
4567*61046927SAndroid Build Coastguard Worker }
4568*61046927SAndroid Build Coastguard Worker
4569*61046927SAndroid Build Coastguard Worker static nir_mem_access_size_align
mem_access_size_align_cb(nir_intrinsic_op intrin,uint8_t bytes,uint8_t bit_size,uint32_t align_mul,uint32_t align_offset,bool offset_is_const,const void * cb_data)4570*61046927SAndroid Build Coastguard Worker mem_access_size_align_cb(nir_intrinsic_op intrin, uint8_t bytes,
4571*61046927SAndroid Build Coastguard Worker uint8_t bit_size, uint32_t align_mul,
4572*61046927SAndroid Build Coastguard Worker uint32_t align_offset, bool offset_is_const,
4573*61046927SAndroid Build Coastguard Worker const void *cb_data)
4574*61046927SAndroid Build Coastguard Worker {
4575*61046927SAndroid Build Coastguard Worker uint32_t align = nir_combined_align(align_mul, align_offset);
4576*61046927SAndroid Build Coastguard Worker assert(util_is_power_of_two_nonzero(align));
4577*61046927SAndroid Build Coastguard Worker
4578*61046927SAndroid Build Coastguard Worker /* No more than 16 bytes at a time. */
4579*61046927SAndroid Build Coastguard Worker bytes = MIN2(bytes, 16);
4580*61046927SAndroid Build Coastguard Worker
4581*61046927SAndroid Build Coastguard Worker /* If the number of bytes is a multiple of 4, use 32-bit loads. Else if it's
4582*61046927SAndroid Build Coastguard Worker * a multiple of 2, use 16-bit loads. Else use 8-bit loads.
4583*61046927SAndroid Build Coastguard Worker *
4584*61046927SAndroid Build Coastguard Worker * But if we're only aligned to 1 byte, use 8-bit loads. If we're only
4585*61046927SAndroid Build Coastguard Worker * aligned to 2 bytes, use 16-bit loads, unless we needed 8-bit loads due to
4586*61046927SAndroid Build Coastguard Worker * the size.
4587*61046927SAndroid Build Coastguard Worker */
4588*61046927SAndroid Build Coastguard Worker if ((bytes & 1) || (align == 1))
4589*61046927SAndroid Build Coastguard Worker bit_size = 8;
4590*61046927SAndroid Build Coastguard Worker else if ((bytes & 2) || (align == 2))
4591*61046927SAndroid Build Coastguard Worker bit_size = 16;
4592*61046927SAndroid Build Coastguard Worker else if (bit_size >= 32)
4593*61046927SAndroid Build Coastguard Worker bit_size = 32;
4594*61046927SAndroid Build Coastguard Worker
4595*61046927SAndroid Build Coastguard Worker unsigned num_comps = MIN2(bytes / (bit_size / 8), 4);
4596*61046927SAndroid Build Coastguard Worker
4597*61046927SAndroid Build Coastguard Worker /* Push constants require 32-bit loads. */
4598*61046927SAndroid Build Coastguard Worker if (intrin == nir_intrinsic_load_push_constant) {
4599*61046927SAndroid Build Coastguard Worker if (align_mul >= 4) {
4600*61046927SAndroid Build Coastguard Worker /* If align_mul is bigger than 4 we can use align_offset to find
4601*61046927SAndroid Build Coastguard Worker * the exact number of words we need to read.
4602*61046927SAndroid Build Coastguard Worker */
4603*61046927SAndroid Build Coastguard Worker num_comps = DIV_ROUND_UP((align_offset % 4) + bytes, 4);
4604*61046927SAndroid Build Coastguard Worker } else {
4605*61046927SAndroid Build Coastguard Worker /* If bytes is aligned on 32-bit, the access might still cross one
4606*61046927SAndroid Build Coastguard Worker * word at the beginning, and one word at the end. If bytes is not
4607*61046927SAndroid Build Coastguard Worker * aligned on 32-bit, the extra two words should cover for both the
4608*61046927SAndroid Build Coastguard Worker * size and offset mis-alignment.
4609*61046927SAndroid Build Coastguard Worker */
4610*61046927SAndroid Build Coastguard Worker num_comps = (bytes / 4) + 2;
4611*61046927SAndroid Build Coastguard Worker }
4612*61046927SAndroid Build Coastguard Worker
4613*61046927SAndroid Build Coastguard Worker bit_size = MIN2(bit_size, 32);
4614*61046927SAndroid Build Coastguard Worker }
4615*61046927SAndroid Build Coastguard Worker
4616*61046927SAndroid Build Coastguard Worker return (nir_mem_access_size_align){
4617*61046927SAndroid Build Coastguard Worker .num_components = num_comps,
4618*61046927SAndroid Build Coastguard Worker .bit_size = bit_size,
4619*61046927SAndroid Build Coastguard Worker .align = bit_size / 8,
4620*61046927SAndroid Build Coastguard Worker };
4621*61046927SAndroid Build Coastguard Worker }
4622*61046927SAndroid Build Coastguard Worker
4623*61046927SAndroid Build Coastguard Worker static bool
mem_vectorize_cb(unsigned align_mul,unsigned align_offset,unsigned bit_size,unsigned num_components,nir_intrinsic_instr * low,nir_intrinsic_instr * high,void * data)4624*61046927SAndroid Build Coastguard Worker mem_vectorize_cb(unsigned align_mul, unsigned align_offset, unsigned bit_size,
4625*61046927SAndroid Build Coastguard Worker unsigned num_components, nir_intrinsic_instr *low,
4626*61046927SAndroid Build Coastguard Worker nir_intrinsic_instr *high, void *data)
4627*61046927SAndroid Build Coastguard Worker {
4628*61046927SAndroid Build Coastguard Worker /* Must be aligned to the size of the load */
4629*61046927SAndroid Build Coastguard Worker unsigned align = nir_combined_align(align_mul, align_offset);
4630*61046927SAndroid Build Coastguard Worker if ((bit_size / 8) > align)
4631*61046927SAndroid Build Coastguard Worker return false;
4632*61046927SAndroid Build Coastguard Worker
4633*61046927SAndroid Build Coastguard Worker if (num_components > 4)
4634*61046927SAndroid Build Coastguard Worker return false;
4635*61046927SAndroid Build Coastguard Worker
4636*61046927SAndroid Build Coastguard Worker if (bit_size > 32)
4637*61046927SAndroid Build Coastguard Worker return false;
4638*61046927SAndroid Build Coastguard Worker
4639*61046927SAndroid Build Coastguard Worker return true;
4640*61046927SAndroid Build Coastguard Worker }
4641*61046927SAndroid Build Coastguard Worker
4642*61046927SAndroid Build Coastguard Worker static void
bi_optimize_nir(nir_shader * nir,unsigned gpu_id,bool is_blend)4643*61046927SAndroid Build Coastguard Worker bi_optimize_nir(nir_shader *nir, unsigned gpu_id, bool is_blend)
4644*61046927SAndroid Build Coastguard Worker {
4645*61046927SAndroid Build Coastguard Worker NIR_PASS_V(nir, nir_opt_shrink_stores, true);
4646*61046927SAndroid Build Coastguard Worker
4647*61046927SAndroid Build Coastguard Worker bool progress;
4648*61046927SAndroid Build Coastguard Worker
4649*61046927SAndroid Build Coastguard Worker do {
4650*61046927SAndroid Build Coastguard Worker progress = false;
4651*61046927SAndroid Build Coastguard Worker
4652*61046927SAndroid Build Coastguard Worker NIR_PASS(progress, nir, nir_lower_vars_to_ssa);
4653*61046927SAndroid Build Coastguard Worker NIR_PASS(progress, nir, nir_lower_wrmasks, should_split_wrmask, NULL);
4654*61046927SAndroid Build Coastguard Worker
4655*61046927SAndroid Build Coastguard Worker NIR_PASS(progress, nir, nir_copy_prop);
4656*61046927SAndroid Build Coastguard Worker NIR_PASS(progress, nir, nir_opt_remove_phis);
4657*61046927SAndroid Build Coastguard Worker NIR_PASS(progress, nir, nir_opt_dce);
4658*61046927SAndroid Build Coastguard Worker NIR_PASS(progress, nir, nir_opt_dead_cf);
4659*61046927SAndroid Build Coastguard Worker NIR_PASS(progress, nir, nir_opt_cse);
4660*61046927SAndroid Build Coastguard Worker NIR_PASS(progress, nir, nir_opt_peephole_select, 64, false, true);
4661*61046927SAndroid Build Coastguard Worker NIR_PASS(progress, nir, nir_opt_algebraic);
4662*61046927SAndroid Build Coastguard Worker NIR_PASS(progress, nir, nir_opt_constant_folding);
4663*61046927SAndroid Build Coastguard Worker
4664*61046927SAndroid Build Coastguard Worker NIR_PASS(progress, nir, nir_opt_undef);
4665*61046927SAndroid Build Coastguard Worker NIR_PASS(progress, nir, nir_lower_undef_to_zero);
4666*61046927SAndroid Build Coastguard Worker
4667*61046927SAndroid Build Coastguard Worker NIR_PASS(progress, nir, nir_opt_shrink_vectors, false);
4668*61046927SAndroid Build Coastguard Worker NIR_PASS(progress, nir, nir_opt_loop_unroll);
4669*61046927SAndroid Build Coastguard Worker } while (progress);
4670*61046927SAndroid Build Coastguard Worker
4671*61046927SAndroid Build Coastguard Worker NIR_PASS(
4672*61046927SAndroid Build Coastguard Worker progress, nir, nir_opt_load_store_vectorize,
4673*61046927SAndroid Build Coastguard Worker &(const nir_load_store_vectorize_options){
4674*61046927SAndroid Build Coastguard Worker .modes = nir_var_mem_global | nir_var_mem_shared | nir_var_shader_temp,
4675*61046927SAndroid Build Coastguard Worker .callback = mem_vectorize_cb,
4676*61046927SAndroid Build Coastguard Worker });
4677*61046927SAndroid Build Coastguard Worker NIR_PASS(progress, nir, nir_lower_pack);
4678*61046927SAndroid Build Coastguard Worker
4679*61046927SAndroid Build Coastguard Worker /* TODO: Why is 64-bit getting rematerialized?
4680*61046927SAndroid Build Coastguard Worker * KHR-GLES31.core.shader_image_load_store.basic-allTargets-atomicFS */
4681*61046927SAndroid Build Coastguard Worker NIR_PASS(progress, nir, nir_lower_int64);
4682*61046927SAndroid Build Coastguard Worker
4683*61046927SAndroid Build Coastguard Worker /* We need to cleanup after each iteration of late algebraic
4684*61046927SAndroid Build Coastguard Worker * optimizations, since otherwise NIR can produce weird edge cases
4685*61046927SAndroid Build Coastguard Worker * (like fneg of a constant) which we don't handle */
4686*61046927SAndroid Build Coastguard Worker bool late_algebraic = true;
4687*61046927SAndroid Build Coastguard Worker while (late_algebraic) {
4688*61046927SAndroid Build Coastguard Worker late_algebraic = false;
4689*61046927SAndroid Build Coastguard Worker NIR_PASS(late_algebraic, nir, nir_opt_algebraic_late);
4690*61046927SAndroid Build Coastguard Worker NIR_PASS(progress, nir, nir_opt_constant_folding);
4691*61046927SAndroid Build Coastguard Worker NIR_PASS(progress, nir, nir_copy_prop);
4692*61046927SAndroid Build Coastguard Worker NIR_PASS(progress, nir, nir_opt_dce);
4693*61046927SAndroid Build Coastguard Worker NIR_PASS(progress, nir, nir_opt_cse);
4694*61046927SAndroid Build Coastguard Worker }
4695*61046927SAndroid Build Coastguard Worker
4696*61046927SAndroid Build Coastguard Worker /* This opt currently helps on Bifrost but not Valhall */
4697*61046927SAndroid Build Coastguard Worker if (gpu_id < 0x9000)
4698*61046927SAndroid Build Coastguard Worker NIR_PASS(progress, nir, bifrost_nir_opt_boolean_bitwise);
4699*61046927SAndroid Build Coastguard Worker
4700*61046927SAndroid Build Coastguard Worker NIR_PASS(progress, nir, nir_lower_alu_to_scalar, bi_scalarize_filter, NULL);
4701*61046927SAndroid Build Coastguard Worker NIR_PASS(progress, nir, nir_opt_vectorize, bi_vectorize_filter, NULL);
4702*61046927SAndroid Build Coastguard Worker NIR_PASS(progress, nir, nir_lower_bool_to_bitsize);
4703*61046927SAndroid Build Coastguard Worker
4704*61046927SAndroid Build Coastguard Worker /* Prepass to simplify instruction selection */
4705*61046927SAndroid Build Coastguard Worker late_algebraic = false;
4706*61046927SAndroid Build Coastguard Worker NIR_PASS(late_algebraic, nir, bifrost_nir_lower_algebraic_late);
4707*61046927SAndroid Build Coastguard Worker
4708*61046927SAndroid Build Coastguard Worker while (late_algebraic) {
4709*61046927SAndroid Build Coastguard Worker late_algebraic = false;
4710*61046927SAndroid Build Coastguard Worker NIR_PASS(late_algebraic, nir, nir_opt_algebraic_late);
4711*61046927SAndroid Build Coastguard Worker NIR_PASS(progress, nir, nir_opt_constant_folding);
4712*61046927SAndroid Build Coastguard Worker NIR_PASS(progress, nir, nir_copy_prop);
4713*61046927SAndroid Build Coastguard Worker NIR_PASS(progress, nir, nir_opt_dce);
4714*61046927SAndroid Build Coastguard Worker NIR_PASS(progress, nir, nir_opt_cse);
4715*61046927SAndroid Build Coastguard Worker }
4716*61046927SAndroid Build Coastguard Worker
4717*61046927SAndroid Build Coastguard Worker NIR_PASS(progress, nir, nir_lower_load_const_to_scalar);
4718*61046927SAndroid Build Coastguard Worker NIR_PASS(progress, nir, nir_opt_dce);
4719*61046927SAndroid Build Coastguard Worker
4720*61046927SAndroid Build Coastguard Worker if (nir->info.stage == MESA_SHADER_FRAGMENT) {
4721*61046927SAndroid Build Coastguard Worker NIR_PASS_V(nir, nir_shader_intrinsics_pass,
4722*61046927SAndroid Build Coastguard Worker bifrost_nir_lower_blend_components,
4723*61046927SAndroid Build Coastguard Worker nir_metadata_control_flow, NULL);
4724*61046927SAndroid Build Coastguard Worker }
4725*61046927SAndroid Build Coastguard Worker
4726*61046927SAndroid Build Coastguard Worker /* Backend scheduler is purely local, so do some global optimizations
4727*61046927SAndroid Build Coastguard Worker * to reduce register pressure. */
4728*61046927SAndroid Build Coastguard Worker nir_move_options move_all = nir_move_const_undef | nir_move_load_ubo |
4729*61046927SAndroid Build Coastguard Worker nir_move_load_input | nir_move_comparisons |
4730*61046927SAndroid Build Coastguard Worker nir_move_copies | nir_move_load_ssbo;
4731*61046927SAndroid Build Coastguard Worker
4732*61046927SAndroid Build Coastguard Worker NIR_PASS_V(nir, nir_opt_sink, move_all);
4733*61046927SAndroid Build Coastguard Worker NIR_PASS_V(nir, nir_opt_move, move_all);
4734*61046927SAndroid Build Coastguard Worker
4735*61046927SAndroid Build Coastguard Worker /* We might lower attribute, varying, and image indirects. Use the
4736*61046927SAndroid Build Coastguard Worker * gathered info to skip the extra analysis in the happy path. */
4737*61046927SAndroid Build Coastguard Worker bool any_indirects = nir->info.inputs_read_indirectly ||
4738*61046927SAndroid Build Coastguard Worker nir->info.outputs_accessed_indirectly ||
4739*61046927SAndroid Build Coastguard Worker nir->info.patch_inputs_read_indirectly ||
4740*61046927SAndroid Build Coastguard Worker nir->info.patch_outputs_accessed_indirectly ||
4741*61046927SAndroid Build Coastguard Worker nir->info.images_used[0];
4742*61046927SAndroid Build Coastguard Worker
4743*61046927SAndroid Build Coastguard Worker if (any_indirects) {
4744*61046927SAndroid Build Coastguard Worker nir_convert_to_lcssa(nir, true, true);
4745*61046927SAndroid Build Coastguard Worker NIR_PASS_V(nir, nir_divergence_analysis);
4746*61046927SAndroid Build Coastguard Worker NIR_PASS_V(nir, bi_lower_divergent_indirects,
4747*61046927SAndroid Build Coastguard Worker pan_subgroup_size(pan_arch(gpu_id)));
4748*61046927SAndroid Build Coastguard Worker }
4749*61046927SAndroid Build Coastguard Worker }
4750*61046927SAndroid Build Coastguard Worker
4751*61046927SAndroid Build Coastguard Worker static void
bi_opt_post_ra(bi_context * ctx)4752*61046927SAndroid Build Coastguard Worker bi_opt_post_ra(bi_context *ctx)
4753*61046927SAndroid Build Coastguard Worker {
4754*61046927SAndroid Build Coastguard Worker bi_foreach_instr_global_safe(ctx, ins) {
4755*61046927SAndroid Build Coastguard Worker if (ins->op == BI_OPCODE_MOV_I32 &&
4756*61046927SAndroid Build Coastguard Worker bi_is_equiv(ins->dest[0], ins->src[0]))
4757*61046927SAndroid Build Coastguard Worker bi_remove_instruction(ins);
4758*61046927SAndroid Build Coastguard Worker }
4759*61046927SAndroid Build Coastguard Worker }
4760*61046927SAndroid Build Coastguard Worker
4761*61046927SAndroid Build Coastguard Worker /* Dead code elimination for branches at the end of a block - only one branch
4762*61046927SAndroid Build Coastguard Worker * per block is legal semantically, but unreachable jumps can be generated.
4763*61046927SAndroid Build Coastguard Worker * Likewise on Bifrost we can generate jumps to the terminal block which need
4764*61046927SAndroid Build Coastguard Worker * to be lowered away to a jump to #0x0, which induces successful termination.
4765*61046927SAndroid Build Coastguard Worker * That trick doesn't work on Valhall, which needs a NOP inserted in the
4766*61046927SAndroid Build Coastguard Worker * terminal block instead.
4767*61046927SAndroid Build Coastguard Worker */
4768*61046927SAndroid Build Coastguard Worker static void
bi_lower_branch(bi_context * ctx,bi_block * block)4769*61046927SAndroid Build Coastguard Worker bi_lower_branch(bi_context *ctx, bi_block *block)
4770*61046927SAndroid Build Coastguard Worker {
4771*61046927SAndroid Build Coastguard Worker bool cull_terminal = (ctx->arch <= 8);
4772*61046927SAndroid Build Coastguard Worker bool branched = false;
4773*61046927SAndroid Build Coastguard Worker
4774*61046927SAndroid Build Coastguard Worker bi_foreach_instr_in_block_safe(block, ins) {
4775*61046927SAndroid Build Coastguard Worker if (!ins->branch_target)
4776*61046927SAndroid Build Coastguard Worker continue;
4777*61046927SAndroid Build Coastguard Worker
4778*61046927SAndroid Build Coastguard Worker if (branched) {
4779*61046927SAndroid Build Coastguard Worker bi_remove_instruction(ins);
4780*61046927SAndroid Build Coastguard Worker continue;
4781*61046927SAndroid Build Coastguard Worker }
4782*61046927SAndroid Build Coastguard Worker
4783*61046927SAndroid Build Coastguard Worker branched = true;
4784*61046927SAndroid Build Coastguard Worker
4785*61046927SAndroid Build Coastguard Worker if (!bi_is_terminal_block(ins->branch_target))
4786*61046927SAndroid Build Coastguard Worker continue;
4787*61046927SAndroid Build Coastguard Worker
4788*61046927SAndroid Build Coastguard Worker if (cull_terminal)
4789*61046927SAndroid Build Coastguard Worker ins->branch_target = NULL;
4790*61046927SAndroid Build Coastguard Worker else if (ins->branch_target)
4791*61046927SAndroid Build Coastguard Worker ins->branch_target->needs_nop = true;
4792*61046927SAndroid Build Coastguard Worker }
4793*61046927SAndroid Build Coastguard Worker }
4794*61046927SAndroid Build Coastguard Worker
4795*61046927SAndroid Build Coastguard Worker static void
bi_pack_clauses(bi_context * ctx,struct util_dynarray * binary,unsigned offset)4796*61046927SAndroid Build Coastguard Worker bi_pack_clauses(bi_context *ctx, struct util_dynarray *binary, unsigned offset)
4797*61046927SAndroid Build Coastguard Worker {
4798*61046927SAndroid Build Coastguard Worker unsigned final_clause = bi_pack(ctx, binary);
4799*61046927SAndroid Build Coastguard Worker
4800*61046927SAndroid Build Coastguard Worker /* If we need to wait for ATEST or BLEND in the first clause, pass the
4801*61046927SAndroid Build Coastguard Worker * corresponding bits through to the renderer state descriptor */
4802*61046927SAndroid Build Coastguard Worker bi_block *first_block = list_first_entry(&ctx->blocks, bi_block, link);
4803*61046927SAndroid Build Coastguard Worker bi_clause *first_clause = bi_next_clause(ctx, first_block, NULL);
4804*61046927SAndroid Build Coastguard Worker
4805*61046927SAndroid Build Coastguard Worker unsigned first_deps = first_clause ? first_clause->dependencies : 0;
4806*61046927SAndroid Build Coastguard Worker ctx->info.bifrost->wait_6 = (first_deps & (1 << 6));
4807*61046927SAndroid Build Coastguard Worker ctx->info.bifrost->wait_7 = (first_deps & (1 << 7));
4808*61046927SAndroid Build Coastguard Worker
4809*61046927SAndroid Build Coastguard Worker /* Pad the shader with enough zero bytes to trick the prefetcher,
4810*61046927SAndroid Build Coastguard Worker * unless we're compiling an empty shader (in which case we don't pad
4811*61046927SAndroid Build Coastguard Worker * so the size remains 0) */
4812*61046927SAndroid Build Coastguard Worker unsigned prefetch_size = BIFROST_SHADER_PREFETCH - final_clause;
4813*61046927SAndroid Build Coastguard Worker
4814*61046927SAndroid Build Coastguard Worker if (binary->size - offset) {
4815*61046927SAndroid Build Coastguard Worker memset(util_dynarray_grow(binary, uint8_t, prefetch_size), 0,
4816*61046927SAndroid Build Coastguard Worker prefetch_size);
4817*61046927SAndroid Build Coastguard Worker }
4818*61046927SAndroid Build Coastguard Worker }
4819*61046927SAndroid Build Coastguard Worker
4820*61046927SAndroid Build Coastguard Worker /*
4821*61046927SAndroid Build Coastguard Worker * Build a bit mask of varyings (by location) that are flatshaded. This
4822*61046927SAndroid Build Coastguard Worker * information is needed by lower_mediump_io, as we don't yet support 16-bit
4823*61046927SAndroid Build Coastguard Worker * flat varyings.
4824*61046927SAndroid Build Coastguard Worker *
4825*61046927SAndroid Build Coastguard Worker * Also varyings that are used as texture coordinates should be kept at fp32 so
4826*61046927SAndroid Build Coastguard Worker * the texture instruction may be promoted to VAR_TEX. In general this is a good
4827*61046927SAndroid Build Coastguard Worker * idea, as fp16 texture coordinates are not supported by the hardware and are
4828*61046927SAndroid Build Coastguard Worker * usually inappropriate. (There are both relevant CTS bugs here, even.)
4829*61046927SAndroid Build Coastguard Worker *
4830*61046927SAndroid Build Coastguard Worker * TODO: If we compacted the varyings with some fixup code in the vertex shader,
4831*61046927SAndroid Build Coastguard Worker * we could implement 16-bit flat varyings. Consider if this case matters.
4832*61046927SAndroid Build Coastguard Worker *
4833*61046927SAndroid Build Coastguard Worker * TODO: The texture coordinate handling could be less heavyhanded.
4834*61046927SAndroid Build Coastguard Worker */
4835*61046927SAndroid Build Coastguard Worker static bool
bi_gather_texcoords(nir_builder * b,nir_instr * instr,void * data)4836*61046927SAndroid Build Coastguard Worker bi_gather_texcoords(nir_builder *b, nir_instr *instr, void *data)
4837*61046927SAndroid Build Coastguard Worker {
4838*61046927SAndroid Build Coastguard Worker uint64_t *mask = data;
4839*61046927SAndroid Build Coastguard Worker
4840*61046927SAndroid Build Coastguard Worker if (instr->type != nir_instr_type_tex)
4841*61046927SAndroid Build Coastguard Worker return false;
4842*61046927SAndroid Build Coastguard Worker
4843*61046927SAndroid Build Coastguard Worker nir_tex_instr *tex = nir_instr_as_tex(instr);
4844*61046927SAndroid Build Coastguard Worker
4845*61046927SAndroid Build Coastguard Worker int coord_idx = nir_tex_instr_src_index(tex, nir_tex_src_coord);
4846*61046927SAndroid Build Coastguard Worker if (coord_idx < 0)
4847*61046927SAndroid Build Coastguard Worker return false;
4848*61046927SAndroid Build Coastguard Worker
4849*61046927SAndroid Build Coastguard Worker nir_src src = tex->src[coord_idx].src;
4850*61046927SAndroid Build Coastguard Worker nir_scalar x = nir_scalar_resolved(src.ssa, 0);
4851*61046927SAndroid Build Coastguard Worker nir_scalar y = nir_scalar_resolved(src.ssa, 1);
4852*61046927SAndroid Build Coastguard Worker
4853*61046927SAndroid Build Coastguard Worker if (x.def != y.def)
4854*61046927SAndroid Build Coastguard Worker return false;
4855*61046927SAndroid Build Coastguard Worker
4856*61046927SAndroid Build Coastguard Worker nir_instr *parent = x.def->parent_instr;
4857*61046927SAndroid Build Coastguard Worker
4858*61046927SAndroid Build Coastguard Worker if (parent->type != nir_instr_type_intrinsic)
4859*61046927SAndroid Build Coastguard Worker return false;
4860*61046927SAndroid Build Coastguard Worker
4861*61046927SAndroid Build Coastguard Worker nir_intrinsic_instr *intr = nir_instr_as_intrinsic(parent);
4862*61046927SAndroid Build Coastguard Worker
4863*61046927SAndroid Build Coastguard Worker if (intr->intrinsic != nir_intrinsic_load_interpolated_input)
4864*61046927SAndroid Build Coastguard Worker return false;
4865*61046927SAndroid Build Coastguard Worker
4866*61046927SAndroid Build Coastguard Worker nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
4867*61046927SAndroid Build Coastguard Worker *mask |= BITFIELD64_BIT(sem.location);
4868*61046927SAndroid Build Coastguard Worker return false;
4869*61046927SAndroid Build Coastguard Worker }
4870*61046927SAndroid Build Coastguard Worker
4871*61046927SAndroid Build Coastguard Worker static uint64_t
bi_fp32_varying_mask(nir_shader * nir)4872*61046927SAndroid Build Coastguard Worker bi_fp32_varying_mask(nir_shader *nir)
4873*61046927SAndroid Build Coastguard Worker {
4874*61046927SAndroid Build Coastguard Worker uint64_t mask = 0;
4875*61046927SAndroid Build Coastguard Worker
4876*61046927SAndroid Build Coastguard Worker assert(nir->info.stage == MESA_SHADER_FRAGMENT);
4877*61046927SAndroid Build Coastguard Worker
4878*61046927SAndroid Build Coastguard Worker nir_foreach_shader_in_variable(var, nir) {
4879*61046927SAndroid Build Coastguard Worker if (var->data.interpolation == INTERP_MODE_FLAT)
4880*61046927SAndroid Build Coastguard Worker mask |= BITFIELD64_BIT(var->data.location);
4881*61046927SAndroid Build Coastguard Worker }
4882*61046927SAndroid Build Coastguard Worker
4883*61046927SAndroid Build Coastguard Worker nir_shader_instructions_pass(nir, bi_gather_texcoords, nir_metadata_all,
4884*61046927SAndroid Build Coastguard Worker &mask);
4885*61046927SAndroid Build Coastguard Worker
4886*61046927SAndroid Build Coastguard Worker return mask;
4887*61046927SAndroid Build Coastguard Worker }
4888*61046927SAndroid Build Coastguard Worker
4889*61046927SAndroid Build Coastguard Worker static bool
bi_lower_sample_mask_writes(nir_builder * b,nir_intrinsic_instr * intr,void * data)4890*61046927SAndroid Build Coastguard Worker bi_lower_sample_mask_writes(nir_builder *b, nir_intrinsic_instr *intr,
4891*61046927SAndroid Build Coastguard Worker void *data)
4892*61046927SAndroid Build Coastguard Worker {
4893*61046927SAndroid Build Coastguard Worker if (intr->intrinsic != nir_intrinsic_store_output)
4894*61046927SAndroid Build Coastguard Worker return false;
4895*61046927SAndroid Build Coastguard Worker
4896*61046927SAndroid Build Coastguard Worker assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
4897*61046927SAndroid Build Coastguard Worker if (nir_intrinsic_io_semantics(intr).location != FRAG_RESULT_SAMPLE_MASK)
4898*61046927SAndroid Build Coastguard Worker return false;
4899*61046927SAndroid Build Coastguard Worker
4900*61046927SAndroid Build Coastguard Worker b->cursor = nir_before_instr(&intr->instr);
4901*61046927SAndroid Build Coastguard Worker
4902*61046927SAndroid Build Coastguard Worker nir_def *orig = nir_load_sample_mask(b);
4903*61046927SAndroid Build Coastguard Worker
4904*61046927SAndroid Build Coastguard Worker nir_src_rewrite(&intr->src[0],
4905*61046927SAndroid Build Coastguard Worker nir_b32csel(b, nir_load_multisampled_pan(b),
4906*61046927SAndroid Build Coastguard Worker nir_iand(b, orig, intr->src[0].ssa), orig));
4907*61046927SAndroid Build Coastguard Worker return true;
4908*61046927SAndroid Build Coastguard Worker }
4909*61046927SAndroid Build Coastguard Worker
4910*61046927SAndroid Build Coastguard Worker static bool
bi_lower_load_output(nir_builder * b,nir_intrinsic_instr * intr,UNUSED void * data)4911*61046927SAndroid Build Coastguard Worker bi_lower_load_output(nir_builder *b, nir_intrinsic_instr *intr,
4912*61046927SAndroid Build Coastguard Worker UNUSED void *data)
4913*61046927SAndroid Build Coastguard Worker {
4914*61046927SAndroid Build Coastguard Worker if (intr->intrinsic != nir_intrinsic_load_output)
4915*61046927SAndroid Build Coastguard Worker return false;
4916*61046927SAndroid Build Coastguard Worker
4917*61046927SAndroid Build Coastguard Worker unsigned loc = nir_intrinsic_io_semantics(intr).location;
4918*61046927SAndroid Build Coastguard Worker assert(loc >= FRAG_RESULT_DATA0);
4919*61046927SAndroid Build Coastguard Worker unsigned rt = loc - FRAG_RESULT_DATA0;
4920*61046927SAndroid Build Coastguard Worker
4921*61046927SAndroid Build Coastguard Worker b->cursor = nir_before_instr(&intr->instr);
4922*61046927SAndroid Build Coastguard Worker
4923*61046927SAndroid Build Coastguard Worker nir_def *conversion = nir_load_rt_conversion_pan(
4924*61046927SAndroid Build Coastguard Worker b, .base = rt, .src_type = nir_intrinsic_dest_type(intr));
4925*61046927SAndroid Build Coastguard Worker
4926*61046927SAndroid Build Coastguard Worker nir_def *lowered = nir_load_converted_output_pan(
4927*61046927SAndroid Build Coastguard Worker b, intr->def.num_components, intr->def.bit_size, conversion,
4928*61046927SAndroid Build Coastguard Worker .dest_type = nir_intrinsic_dest_type(intr),
4929*61046927SAndroid Build Coastguard Worker .io_semantics = nir_intrinsic_io_semantics(intr));
4930*61046927SAndroid Build Coastguard Worker
4931*61046927SAndroid Build Coastguard Worker nir_def_rewrite_uses(&intr->def, lowered);
4932*61046927SAndroid Build Coastguard Worker return true;
4933*61046927SAndroid Build Coastguard Worker }
4934*61046927SAndroid Build Coastguard Worker
4935*61046927SAndroid Build Coastguard Worker bool
bifrost_nir_lower_load_output(nir_shader * nir)4936*61046927SAndroid Build Coastguard Worker bifrost_nir_lower_load_output(nir_shader *nir)
4937*61046927SAndroid Build Coastguard Worker {
4938*61046927SAndroid Build Coastguard Worker assert(nir->info.stage == MESA_SHADER_FRAGMENT);
4939*61046927SAndroid Build Coastguard Worker
4940*61046927SAndroid Build Coastguard Worker return nir_shader_intrinsics_pass(
4941*61046927SAndroid Build Coastguard Worker nir, bi_lower_load_output,
4942*61046927SAndroid Build Coastguard Worker nir_metadata_control_flow, NULL);
4943*61046927SAndroid Build Coastguard Worker }
4944*61046927SAndroid Build Coastguard Worker
4945*61046927SAndroid Build Coastguard Worker static bool
bi_lower_load_push_const_with_dyn_offset(nir_builder * b,nir_intrinsic_instr * intr,UNUSED void * data)4946*61046927SAndroid Build Coastguard Worker bi_lower_load_push_const_with_dyn_offset(nir_builder *b,
4947*61046927SAndroid Build Coastguard Worker nir_intrinsic_instr *intr,
4948*61046927SAndroid Build Coastguard Worker UNUSED void *data)
4949*61046927SAndroid Build Coastguard Worker {
4950*61046927SAndroid Build Coastguard Worker if (intr->intrinsic != nir_intrinsic_load_push_constant)
4951*61046927SAndroid Build Coastguard Worker return false;
4952*61046927SAndroid Build Coastguard Worker
4953*61046927SAndroid Build Coastguard Worker /* Offset is constant, nothing to do. */
4954*61046927SAndroid Build Coastguard Worker if (nir_src_is_const(intr->src[0]))
4955*61046927SAndroid Build Coastguard Worker return false;
4956*61046927SAndroid Build Coastguard Worker
4957*61046927SAndroid Build Coastguard Worker /* nir_lower_mem_access_bit_sizes() should have lowered load_push_constant
4958*61046927SAndroid Build Coastguard Worker * to 32-bit and a maximum of 4 components.
4959*61046927SAndroid Build Coastguard Worker */
4960*61046927SAndroid Build Coastguard Worker assert(intr->def.num_components <= 4);
4961*61046927SAndroid Build Coastguard Worker assert(intr->def.bit_size == 32);
4962*61046927SAndroid Build Coastguard Worker
4963*61046927SAndroid Build Coastguard Worker uint32_t base = nir_intrinsic_base(intr);
4964*61046927SAndroid Build Coastguard Worker uint32_t range = nir_intrinsic_range(intr);
4965*61046927SAndroid Build Coastguard Worker uint32_t nwords = intr->def.num_components;
4966*61046927SAndroid Build Coastguard Worker
4967*61046927SAndroid Build Coastguard Worker b->cursor = nir_before_instr(&intr->instr);
4968*61046927SAndroid Build Coastguard Worker
4969*61046927SAndroid Build Coastguard Worker /* Dynamic indexing is only allowed for vulkan push constants, which is
4970*61046927SAndroid Build Coastguard Worker * currently limited to 256 bytes. That gives us a maximum of 64 32-bit
4971*61046927SAndroid Build Coastguard Worker * words to read from.
4972*61046927SAndroid Build Coastguard Worker */
4973*61046927SAndroid Build Coastguard Worker nir_def *lut[64] = {0};
4974*61046927SAndroid Build Coastguard Worker
4975*61046927SAndroid Build Coastguard Worker assert(range / 4 <= ARRAY_SIZE(lut));
4976*61046927SAndroid Build Coastguard Worker
4977*61046927SAndroid Build Coastguard Worker /* Load all words in the range. */
4978*61046927SAndroid Build Coastguard Worker for (uint32_t w = 0; w < range / 4; w++) {
4979*61046927SAndroid Build Coastguard Worker lut[w] = nir_load_push_constant(b, 1, 32, nir_imm_int(b, 0),
4980*61046927SAndroid Build Coastguard Worker .base = base + (w * 4), .range = 4);
4981*61046927SAndroid Build Coastguard Worker }
4982*61046927SAndroid Build Coastguard Worker
4983*61046927SAndroid Build Coastguard Worker nir_def *index = intr->src[0].ssa;
4984*61046927SAndroid Build Coastguard Worker
4985*61046927SAndroid Build Coastguard Worker /* Index is dynamic, we need to do iteratively CSEL the values based on
4986*61046927SAndroid Build Coastguard Worker * the index. We start with the highest bit in the index, and for each
4987*61046927SAndroid Build Coastguard Worker * iteration we divide the scope by two.
4988*61046927SAndroid Build Coastguard Worker */
4989*61046927SAndroid Build Coastguard Worker for (uint32_t lut_sz = ARRAY_SIZE(lut); lut_sz > 0; lut_sz /= 2) {
4990*61046927SAndroid Build Coastguard Worker uint32_t stride = lut_sz / 2;
4991*61046927SAndroid Build Coastguard Worker nir_def *bit_test = NULL;
4992*61046927SAndroid Build Coastguard Worker
4993*61046927SAndroid Build Coastguard Worker /* Stop when the LUT is smaller than the number of words we're trying to
4994*61046927SAndroid Build Coastguard Worker * extract.
4995*61046927SAndroid Build Coastguard Worker */
4996*61046927SAndroid Build Coastguard Worker if (lut_sz <= nwords)
4997*61046927SAndroid Build Coastguard Worker break;
4998*61046927SAndroid Build Coastguard Worker
4999*61046927SAndroid Build Coastguard Worker for (uint32_t i = 0; i < stride; i++) {
5000*61046927SAndroid Build Coastguard Worker /* We only need a CSEL if we have two values, otherwise we pick the
5001*61046927SAndroid Build Coastguard Worker * non-NULL value.
5002*61046927SAndroid Build Coastguard Worker */
5003*61046927SAndroid Build Coastguard Worker if (lut[i] && lut[i + stride]) {
5004*61046927SAndroid Build Coastguard Worker /* Create the test src on-demand. The stride is in 32-bit words,
5005*61046927SAndroid Build Coastguard Worker * multiply by four to convert it into a byte stride we can use
5006*61046927SAndroid Build Coastguard Worker * to test if the corresponding bit is set in the index src.
5007*61046927SAndroid Build Coastguard Worker */
5008*61046927SAndroid Build Coastguard Worker if (!bit_test)
5009*61046927SAndroid Build Coastguard Worker bit_test = nir_i2b(b, nir_iand_imm(b, index, stride * 4));
5010*61046927SAndroid Build Coastguard Worker
5011*61046927SAndroid Build Coastguard Worker lut[i] = nir_bcsel(b, bit_test, lut[i + stride], lut[i]);
5012*61046927SAndroid Build Coastguard Worker } else if (lut[i + stride]) {
5013*61046927SAndroid Build Coastguard Worker lut[i] = lut[i + stride];
5014*61046927SAndroid Build Coastguard Worker }
5015*61046927SAndroid Build Coastguard Worker }
5016*61046927SAndroid Build Coastguard Worker }
5017*61046927SAndroid Build Coastguard Worker
5018*61046927SAndroid Build Coastguard Worker nir_def *res = nir_vec(b, &lut[0], nwords);
5019*61046927SAndroid Build Coastguard Worker
5020*61046927SAndroid Build Coastguard Worker nir_def_rewrite_uses(&intr->def, res);
5021*61046927SAndroid Build Coastguard Worker nir_instr_remove(&intr->instr);
5022*61046927SAndroid Build Coastguard Worker return true;
5023*61046927SAndroid Build Coastguard Worker }
5024*61046927SAndroid Build Coastguard Worker
5025*61046927SAndroid Build Coastguard Worker void
bifrost_preprocess_nir(nir_shader * nir,unsigned gpu_id)5026*61046927SAndroid Build Coastguard Worker bifrost_preprocess_nir(nir_shader *nir, unsigned gpu_id)
5027*61046927SAndroid Build Coastguard Worker {
5028*61046927SAndroid Build Coastguard Worker /* Lower gl_Position pre-optimisation, but after lowering vars to ssa
5029*61046927SAndroid Build Coastguard Worker * (so we don't accidentally duplicate the epilogue since mesa/st has
5030*61046927SAndroid Build Coastguard Worker * messed with our I/O quite a bit already) */
5031*61046927SAndroid Build Coastguard Worker
5032*61046927SAndroid Build Coastguard Worker NIR_PASS_V(nir, nir_lower_vars_to_ssa);
5033*61046927SAndroid Build Coastguard Worker
5034*61046927SAndroid Build Coastguard Worker if (nir->info.stage == MESA_SHADER_VERTEX) {
5035*61046927SAndroid Build Coastguard Worker NIR_PASS_V(nir, nir_lower_viewport_transform);
5036*61046927SAndroid Build Coastguard Worker NIR_PASS_V(nir, nir_lower_point_size, 1.0, 0.0);
5037*61046927SAndroid Build Coastguard Worker
5038*61046927SAndroid Build Coastguard Worker nir_variable *psiz = nir_find_variable_with_location(
5039*61046927SAndroid Build Coastguard Worker nir, nir_var_shader_out, VARYING_SLOT_PSIZ);
5040*61046927SAndroid Build Coastguard Worker if (psiz != NULL)
5041*61046927SAndroid Build Coastguard Worker psiz->data.precision = GLSL_PRECISION_MEDIUM;
5042*61046927SAndroid Build Coastguard Worker }
5043*61046927SAndroid Build Coastguard Worker
5044*61046927SAndroid Build Coastguard Worker /* Get rid of any global vars before we lower to scratch. */
5045*61046927SAndroid Build Coastguard Worker NIR_PASS_V(nir, nir_lower_global_vars_to_local);
5046*61046927SAndroid Build Coastguard Worker
5047*61046927SAndroid Build Coastguard Worker /* Valhall introduces packed thread local storage, which improves cache
5048*61046927SAndroid Build Coastguard Worker * locality of TLS access. However, access to packed TLS cannot
5049*61046927SAndroid Build Coastguard Worker * straddle 16-byte boundaries. As such, when packed TLS is in use
5050*61046927SAndroid Build Coastguard Worker * (currently unconditional for Valhall), we force vec4 alignment for
5051*61046927SAndroid Build Coastguard Worker * scratch access.
5052*61046927SAndroid Build Coastguard Worker */
5053*61046927SAndroid Build Coastguard Worker glsl_type_size_align_func vars_to_scratch_size_align_func =
5054*61046927SAndroid Build Coastguard Worker (gpu_id >= 0x9000) ? glsl_get_vec4_size_align_bytes
5055*61046927SAndroid Build Coastguard Worker : glsl_get_natural_size_align_bytes;
5056*61046927SAndroid Build Coastguard Worker /* Lower large arrays to scratch and small arrays to bcsel */
5057*61046927SAndroid Build Coastguard Worker NIR_PASS_V(nir, nir_lower_vars_to_scratch, nir_var_function_temp, 256,
5058*61046927SAndroid Build Coastguard Worker vars_to_scratch_size_align_func, vars_to_scratch_size_align_func);
5059*61046927SAndroid Build Coastguard Worker NIR_PASS_V(nir, nir_lower_indirect_derefs, nir_var_function_temp, ~0);
5060*61046927SAndroid Build Coastguard Worker
5061*61046927SAndroid Build Coastguard Worker NIR_PASS_V(nir, nir_split_var_copies);
5062*61046927SAndroid Build Coastguard Worker NIR_PASS_V(nir, nir_lower_var_copies);
5063*61046927SAndroid Build Coastguard Worker NIR_PASS_V(nir, nir_lower_vars_to_ssa);
5064*61046927SAndroid Build Coastguard Worker NIR_PASS_V(nir, nir_lower_io, nir_var_shader_in | nir_var_shader_out,
5065*61046927SAndroid Build Coastguard Worker glsl_type_size, 0);
5066*61046927SAndroid Build Coastguard Worker
5067*61046927SAndroid Build Coastguard Worker /* nir_lower[_explicit]_io is lazy and emits mul+add chains even for
5068*61046927SAndroid Build Coastguard Worker * offsets it could figure out are constant. Do some constant folding
5069*61046927SAndroid Build Coastguard Worker * before bifrost_nir_lower_store_component below.
5070*61046927SAndroid Build Coastguard Worker */
5071*61046927SAndroid Build Coastguard Worker NIR_PASS_V(nir, nir_opt_constant_folding);
5072*61046927SAndroid Build Coastguard Worker
5073*61046927SAndroid Build Coastguard Worker if (nir->info.stage == MESA_SHADER_FRAGMENT) {
5074*61046927SAndroid Build Coastguard Worker NIR_PASS_V(nir, nir_lower_mediump_io,
5075*61046927SAndroid Build Coastguard Worker nir_var_shader_in | nir_var_shader_out,
5076*61046927SAndroid Build Coastguard Worker ~bi_fp32_varying_mask(nir), false);
5077*61046927SAndroid Build Coastguard Worker
5078*61046927SAndroid Build Coastguard Worker NIR_PASS_V(nir, nir_shader_intrinsics_pass, bi_lower_sample_mask_writes,
5079*61046927SAndroid Build Coastguard Worker nir_metadata_control_flow, NULL);
5080*61046927SAndroid Build Coastguard Worker
5081*61046927SAndroid Build Coastguard Worker NIR_PASS_V(nir, bifrost_nir_lower_load_output);
5082*61046927SAndroid Build Coastguard Worker } else if (nir->info.stage == MESA_SHADER_VERTEX) {
5083*61046927SAndroid Build Coastguard Worker if (gpu_id >= 0x9000) {
5084*61046927SAndroid Build Coastguard Worker NIR_PASS_V(nir, nir_lower_mediump_io, nir_var_shader_out,
5085*61046927SAndroid Build Coastguard Worker BITFIELD64_BIT(VARYING_SLOT_PSIZ), false);
5086*61046927SAndroid Build Coastguard Worker }
5087*61046927SAndroid Build Coastguard Worker
5088*61046927SAndroid Build Coastguard Worker NIR_PASS_V(nir, pan_nir_lower_store_component);
5089*61046927SAndroid Build Coastguard Worker }
5090*61046927SAndroid Build Coastguard Worker
5091*61046927SAndroid Build Coastguard Worker nir_lower_mem_access_bit_sizes_options mem_size_options = {
5092*61046927SAndroid Build Coastguard Worker .modes = nir_var_mem_ubo | nir_var_mem_push_const | nir_var_mem_ssbo |
5093*61046927SAndroid Build Coastguard Worker nir_var_mem_constant | nir_var_mem_task_payload |
5094*61046927SAndroid Build Coastguard Worker nir_var_shader_temp | nir_var_function_temp |
5095*61046927SAndroid Build Coastguard Worker nir_var_mem_global | nir_var_mem_shared,
5096*61046927SAndroid Build Coastguard Worker .callback = mem_access_size_align_cb,
5097*61046927SAndroid Build Coastguard Worker };
5098*61046927SAndroid Build Coastguard Worker NIR_PASS_V(nir, nir_lower_mem_access_bit_sizes, &mem_size_options);
5099*61046927SAndroid Build Coastguard Worker
5100*61046927SAndroid Build Coastguard Worker NIR_PASS_V(nir, nir_shader_intrinsics_pass,
5101*61046927SAndroid Build Coastguard Worker bi_lower_load_push_const_with_dyn_offset,
5102*61046927SAndroid Build Coastguard Worker nir_metadata_control_flow, NULL);
5103*61046927SAndroid Build Coastguard Worker
5104*61046927SAndroid Build Coastguard Worker nir_lower_ssbo_options ssbo_opts = {
5105*61046927SAndroid Build Coastguard Worker .native_loads = pan_arch(gpu_id) >= 9,
5106*61046927SAndroid Build Coastguard Worker .native_offset = pan_arch(gpu_id) >= 9,
5107*61046927SAndroid Build Coastguard Worker };
5108*61046927SAndroid Build Coastguard Worker NIR_PASS_V(nir, nir_lower_ssbo, &ssbo_opts);
5109*61046927SAndroid Build Coastguard Worker
5110*61046927SAndroid Build Coastguard Worker NIR_PASS_V(nir, pan_lower_sample_pos);
5111*61046927SAndroid Build Coastguard Worker NIR_PASS_V(nir, nir_lower_bit_size, bi_lower_bit_size, NULL);
5112*61046927SAndroid Build Coastguard Worker NIR_PASS_V(nir, nir_lower_64bit_phis);
5113*61046927SAndroid Build Coastguard Worker NIR_PASS_V(nir, pan_lower_helper_invocation);
5114*61046927SAndroid Build Coastguard Worker NIR_PASS_V(nir, nir_lower_int64);
5115*61046927SAndroid Build Coastguard Worker
5116*61046927SAndroid Build Coastguard Worker NIR_PASS_V(nir, nir_opt_idiv_const, 8);
5117*61046927SAndroid Build Coastguard Worker NIR_PASS_V(nir, nir_lower_idiv,
5118*61046927SAndroid Build Coastguard Worker &(nir_lower_idiv_options){.allow_fp16 = true});
5119*61046927SAndroid Build Coastguard Worker
5120*61046927SAndroid Build Coastguard Worker NIR_PASS_V(nir, nir_lower_tex,
5121*61046927SAndroid Build Coastguard Worker &(nir_lower_tex_options){
5122*61046927SAndroid Build Coastguard Worker .lower_txs_lod = true,
5123*61046927SAndroid Build Coastguard Worker .lower_txp = ~0,
5124*61046927SAndroid Build Coastguard Worker .lower_tg4_broadcom_swizzle = true,
5125*61046927SAndroid Build Coastguard Worker .lower_txd = true,
5126*61046927SAndroid Build Coastguard Worker .lower_invalid_implicit_lod = true,
5127*61046927SAndroid Build Coastguard Worker .lower_index_to_offset = true,
5128*61046927SAndroid Build Coastguard Worker });
5129*61046927SAndroid Build Coastguard Worker
5130*61046927SAndroid Build Coastguard Worker NIR_PASS_V(nir, nir_lower_image_atomics_to_global);
5131*61046927SAndroid Build Coastguard Worker
5132*61046927SAndroid Build Coastguard Worker /* on bifrost, lower MSAA load/stores to 3D load/stores */
5133*61046927SAndroid Build Coastguard Worker if (pan_arch(gpu_id) < 9)
5134*61046927SAndroid Build Coastguard Worker NIR_PASS_V(nir, pan_nir_lower_image_ms);
5135*61046927SAndroid Build Coastguard Worker
5136*61046927SAndroid Build Coastguard Worker NIR_PASS_V(nir, nir_lower_alu_to_scalar, bi_scalarize_filter, NULL);
5137*61046927SAndroid Build Coastguard Worker NIR_PASS_V(nir, nir_lower_load_const_to_scalar);
5138*61046927SAndroid Build Coastguard Worker NIR_PASS_V(nir, nir_lower_phis_to_scalar, true);
5139*61046927SAndroid Build Coastguard Worker NIR_PASS_V(nir, nir_lower_flrp, 16 | 32 | 64, false /* always_precise */);
5140*61046927SAndroid Build Coastguard Worker NIR_PASS_V(nir, nir_lower_var_copies);
5141*61046927SAndroid Build Coastguard Worker NIR_PASS_V(nir, nir_lower_alu);
5142*61046927SAndroid Build Coastguard Worker NIR_PASS_V(nir, nir_lower_frag_coord_to_pixel_coord);
5143*61046927SAndroid Build Coastguard Worker }
5144*61046927SAndroid Build Coastguard Worker
5145*61046927SAndroid Build Coastguard Worker static bi_context *
bi_compile_variant_nir(nir_shader * nir,const struct panfrost_compile_inputs * inputs,struct util_dynarray * binary,struct bi_shader_info info,enum bi_idvs_mode idvs)5146*61046927SAndroid Build Coastguard Worker bi_compile_variant_nir(nir_shader *nir,
5147*61046927SAndroid Build Coastguard Worker const struct panfrost_compile_inputs *inputs,
5148*61046927SAndroid Build Coastguard Worker struct util_dynarray *binary, struct bi_shader_info info,
5149*61046927SAndroid Build Coastguard Worker enum bi_idvs_mode idvs)
5150*61046927SAndroid Build Coastguard Worker {
5151*61046927SAndroid Build Coastguard Worker bi_context *ctx = rzalloc(NULL, bi_context);
5152*61046927SAndroid Build Coastguard Worker
5153*61046927SAndroid Build Coastguard Worker /* There may be another program in the dynarray, start at the end */
5154*61046927SAndroid Build Coastguard Worker unsigned offset = binary->size;
5155*61046927SAndroid Build Coastguard Worker
5156*61046927SAndroid Build Coastguard Worker ctx->inputs = inputs;
5157*61046927SAndroid Build Coastguard Worker ctx->nir = nir;
5158*61046927SAndroid Build Coastguard Worker ctx->stage = nir->info.stage;
5159*61046927SAndroid Build Coastguard Worker ctx->quirks = bifrost_get_quirks(inputs->gpu_id);
5160*61046927SAndroid Build Coastguard Worker ctx->arch = pan_arch(inputs->gpu_id);
5161*61046927SAndroid Build Coastguard Worker ctx->info = info;
5162*61046927SAndroid Build Coastguard Worker ctx->idvs = idvs;
5163*61046927SAndroid Build Coastguard Worker ctx->malloc_idvs = (ctx->arch >= 9) && !inputs->no_idvs;
5164*61046927SAndroid Build Coastguard Worker
5165*61046927SAndroid Build Coastguard Worker if (idvs != BI_IDVS_NONE) {
5166*61046927SAndroid Build Coastguard Worker /* Specializing shaders for IDVS is destructive, so we need to
5167*61046927SAndroid Build Coastguard Worker * clone. However, the last (second) IDVS shader does not need
5168*61046927SAndroid Build Coastguard Worker * to be preserved so we can skip cloning that one.
5169*61046927SAndroid Build Coastguard Worker */
5170*61046927SAndroid Build Coastguard Worker if (offset == 0)
5171*61046927SAndroid Build Coastguard Worker ctx->nir = nir = nir_shader_clone(ctx, nir);
5172*61046927SAndroid Build Coastguard Worker
5173*61046927SAndroid Build Coastguard Worker NIR_PASS_V(nir, nir_shader_instructions_pass, bifrost_nir_specialize_idvs,
5174*61046927SAndroid Build Coastguard Worker nir_metadata_control_flow, &idvs);
5175*61046927SAndroid Build Coastguard Worker
5176*61046927SAndroid Build Coastguard Worker /* After specializing, clean up the mess */
5177*61046927SAndroid Build Coastguard Worker bool progress = true;
5178*61046927SAndroid Build Coastguard Worker
5179*61046927SAndroid Build Coastguard Worker while (progress) {
5180*61046927SAndroid Build Coastguard Worker progress = false;
5181*61046927SAndroid Build Coastguard Worker
5182*61046927SAndroid Build Coastguard Worker NIR_PASS(progress, nir, nir_opt_dce);
5183*61046927SAndroid Build Coastguard Worker NIR_PASS(progress, nir, nir_opt_dead_cf);
5184*61046927SAndroid Build Coastguard Worker }
5185*61046927SAndroid Build Coastguard Worker }
5186*61046927SAndroid Build Coastguard Worker
5187*61046927SAndroid Build Coastguard Worker /* If nothing is pushed, all UBOs need to be uploaded */
5188*61046927SAndroid Build Coastguard Worker ctx->ubo_mask = ~0;
5189*61046927SAndroid Build Coastguard Worker
5190*61046927SAndroid Build Coastguard Worker list_inithead(&ctx->blocks);
5191*61046927SAndroid Build Coastguard Worker
5192*61046927SAndroid Build Coastguard Worker bool skip_internal = nir->info.internal;
5193*61046927SAndroid Build Coastguard Worker skip_internal &= !(bifrost_debug & BIFROST_DBG_INTERNAL);
5194*61046927SAndroid Build Coastguard Worker
5195*61046927SAndroid Build Coastguard Worker if (bifrost_debug & BIFROST_DBG_SHADERS && !skip_internal) {
5196*61046927SAndroid Build Coastguard Worker nir_print_shader(nir, stdout);
5197*61046927SAndroid Build Coastguard Worker }
5198*61046927SAndroid Build Coastguard Worker
5199*61046927SAndroid Build Coastguard Worker ctx->allocated_vec = _mesa_hash_table_u64_create(ctx);
5200*61046927SAndroid Build Coastguard Worker
5201*61046927SAndroid Build Coastguard Worker nir_foreach_function_impl(impl, nir) {
5202*61046927SAndroid Build Coastguard Worker nir_index_blocks(impl);
5203*61046927SAndroid Build Coastguard Worker
5204*61046927SAndroid Build Coastguard Worker ctx->indexed_nir_blocks =
5205*61046927SAndroid Build Coastguard Worker rzalloc_array(ctx, bi_block *, impl->num_blocks);
5206*61046927SAndroid Build Coastguard Worker
5207*61046927SAndroid Build Coastguard Worker ctx->ssa_alloc += impl->ssa_alloc;
5208*61046927SAndroid Build Coastguard Worker
5209*61046927SAndroid Build Coastguard Worker emit_cf_list(ctx, &impl->body);
5210*61046927SAndroid Build Coastguard Worker bi_emit_phis_deferred(ctx);
5211*61046927SAndroid Build Coastguard Worker break; /* TODO: Multi-function shaders */
5212*61046927SAndroid Build Coastguard Worker }
5213*61046927SAndroid Build Coastguard Worker
5214*61046927SAndroid Build Coastguard Worker /* Index blocks now that we're done emitting */
5215*61046927SAndroid Build Coastguard Worker bi_foreach_block(ctx, block) {
5216*61046927SAndroid Build Coastguard Worker block->index = ctx->num_blocks++;
5217*61046927SAndroid Build Coastguard Worker }
5218*61046927SAndroid Build Coastguard Worker
5219*61046927SAndroid Build Coastguard Worker bi_validate(ctx, "NIR -> BIR");
5220*61046927SAndroid Build Coastguard Worker
5221*61046927SAndroid Build Coastguard Worker /* If the shader doesn't write any colour or depth outputs, it may
5222*61046927SAndroid Build Coastguard Worker * still need an ATEST at the very end! */
5223*61046927SAndroid Build Coastguard Worker bool need_dummy_atest = (ctx->stage == MESA_SHADER_FRAGMENT) &&
5224*61046927SAndroid Build Coastguard Worker !ctx->emitted_atest && !bi_skip_atest(ctx, false);
5225*61046927SAndroid Build Coastguard Worker
5226*61046927SAndroid Build Coastguard Worker if (need_dummy_atest) {
5227*61046927SAndroid Build Coastguard Worker bi_block *end = list_last_entry(&ctx->blocks, bi_block, link);
5228*61046927SAndroid Build Coastguard Worker bi_builder b = bi_init_builder(ctx, bi_after_block(end));
5229*61046927SAndroid Build Coastguard Worker bi_emit_atest(&b, bi_zero());
5230*61046927SAndroid Build Coastguard Worker }
5231*61046927SAndroid Build Coastguard Worker
5232*61046927SAndroid Build Coastguard Worker bool optimize = !(bifrost_debug & BIFROST_DBG_NOOPT);
5233*61046927SAndroid Build Coastguard Worker
5234*61046927SAndroid Build Coastguard Worker /* Runs before constant folding */
5235*61046927SAndroid Build Coastguard Worker bi_lower_swizzle(ctx);
5236*61046927SAndroid Build Coastguard Worker bi_validate(ctx, "Early lowering");
5237*61046927SAndroid Build Coastguard Worker
5238*61046927SAndroid Build Coastguard Worker /* Runs before copy prop */
5239*61046927SAndroid Build Coastguard Worker if (optimize && !ctx->inputs->no_ubo_to_push) {
5240*61046927SAndroid Build Coastguard Worker bi_opt_push_ubo(ctx);
5241*61046927SAndroid Build Coastguard Worker }
5242*61046927SAndroid Build Coastguard Worker
5243*61046927SAndroid Build Coastguard Worker if (likely(optimize)) {
5244*61046927SAndroid Build Coastguard Worker bi_opt_copy_prop(ctx);
5245*61046927SAndroid Build Coastguard Worker
5246*61046927SAndroid Build Coastguard Worker while (bi_opt_constant_fold(ctx))
5247*61046927SAndroid Build Coastguard Worker bi_opt_copy_prop(ctx);
5248*61046927SAndroid Build Coastguard Worker
5249*61046927SAndroid Build Coastguard Worker bi_opt_mod_prop_forward(ctx);
5250*61046927SAndroid Build Coastguard Worker bi_opt_mod_prop_backward(ctx);
5251*61046927SAndroid Build Coastguard Worker
5252*61046927SAndroid Build Coastguard Worker /* Push LD_VAR_IMM/VAR_TEX instructions. Must run after
5253*61046927SAndroid Build Coastguard Worker * mod_prop_backward to fuse VAR_TEX */
5254*61046927SAndroid Build Coastguard Worker if (ctx->arch == 7 && ctx->stage == MESA_SHADER_FRAGMENT &&
5255*61046927SAndroid Build Coastguard Worker !(bifrost_debug & BIFROST_DBG_NOPRELOAD)) {
5256*61046927SAndroid Build Coastguard Worker bi_opt_dce(ctx, false);
5257*61046927SAndroid Build Coastguard Worker bi_opt_message_preload(ctx);
5258*61046927SAndroid Build Coastguard Worker bi_opt_copy_prop(ctx);
5259*61046927SAndroid Build Coastguard Worker }
5260*61046927SAndroid Build Coastguard Worker
5261*61046927SAndroid Build Coastguard Worker bi_opt_dce(ctx, false);
5262*61046927SAndroid Build Coastguard Worker bi_opt_cse(ctx);
5263*61046927SAndroid Build Coastguard Worker bi_opt_dce(ctx, false);
5264*61046927SAndroid Build Coastguard Worker if (!ctx->inputs->no_ubo_to_push)
5265*61046927SAndroid Build Coastguard Worker bi_opt_reorder_push(ctx);
5266*61046927SAndroid Build Coastguard Worker bi_validate(ctx, "Optimization passes");
5267*61046927SAndroid Build Coastguard Worker }
5268*61046927SAndroid Build Coastguard Worker
5269*61046927SAndroid Build Coastguard Worker bi_lower_opt_instructions(ctx);
5270*61046927SAndroid Build Coastguard Worker
5271*61046927SAndroid Build Coastguard Worker if (ctx->arch >= 9) {
5272*61046927SAndroid Build Coastguard Worker va_optimize(ctx);
5273*61046927SAndroid Build Coastguard Worker va_lower_isel(ctx);
5274*61046927SAndroid Build Coastguard Worker
5275*61046927SAndroid Build Coastguard Worker bi_foreach_instr_global_safe(ctx, I) {
5276*61046927SAndroid Build Coastguard Worker /* Phis become single moves so shouldn't be affected */
5277*61046927SAndroid Build Coastguard Worker if (I->op == BI_OPCODE_PHI)
5278*61046927SAndroid Build Coastguard Worker continue;
5279*61046927SAndroid Build Coastguard Worker
5280*61046927SAndroid Build Coastguard Worker va_lower_constants(ctx, I);
5281*61046927SAndroid Build Coastguard Worker
5282*61046927SAndroid Build Coastguard Worker bi_builder b = bi_init_builder(ctx, bi_before_instr(I));
5283*61046927SAndroid Build Coastguard Worker va_repair_fau(&b, I);
5284*61046927SAndroid Build Coastguard Worker }
5285*61046927SAndroid Build Coastguard Worker
5286*61046927SAndroid Build Coastguard Worker /* We need to clean up after constant lowering */
5287*61046927SAndroid Build Coastguard Worker if (likely(optimize)) {
5288*61046927SAndroid Build Coastguard Worker bi_opt_cse(ctx);
5289*61046927SAndroid Build Coastguard Worker bi_opt_dce(ctx, false);
5290*61046927SAndroid Build Coastguard Worker }
5291*61046927SAndroid Build Coastguard Worker
5292*61046927SAndroid Build Coastguard Worker bi_validate(ctx, "Valhall passes");
5293*61046927SAndroid Build Coastguard Worker }
5294*61046927SAndroid Build Coastguard Worker
5295*61046927SAndroid Build Coastguard Worker bi_foreach_block(ctx, block) {
5296*61046927SAndroid Build Coastguard Worker bi_lower_branch(ctx, block);
5297*61046927SAndroid Build Coastguard Worker }
5298*61046927SAndroid Build Coastguard Worker
5299*61046927SAndroid Build Coastguard Worker if (bifrost_debug & BIFROST_DBG_SHADERS && !skip_internal)
5300*61046927SAndroid Build Coastguard Worker bi_print_shader(ctx, stdout);
5301*61046927SAndroid Build Coastguard Worker
5302*61046927SAndroid Build Coastguard Worker /* Analyze before register allocation to avoid false dependencies. The
5303*61046927SAndroid Build Coastguard Worker * skip bit is a function of only the data flow graph and is invariant
5304*61046927SAndroid Build Coastguard Worker * under valid scheduling. Helpers are only defined for fragment
5305*61046927SAndroid Build Coastguard Worker * shaders, so this analysis is only required in fragment shaders.
5306*61046927SAndroid Build Coastguard Worker */
5307*61046927SAndroid Build Coastguard Worker if (ctx->stage == MESA_SHADER_FRAGMENT) {
5308*61046927SAndroid Build Coastguard Worker bi_opt_dce(ctx, false);
5309*61046927SAndroid Build Coastguard Worker bi_analyze_helper_requirements(ctx);
5310*61046927SAndroid Build Coastguard Worker }
5311*61046927SAndroid Build Coastguard Worker
5312*61046927SAndroid Build Coastguard Worker /* Fuse TEXC after analyzing helper requirements so the analysis
5313*61046927SAndroid Build Coastguard Worker * doesn't have to know about dual textures */
5314*61046927SAndroid Build Coastguard Worker if (likely(optimize)) {
5315*61046927SAndroid Build Coastguard Worker bi_opt_fuse_dual_texture(ctx);
5316*61046927SAndroid Build Coastguard Worker }
5317*61046927SAndroid Build Coastguard Worker
5318*61046927SAndroid Build Coastguard Worker /* Lower FAU after fusing dual texture, because fusing dual texture
5319*61046927SAndroid Build Coastguard Worker * creates new immediates that themselves may need lowering.
5320*61046927SAndroid Build Coastguard Worker */
5321*61046927SAndroid Build Coastguard Worker if (ctx->arch <= 8) {
5322*61046927SAndroid Build Coastguard Worker bi_lower_fau(ctx);
5323*61046927SAndroid Build Coastguard Worker }
5324*61046927SAndroid Build Coastguard Worker
5325*61046927SAndroid Build Coastguard Worker /* Lowering FAU can create redundant moves. Run CSE+DCE to clean up. */
5326*61046927SAndroid Build Coastguard Worker if (likely(optimize)) {
5327*61046927SAndroid Build Coastguard Worker bi_opt_cse(ctx);
5328*61046927SAndroid Build Coastguard Worker bi_opt_dce(ctx, false);
5329*61046927SAndroid Build Coastguard Worker }
5330*61046927SAndroid Build Coastguard Worker
5331*61046927SAndroid Build Coastguard Worker bi_validate(ctx, "Late lowering");
5332*61046927SAndroid Build Coastguard Worker
5333*61046927SAndroid Build Coastguard Worker if (likely(!(bifrost_debug & BIFROST_DBG_NOPSCHED))) {
5334*61046927SAndroid Build Coastguard Worker bi_pressure_schedule(ctx);
5335*61046927SAndroid Build Coastguard Worker bi_validate(ctx, "Pre-RA scheduling");
5336*61046927SAndroid Build Coastguard Worker }
5337*61046927SAndroid Build Coastguard Worker
5338*61046927SAndroid Build Coastguard Worker bi_register_allocate(ctx);
5339*61046927SAndroid Build Coastguard Worker
5340*61046927SAndroid Build Coastguard Worker if (likely(optimize))
5341*61046927SAndroid Build Coastguard Worker bi_opt_post_ra(ctx);
5342*61046927SAndroid Build Coastguard Worker
5343*61046927SAndroid Build Coastguard Worker if (bifrost_debug & BIFROST_DBG_SHADERS && !skip_internal)
5344*61046927SAndroid Build Coastguard Worker bi_print_shader(ctx, stdout);
5345*61046927SAndroid Build Coastguard Worker
5346*61046927SAndroid Build Coastguard Worker if (ctx->arch >= 9) {
5347*61046927SAndroid Build Coastguard Worker va_assign_slots(ctx);
5348*61046927SAndroid Build Coastguard Worker va_insert_flow_control_nops(ctx);
5349*61046927SAndroid Build Coastguard Worker va_merge_flow(ctx);
5350*61046927SAndroid Build Coastguard Worker va_mark_last(ctx);
5351*61046927SAndroid Build Coastguard Worker } else {
5352*61046927SAndroid Build Coastguard Worker bi_schedule(ctx);
5353*61046927SAndroid Build Coastguard Worker bi_assign_scoreboard(ctx);
5354*61046927SAndroid Build Coastguard Worker
5355*61046927SAndroid Build Coastguard Worker /* Analyze after scheduling since we depend on instruction
5356*61046927SAndroid Build Coastguard Worker * order. Valhall calls as part of va_insert_flow_control_nops,
5357*61046927SAndroid Build Coastguard Worker * as the handling for clauses differs from instructions.
5358*61046927SAndroid Build Coastguard Worker */
5359*61046927SAndroid Build Coastguard Worker bi_analyze_helper_terminate(ctx);
5360*61046927SAndroid Build Coastguard Worker bi_mark_clauses_td(ctx);
5361*61046927SAndroid Build Coastguard Worker }
5362*61046927SAndroid Build Coastguard Worker
5363*61046927SAndroid Build Coastguard Worker if (bifrost_debug & BIFROST_DBG_SHADERS && !skip_internal)
5364*61046927SAndroid Build Coastguard Worker bi_print_shader(ctx, stdout);
5365*61046927SAndroid Build Coastguard Worker
5366*61046927SAndroid Build Coastguard Worker if (ctx->arch <= 8) {
5367*61046927SAndroid Build Coastguard Worker bi_pack_clauses(ctx, binary, offset);
5368*61046927SAndroid Build Coastguard Worker } else {
5369*61046927SAndroid Build Coastguard Worker bi_pack_valhall(ctx, binary);
5370*61046927SAndroid Build Coastguard Worker }
5371*61046927SAndroid Build Coastguard Worker
5372*61046927SAndroid Build Coastguard Worker if (bifrost_debug & BIFROST_DBG_SHADERS && !skip_internal) {
5373*61046927SAndroid Build Coastguard Worker if (ctx->arch <= 8) {
5374*61046927SAndroid Build Coastguard Worker disassemble_bifrost(stdout, binary->data + offset,
5375*61046927SAndroid Build Coastguard Worker binary->size - offset,
5376*61046927SAndroid Build Coastguard Worker bifrost_debug & BIFROST_DBG_VERBOSE);
5377*61046927SAndroid Build Coastguard Worker } else {
5378*61046927SAndroid Build Coastguard Worker disassemble_valhall(stdout, binary->data + offset,
5379*61046927SAndroid Build Coastguard Worker binary->size - offset,
5380*61046927SAndroid Build Coastguard Worker bifrost_debug & BIFROST_DBG_VERBOSE);
5381*61046927SAndroid Build Coastguard Worker }
5382*61046927SAndroid Build Coastguard Worker
5383*61046927SAndroid Build Coastguard Worker fflush(stdout);
5384*61046927SAndroid Build Coastguard Worker }
5385*61046927SAndroid Build Coastguard Worker
5386*61046927SAndroid Build Coastguard Worker if (!skip_internal &&
5387*61046927SAndroid Build Coastguard Worker ((bifrost_debug & BIFROST_DBG_SHADERDB) || inputs->debug)) {
5388*61046927SAndroid Build Coastguard Worker char *shaderdb;
5389*61046927SAndroid Build Coastguard Worker
5390*61046927SAndroid Build Coastguard Worker if (ctx->arch >= 9) {
5391*61046927SAndroid Build Coastguard Worker shaderdb = va_print_stats(ctx, binary->size - offset);
5392*61046927SAndroid Build Coastguard Worker } else {
5393*61046927SAndroid Build Coastguard Worker shaderdb = bi_print_stats(ctx, binary->size - offset);
5394*61046927SAndroid Build Coastguard Worker }
5395*61046927SAndroid Build Coastguard Worker
5396*61046927SAndroid Build Coastguard Worker if (bifrost_debug & BIFROST_DBG_SHADERDB)
5397*61046927SAndroid Build Coastguard Worker fprintf(stderr, "SHADER-DB: %s\n", shaderdb);
5398*61046927SAndroid Build Coastguard Worker
5399*61046927SAndroid Build Coastguard Worker if (inputs->debug)
5400*61046927SAndroid Build Coastguard Worker util_debug_message(inputs->debug, SHADER_INFO, "%s", shaderdb);
5401*61046927SAndroid Build Coastguard Worker
5402*61046927SAndroid Build Coastguard Worker ralloc_free(shaderdb);
5403*61046927SAndroid Build Coastguard Worker }
5404*61046927SAndroid Build Coastguard Worker
5405*61046927SAndroid Build Coastguard Worker return ctx;
5406*61046927SAndroid Build Coastguard Worker }
5407*61046927SAndroid Build Coastguard Worker
5408*61046927SAndroid Build Coastguard Worker static void
bi_compile_variant(nir_shader * nir,const struct panfrost_compile_inputs * inputs,struct util_dynarray * binary,struct pan_shader_info * info,enum bi_idvs_mode idvs)5409*61046927SAndroid Build Coastguard Worker bi_compile_variant(nir_shader *nir,
5410*61046927SAndroid Build Coastguard Worker const struct panfrost_compile_inputs *inputs,
5411*61046927SAndroid Build Coastguard Worker struct util_dynarray *binary, struct pan_shader_info *info,
5412*61046927SAndroid Build Coastguard Worker enum bi_idvs_mode idvs)
5413*61046927SAndroid Build Coastguard Worker {
5414*61046927SAndroid Build Coastguard Worker struct bi_shader_info local_info = {
5415*61046927SAndroid Build Coastguard Worker .push = &info->push,
5416*61046927SAndroid Build Coastguard Worker .bifrost = &info->bifrost,
5417*61046927SAndroid Build Coastguard Worker .tls_size = info->tls_size,
5418*61046927SAndroid Build Coastguard Worker .push_offset = info->push.count,
5419*61046927SAndroid Build Coastguard Worker };
5420*61046927SAndroid Build Coastguard Worker
5421*61046927SAndroid Build Coastguard Worker unsigned offset = binary->size;
5422*61046927SAndroid Build Coastguard Worker
5423*61046927SAndroid Build Coastguard Worker /* If there is no position shader (gl_Position is not written), then
5424*61046927SAndroid Build Coastguard Worker * there is no need to build a varying shader either. This case is hit
5425*61046927SAndroid Build Coastguard Worker * for transform feedback only vertex shaders which only make sense with
5426*61046927SAndroid Build Coastguard Worker * rasterizer discard.
5427*61046927SAndroid Build Coastguard Worker */
5428*61046927SAndroid Build Coastguard Worker if ((offset == 0) && (idvs == BI_IDVS_VARYING))
5429*61046927SAndroid Build Coastguard Worker return;
5430*61046927SAndroid Build Coastguard Worker
5431*61046927SAndroid Build Coastguard Worker /* Software invariant: Only a secondary shader can appear at a nonzero
5432*61046927SAndroid Build Coastguard Worker * offset, to keep the ABI simple. */
5433*61046927SAndroid Build Coastguard Worker assert((offset == 0) ^ (idvs == BI_IDVS_VARYING));
5434*61046927SAndroid Build Coastguard Worker
5435*61046927SAndroid Build Coastguard Worker bi_context *ctx =
5436*61046927SAndroid Build Coastguard Worker bi_compile_variant_nir(nir, inputs, binary, local_info, idvs);
5437*61046927SAndroid Build Coastguard Worker
5438*61046927SAndroid Build Coastguard Worker /* A register is preloaded <==> it is live before the first block */
5439*61046927SAndroid Build Coastguard Worker bi_block *first_block = list_first_entry(&ctx->blocks, bi_block, link);
5440*61046927SAndroid Build Coastguard Worker uint64_t preload = first_block->reg_live_in;
5441*61046927SAndroid Build Coastguard Worker
5442*61046927SAndroid Build Coastguard Worker /* If multisampling is used with a blend shader, the blend shader needs
5443*61046927SAndroid Build Coastguard Worker * to access the sample coverage mask in r60 and the sample ID in r61.
5444*61046927SAndroid Build Coastguard Worker * Blend shaders run in the same context as fragment shaders, so if a
5445*61046927SAndroid Build Coastguard Worker * blend shader could run, we need to preload these registers
5446*61046927SAndroid Build Coastguard Worker * conservatively. There is believed to be little cost to doing so, so
5447*61046927SAndroid Build Coastguard Worker * do so always to avoid variants of the preload descriptor.
5448*61046927SAndroid Build Coastguard Worker *
5449*61046927SAndroid Build Coastguard Worker * We only do this on Valhall, as Bifrost has to update the RSD for
5450*61046927SAndroid Build Coastguard Worker * multisampling w/ blend shader anyway, so this is handled in the
5451*61046927SAndroid Build Coastguard Worker * driver. We could unify the paths if the cost is acceptable.
5452*61046927SAndroid Build Coastguard Worker */
5453*61046927SAndroid Build Coastguard Worker if (nir->info.stage == MESA_SHADER_FRAGMENT && ctx->arch >= 9)
5454*61046927SAndroid Build Coastguard Worker preload |= BITFIELD64_BIT(60) | BITFIELD64_BIT(61);
5455*61046927SAndroid Build Coastguard Worker
5456*61046927SAndroid Build Coastguard Worker info->ubo_mask |= ctx->ubo_mask;
5457*61046927SAndroid Build Coastguard Worker info->tls_size = MAX2(info->tls_size, ctx->info.tls_size);
5458*61046927SAndroid Build Coastguard Worker
5459*61046927SAndroid Build Coastguard Worker if (idvs == BI_IDVS_VARYING) {
5460*61046927SAndroid Build Coastguard Worker info->vs.secondary_enable = (binary->size > offset);
5461*61046927SAndroid Build Coastguard Worker info->vs.secondary_offset = offset;
5462*61046927SAndroid Build Coastguard Worker info->vs.secondary_preload = preload;
5463*61046927SAndroid Build Coastguard Worker info->vs.secondary_work_reg_count = ctx->info.work_reg_count;
5464*61046927SAndroid Build Coastguard Worker } else {
5465*61046927SAndroid Build Coastguard Worker info->preload = preload;
5466*61046927SAndroid Build Coastguard Worker info->work_reg_count = ctx->info.work_reg_count;
5467*61046927SAndroid Build Coastguard Worker }
5468*61046927SAndroid Build Coastguard Worker
5469*61046927SAndroid Build Coastguard Worker if (idvs == BI_IDVS_POSITION && !nir->info.internal &&
5470*61046927SAndroid Build Coastguard Worker nir->info.outputs_written & BITFIELD_BIT(VARYING_SLOT_PSIZ)) {
5471*61046927SAndroid Build Coastguard Worker /* Find the psiz write */
5472*61046927SAndroid Build Coastguard Worker bi_instr *write = NULL;
5473*61046927SAndroid Build Coastguard Worker
5474*61046927SAndroid Build Coastguard Worker bi_foreach_instr_global(ctx, I) {
5475*61046927SAndroid Build Coastguard Worker if (I->op == BI_OPCODE_STORE_I16 && I->seg == BI_SEG_POS) {
5476*61046927SAndroid Build Coastguard Worker write = I;
5477*61046927SAndroid Build Coastguard Worker break;
5478*61046927SAndroid Build Coastguard Worker }
5479*61046927SAndroid Build Coastguard Worker }
5480*61046927SAndroid Build Coastguard Worker
5481*61046927SAndroid Build Coastguard Worker assert(write != NULL);
5482*61046927SAndroid Build Coastguard Worker
5483*61046927SAndroid Build Coastguard Worker /* NOP it out, preserving its flow control. TODO: maybe DCE */
5484*61046927SAndroid Build Coastguard Worker if (write->flow) {
5485*61046927SAndroid Build Coastguard Worker bi_builder b = bi_init_builder(ctx, bi_before_instr(write));
5486*61046927SAndroid Build Coastguard Worker bi_instr *nop = bi_nop(&b);
5487*61046927SAndroid Build Coastguard Worker nop->flow = write->flow;
5488*61046927SAndroid Build Coastguard Worker }
5489*61046927SAndroid Build Coastguard Worker
5490*61046927SAndroid Build Coastguard Worker bi_remove_instruction(write);
5491*61046927SAndroid Build Coastguard Worker
5492*61046927SAndroid Build Coastguard Worker info->vs.no_psiz_offset = binary->size;
5493*61046927SAndroid Build Coastguard Worker bi_pack_valhall(ctx, binary);
5494*61046927SAndroid Build Coastguard Worker }
5495*61046927SAndroid Build Coastguard Worker
5496*61046927SAndroid Build Coastguard Worker ralloc_free(ctx);
5497*61046927SAndroid Build Coastguard Worker }
5498*61046927SAndroid Build Coastguard Worker
5499*61046927SAndroid Build Coastguard Worker /* Decide if Index-Driven Vertex Shading should be used for a given shader */
5500*61046927SAndroid Build Coastguard Worker static bool
bi_should_idvs(nir_shader * nir,const struct panfrost_compile_inputs * inputs)5501*61046927SAndroid Build Coastguard Worker bi_should_idvs(nir_shader *nir, const struct panfrost_compile_inputs *inputs)
5502*61046927SAndroid Build Coastguard Worker {
5503*61046927SAndroid Build Coastguard Worker /* Opt-out */
5504*61046927SAndroid Build Coastguard Worker if (inputs->no_idvs || bifrost_debug & BIFROST_DBG_NOIDVS)
5505*61046927SAndroid Build Coastguard Worker return false;
5506*61046927SAndroid Build Coastguard Worker
5507*61046927SAndroid Build Coastguard Worker /* IDVS splits up vertex shaders, not defined on other shader stages */
5508*61046927SAndroid Build Coastguard Worker if (nir->info.stage != MESA_SHADER_VERTEX)
5509*61046927SAndroid Build Coastguard Worker return false;
5510*61046927SAndroid Build Coastguard Worker
5511*61046927SAndroid Build Coastguard Worker /* Bifrost cannot write gl_PointSize during IDVS */
5512*61046927SAndroid Build Coastguard Worker if ((inputs->gpu_id < 0x9000) &&
5513*61046927SAndroid Build Coastguard Worker nir->info.outputs_written & BITFIELD_BIT(VARYING_SLOT_PSIZ))
5514*61046927SAndroid Build Coastguard Worker return false;
5515*61046927SAndroid Build Coastguard Worker
5516*61046927SAndroid Build Coastguard Worker /* Otherwise, IDVS is usually better */
5517*61046927SAndroid Build Coastguard Worker return true;
5518*61046927SAndroid Build Coastguard Worker }
5519*61046927SAndroid Build Coastguard Worker
5520*61046927SAndroid Build Coastguard Worker void
bifrost_compile_shader_nir(nir_shader * nir,const struct panfrost_compile_inputs * inputs,struct util_dynarray * binary,struct pan_shader_info * info)5521*61046927SAndroid Build Coastguard Worker bifrost_compile_shader_nir(nir_shader *nir,
5522*61046927SAndroid Build Coastguard Worker const struct panfrost_compile_inputs *inputs,
5523*61046927SAndroid Build Coastguard Worker struct util_dynarray *binary,
5524*61046927SAndroid Build Coastguard Worker struct pan_shader_info *info)
5525*61046927SAndroid Build Coastguard Worker {
5526*61046927SAndroid Build Coastguard Worker bifrost_debug = debug_get_option_bifrost_debug();
5527*61046927SAndroid Build Coastguard Worker
5528*61046927SAndroid Build Coastguard Worker /* Combine stores late, to give the driver a chance to lower dual-source
5529*61046927SAndroid Build Coastguard Worker * blending as regular store_output intrinsics.
5530*61046927SAndroid Build Coastguard Worker */
5531*61046927SAndroid Build Coastguard Worker NIR_PASS_V(nir, pan_nir_lower_zs_store);
5532*61046927SAndroid Build Coastguard Worker
5533*61046927SAndroid Build Coastguard Worker bi_optimize_nir(nir, inputs->gpu_id, inputs->is_blend);
5534*61046927SAndroid Build Coastguard Worker
5535*61046927SAndroid Build Coastguard Worker info->tls_size = nir->scratch_size;
5536*61046927SAndroid Build Coastguard Worker info->vs.idvs = bi_should_idvs(nir, inputs);
5537*61046927SAndroid Build Coastguard Worker
5538*61046927SAndroid Build Coastguard Worker pan_nir_collect_varyings(nir, info);
5539*61046927SAndroid Build Coastguard Worker
5540*61046927SAndroid Build Coastguard Worker if (info->vs.idvs) {
5541*61046927SAndroid Build Coastguard Worker bi_compile_variant(nir, inputs, binary, info, BI_IDVS_POSITION);
5542*61046927SAndroid Build Coastguard Worker bi_compile_variant(nir, inputs, binary, info, BI_IDVS_VARYING);
5543*61046927SAndroid Build Coastguard Worker } else {
5544*61046927SAndroid Build Coastguard Worker bi_compile_variant(nir, inputs, binary, info, BI_IDVS_NONE);
5545*61046927SAndroid Build Coastguard Worker }
5546*61046927SAndroid Build Coastguard Worker
5547*61046927SAndroid Build Coastguard Worker if (gl_shader_stage_is_compute(nir->info.stage)) {
5548*61046927SAndroid Build Coastguard Worker /* Workgroups may be merged if the structure of the workgroup is
5549*61046927SAndroid Build Coastguard Worker * not software visible. This is true if neither shared memory
5550*61046927SAndroid Build Coastguard Worker * nor barriers are used. The hardware may be able to optimize
5551*61046927SAndroid Build Coastguard Worker * compute shaders that set this flag.
5552*61046927SAndroid Build Coastguard Worker */
5553*61046927SAndroid Build Coastguard Worker info->cs.allow_merging_workgroups = (nir->info.shared_size == 0) &&
5554*61046927SAndroid Build Coastguard Worker !nir->info.uses_control_barrier &&
5555*61046927SAndroid Build Coastguard Worker !nir->info.uses_memory_barrier;
5556*61046927SAndroid Build Coastguard Worker }
5557*61046927SAndroid Build Coastguard Worker
5558*61046927SAndroid Build Coastguard Worker info->ubo_mask &= (1 << nir->info.num_ubos) - 1;
5559*61046927SAndroid Build Coastguard Worker }
5560